PyPI - glyphh-code - Versions diffs - 0.2.0__py3-none-any.whl - Mend

glyphh-code 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

glyphh_code/CLAUDE.md +95 -0
glyphh_code/__init__.py +3 -0
glyphh_code/ast_extract.py +403 -0
glyphh_code/banner.py +76 -0
glyphh_code/cli.py +65 -0
glyphh_code/compile.py +381 -0
glyphh_code/config.yaml +13 -0
glyphh_code/drift.py +74 -0
glyphh_code/encoder.py +1119 -0
glyphh_code/hooks/enforce-glyphh-search.sh +32 -0
glyphh_code/hooks/post-commit-compile.sh +104 -0
glyphh_code/hooks/post-git-compile.sh +148 -0
glyphh_code/manifest.yaml +18 -0
glyphh_code/plugin.py +357 -0
glyphh_code/setup.py +378 -0
glyphh_code-0.2.0.dist-info/METADATA +288 -0
glyphh_code-0.2.0.dist-info/RECORD +21 -0
glyphh_code-0.2.0.dist-info/WHEEL +5 -0
glyphh_code-0.2.0.dist-info/entry_points.txt +5 -0
glyphh_code-0.2.0.dist-info/licenses/LICENSE +661 -0
glyphh_code-0.2.0.dist-info/top_level.txt +1 -0

glyphh_code/CLAUDE.md ADDED Viewed

@@ -0,0 +1,95 @@
+# Glyphh Code Intelligence
+This project uses Glyphh for codebase search.
+The Glyphh index is compiled from every file in this repo.
+Follow these rules in every session without exception.
+## Tools available
+glyphh_search   find files by natural language query
+glyphh_related  find files related to a given file
+glyphh_stats    index statistics
+## Navigation rules
+ALWAYS call glyphh_search before reading any file.
+ALWAYS call glyphh_related before editing a file.
+NEVER use Grep to find files. Use glyphh_search instead.
+NEVER use Glob to find files. Use glyphh_search instead.
+NEVER use the Agent tool to explore the codebase. Use glyphh_search instead.
+NEVER scan directories to find relevant code.
+NEVER read multiple files speculatively.
+Only fall back to Grep or Glob if glyphh_search returns no results above 0.50.
+Search results include top_tokens and imports for each file.
+Use top_tokens to understand what the file is about.
+Use imports to understand what it depends on.
+Only read the file if top_tokens and imports do not answer the question.
+Prefer files with confidence above 0.70.
+If the result state is ASK, tell the user the candidates and ask which to use.
+## Debugging rules
+When investigating a bug or error:
+  1. Call glyphh_search with the error type or concept from the stack trace
+  2. Check top_tokens and imports from results before reading any file
+  3. Read only files with confidence above 0.70
+  4. Call glyphh_related on the target file before making any change
+## Editing rules
+Before editing any file:
+  1. Call glyphh_related to understand blast radius
+  2. Review top_tokens and imports of related files
+After editing:
+  A Claude Code PostToolUse hook runs compile.py --incremental in the
+  background after every git commit to update the index automatically.
+  No manual recompile needed.
+## Query guide
+Good queries for glyphh_search use specific domain vocabulary:
+  auth token validation
+  stripe webhook handler
+  user profile fetch
+  database connection pool
+  error boundary component
+  payment retry logic
+  session expiry check
+Poor queries are too generic and will return low-confidence results:
+  utils
+  helper
+  index
+  common
+  base
+## Search result shape
+glyphh_search returns:
+  state         DONE or ASK
+  matches       list of results when state is DONE
+    file        relative file path
+    confidence  0.0 to 1.0, prefer above 0.70
+    top_tokens  dominant concepts in the file
+    imports     what the file depends on
+    extension   file type
+  candidates    list of options when state is ASK
+glyphh_related returns:
+  state         DONE or ASK
+  file          the queried file
+  related       list of semantically similar files
+    file        relative file path
+    similarity  0.0 to 1.0
+    top_tokens  dominant concepts
+    imports     dependencies

glyphh_code/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Glyphh Code — codebase intelligence for Claude Code."""
+__version__ = "0.1.0"

glyphh_code/ast_extract.py ADDED Viewed

@@ -0,0 +1,403 @@
+"""
+Language-agnostic AST extraction for Glyphh Code model.
+Uses tree-sitter to extract structural signals from source files:
+  - defines: top-level class/function/method names (split into words)
+  - imports: module/package dependencies
+  - docstring: module-level description (first docstring or comment block)
+  - file_role: source, test, config, docs, example, script
+Supports any language with a tree-sitter grammar installed.
+Falls back to regex extraction for unsupported languages.
+Usage:
+    from ast_extract import extract_file_symbols
+    result = extract_file_symbols("src/server/auth.py", content)
+    # {"defines": "AuthMiddleware check_scope ...",
+    #  "imports": "fastmcp.server.middleware ...",
+    #  "docstring": "Authorization middleware for ...",
+    #  "file_role": "source"}
+"""
+import re
+from pathlib import Path
+# ---------------------------------------------------------------------------
+# Tree-sitter grammar loading
+# ---------------------------------------------------------------------------
+_PARSERS: dict[str, object] = {}
+_TS_AVAILABLE = False
+try:
+    from tree_sitter import Language, Parser
+    _TS_AVAILABLE = True
+except ImportError:
+    pass
+# Extension → (grammar module name, tree-sitter language name)
+# The first element is the importable Python package that ships the grammar;
+# several extensions may share one package (.js/.jsx, .c/.h, .cpp/.hpp).
+# The second element distinguishes grammars inside one package (typescript
+# vs tsx). Extensions missing from this table get no tree-sitter parser.
+_GRAMMAR_MAP: dict[str, tuple[str, str]] = {
+    ".py": ("tree_sitter_python", "python"),
+    ".js": ("tree_sitter_javascript", "javascript"),
+    ".jsx": ("tree_sitter_javascript", "javascript"),
+    ".ts": ("tree_sitter_typescript", "typescript"),
+    ".tsx": ("tree_sitter_typescript", "tsx"),
+    ".go": ("tree_sitter_go", "go"),
+    ".rs": ("tree_sitter_rust", "rust"),
+    ".java": ("tree_sitter_java", "java"),
+    ".c": ("tree_sitter_c", "c"),
+    ".h": ("tree_sitter_c", "c"),
+    ".cpp": ("tree_sitter_cpp", "cpp"),
+    ".hpp": ("tree_sitter_cpp", "cpp"),
+    ".rb": ("tree_sitter_ruby", "ruby"),
+    ".cs": ("tree_sitter_c_sharp", "c_sharp"),
+    ".swift": ("tree_sitter_swift", "swift"),
+}
+# Node types for definitions across languages
+_DEFINE_TYPES = frozenset({
+    # Python
+    "function_definition", "class_definition",
+    # JS/TS
+    "function_declaration", "class_declaration",
+    "method_definition", "arrow_function",
+    "export_statement",
+    # Go
+    "function_declaration", "method_declaration",
+    "type_declaration",
+    # Rust
+    "function_item", "struct_item", "enum_item",
+    "impl_item", "trait_item", "type_item",
+    # Java
+    "method_declaration", "class_declaration",
+    "interface_declaration", "enum_declaration",
+    # C/C++
+    "function_definition", "struct_specifier",
+    "class_specifier", "enum_specifier",
+    # Ruby
+    "method", "class", "module",
+})
+# Node types for imports across languages
+_IMPORT_TYPES = frozenset({
+    # Python
+    "import_statement", "import_from_statement",
+    # JS/TS
+    "import_statement", "import_declaration",
+    # Go
+    "import_declaration", "import_spec",
+    # Rust
+    "use_declaration",
+    # Java
+    "import_declaration",
+    # C/C++
+    "preproc_include",
+    # Ruby
+    "call",  # require/require_relative — filtered by content
+})
+def _get_parser(ext: str):
+    """Get or create a tree-sitter parser for the given file extension."""
+    if not _TS_AVAILABLE:
+        return None
+    if ext in _PARSERS:
+        return _PARSERS[ext]
+    grammar_info = _GRAMMAR_MAP.get(ext)
+    if not grammar_info:
+        _PARSERS[ext] = None
+        return None
+    module_name, lang_name = grammar_info
+    try:
+        import importlib
+        mod = importlib.import_module(module_name)
+        # tree-sitter 0.22+ API: language() function returns Language
+        if hasattr(mod, "language"):
+            lang = Language(mod.language())
+        else:
+            # tree-sitter 0.21 API: use Language.build_library or direct path
+            _PARSERS[ext] = None
+            return None
+        parser = Parser(lang)
+        _PARSERS[ext] = parser
+        return parser
+    except (ImportError, Exception):
+        _PARSERS[ext] = None
+        return None
+# ---------------------------------------------------------------------------
+# Tree-sitter extraction
+# ---------------------------------------------------------------------------
+def _split_name(name: str) -> str:
+    """Split CamelCase and snake_case into space-separated words.
+    AuthorizationMiddleware → authorization middleware
+    check_scope → check scope
+    SSETransport → sse transport
+    """
+    # Insert space before uppercase runs: SSETransport → SSE Transport
+    s = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", name)
+    # Insert space before single uppercase: checkScope → check Scope
+    s = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", s)
+    # Replace underscores with spaces
+    s = s.replace("_", " ")
+    return s.lower().strip()
+def _extract_name_from_node(node) -> str:
+    """Extract the name identifier from a definition node."""
+    for child in node.children:
+        if child.type in ("identifier", "name", "property_identifier",
+                          "type_identifier"):
+            return child.text.decode("utf-8")
+        # For export statements, look deeper
+        if child.type in ("function_declaration", "class_declaration",
+                          "lexical_declaration", "variable_declaration"):
+            return _extract_name_from_node(child)
+    return ""
+def _extract_ts(content: str, ext: str) -> dict:
+    """Extract symbols using tree-sitter."""
+    parser = _get_parser(ext)
+    if parser is None:
+        return {}
+    tree = parser.parse(content.encode("utf-8"))
+    root = tree.root_node
+    defines = []
+    imports = []
+    docstring = ""
+    for node in root.children:
+        # Top-level definitions
+        if node.type in _DEFINE_TYPES:
+            name = _extract_name_from_node(node)
+            if name and not name.startswith("_"):
+                defines.append(name)
+        # Imports
+        elif node.type in _IMPORT_TYPES:
+            text = node.text.decode("utf-8").strip()
+            imports.append(text)
+        # Module docstring — first expression_statement containing a string
+        elif not docstring and node.type == "expression_statement":
+            for child in node.children:
+                if child.type in ("string", "concatenated_string"):
+                    raw = child.text.decode("utf-8")
+                    # Strip quotes
+                    for q in ('"""', "'''", '"', "'"):
+                        if raw.startswith(q) and raw.endswith(q):
+                            raw = raw[len(q):-len(q)]
+                            break
+                    docstring = raw.strip()
+                    break
+        # Module docstring — first comment block
+        elif not docstring and node.type == "comment":
+            docstring = node.text.decode("utf-8").lstrip("/#* ").strip()
+    return {
+        "defines_raw": defines,
+        "imports_raw": imports,
+        "docstring": docstring,
+    }
+# ---------------------------------------------------------------------------
+# Regex fallback extraction
+# ---------------------------------------------------------------------------
+# Patterns for common definition syntaxes
+_DEF_PATTERNS = [
+    # Python: def name, class Name
+    re.compile(r"^(?:def|class)\s+(\w+)", re.MULTILINE),
+    # JS/TS: function name, class Name, export function name
+    re.compile(r"^(?:export\s+)?(?:function|class)\s+(\w+)", re.MULTILINE),
+    # Go: func Name, func (r *Receiver) Name, type Name struct
+    re.compile(r"^func\s+(?:\([^)]*\)\s+)?(\w+)", re.MULTILINE),
+    re.compile(r"^type\s+(\w+)\s+(?:struct|interface)", re.MULTILINE),
+    # Rust: fn name, struct Name, enum Name, impl Name
+    re.compile(r"^(?:pub\s+)?(?:fn|struct|enum|trait|impl)\s+(\w+)", re.MULTILINE),
+    # Java/C#: public class Name, void methodName
+    re.compile(r"^(?:public|private|protected)?\s*(?:static\s+)?(?:class|interface|enum)\s+(\w+)", re.MULTILINE),
+    # C/C++: return_type function_name(
+    re.compile(r"^(?:\w+\s+)+(\w+)\s*\(", re.MULTILINE),
+    # Ruby: def name, class Name, module Name
+    re.compile(r"^(?:def|class|module)\s+(\w+)", re.MULTILINE),
+]
+_IMPORT_PATTERNS = [
+    # Python: import x, from x import y
+    re.compile(r"^(?:from\s+([\w.]+)\s+)?import\s+([\w., ]+)", re.MULTILINE),
+    # JS/TS: import ... from "module"
+    re.compile(r"""^import\s+.*?from\s+['"]([^'"]+)['"]""", re.MULTILINE),
+    # Go: import "package"
+    re.compile(r"""^\s*"([^"]+)"$""", re.MULTILINE),
+    # Rust: use crate::path
+    re.compile(r"^use\s+([\w:]+)", re.MULTILINE),
+    # C/C++: #include <file> or "file"
+    re.compile(r'^#include\s+[<"]([^>"]+)[>"]', re.MULTILINE),
+    # Ruby: require "file"
+    re.compile(r"""^require(?:_relative)?\s+['"]([^'"]+)['"]""", re.MULTILINE),
+]
+def _extract_regex(content: str) -> dict:
+    """Fallback: extract symbols using regex patterns."""
+    defines = []
+    for pat in _DEF_PATTERNS:
+        for m in pat.finditer(content):
+            name = m.group(1)
+            if name and not name.startswith("_") and name not in defines:
+                defines.append(name)
+    imports = []
+    for pat in _IMPORT_PATTERNS:
+        for m in pat.finditer(content):
+            # Take the last non-None group
+            for g in reversed(m.groups()):
+                if g:
+                    imports.append(g.strip())
+                    break
+    # Docstring: first triple-quoted string or comment block
+    docstring = ""
+    m = re.search(r'^(?:"""(.*?)"""|\'\'\'(.*?)\'\'\')', content, re.DOTALL)
+    if m:
+        docstring = (m.group(1) or m.group(2) or "").strip()
+    elif not docstring:
+        # First comment block
+        lines = content.split("\n")
+        comment_lines = []
+        for line in lines:
+            stripped = line.strip()
+            if stripped.startswith(("#", "//", "*", "/*")):
+                comment_lines.append(stripped.lstrip("#/* "))
+            elif comment_lines:
+                break
+            elif stripped:
+                break
+        if comment_lines:
+            docstring = " ".join(comment_lines)
+    return {
+        "defines_raw": defines,
+        "imports_raw": imports,
+        "docstring": docstring,
+    }
+# ---------------------------------------------------------------------------
+# Role detection
+# ---------------------------------------------------------------------------
+def _detect_role(file_path: str) -> str:
+    """Detect file role from path heuristics."""
+    parts = Path(file_path).parts
+    name = Path(file_path).stem
+    ext = Path(file_path).suffix
+    # Test files
+    if any(p in ("tests", "test", "__tests__", "spec") for p in parts):
+        return "test"
+    if name.startswith("test_") or name.endswith("_test") or name.endswith(".test"):
+        return "test"
+    if name.startswith("spec_") or name.endswith("_spec") or name.endswith(".spec"):
+        return "test"
+    # Examples
+    if any(p in ("examples", "example", "demo", "demos", "samples") for p in parts):
+        return "example"
+    # Config
+    if ext in (".yaml", ".yml", ".toml", ".json", ".ini", ".cfg", ".conf"):
+        return "config"
+    if name in ("setup", "pyproject", "package", "tsconfig", "webpack",
+                "Makefile", "Dockerfile", "docker-compose", "Cargo"):
+        return "config"
+    # Docs
+    if ext in (".md", ".rst", ".txt"):
+        return "docs"
+    if any(p in ("docs", "doc", "documentation") for p in parts):
+        return "docs"
+    # Scripts
+    if ext in (".sh", ".bash", ".zsh"):
+        return "script"
+    return "source"
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def extract_file_symbols(file_path: str, content: str) -> dict:
+    """Extract structural symbols from a source file.
+    Args:
+        file_path: Relative path to the file (for role detection + extension)
+        content: File contents as string
+    Returns:
+        dict with keys:
+            defines — space-separated words from top-level symbol names
+            imports — space-separated import module/package names
+            docstring — module-level description (first docstring/comment)
+            file_role — source, test, config, docs, example, script
+    """
+    ext = Path(file_path).suffix
+    # Try tree-sitter first, fall back to regex
+    result = _extract_ts(content, ext)
+    if not result:
+        result = _extract_regex(content)
+    # Split define names into searchable words
+    define_words = []
+    for name in result.get("defines_raw", []):
+        define_words.append(name)  # Keep original name
+        split = _split_name(name)
+        if split != name.lower():
+            define_words.append(split)
+    # Clean up imports into module names
+    import_names = []
+    for imp in result.get("imports_raw", []):
+        # Extract module name from full import statement
+        # "from fastmcp.server import auth" → "fastmcp server auth"
+        cleaned = re.sub(r"^(?:from|import|use|require|include)\s+", "", imp)
+        cleaned = re.sub(r"\s+import\s+.*", "", cleaned)
+        cleaned = cleaned.replace(".", " ").replace("::", " ").replace("/", " ")
+        cleaned = re.sub(r"[^a-zA-Z0-9_ ]", "", cleaned)
+        if cleaned.strip():
+            import_names.append(cleaned.strip())
+    docstring = result.get("docstring", "")
+    # Truncate long docstrings — first sentence is usually enough
+    if len(docstring) > 200:
+        # Cut at first period or newline
+        for sep in (".\n", ". ", "\n\n", "\n"):
+            idx = docstring.find(sep)
+            if 20 < idx < 200:
+                docstring = docstring[:idx + 1]
+                break
+        else:
+            docstring = docstring[:200]
+    return {
+        "defines": " ".join(define_words),
+        "imports": " ".join(import_names),
+        "docstring": docstring.strip(),
+        "file_role": _detect_role(file_path),
+    }

glyphh_code/banner.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""
+Banner for the glyphh-code CLI.
+Reuses the Glyphh brand theme from the runtime.
+"""
+import sys
+import time
+import click
+# Use the theme from the glyphh package when it is importable; otherwise
+# define a local stand-in exposing the color attributes used in this module.
+try:
+    from glyphh.cli import theme
+except ImportError:
+    # Fallback if runtime not installed yet
+    class _FallbackTheme:
+        """Stand-in theme; the values are color names passed to click as ``fg``."""
+        PRIMARY = "magenta"
+        ACCENT = "bright_magenta"
+        MUTED = "bright_black"
+        SUCCESS = "green"
+        WARNING = "yellow"
+        ERROR = "red"
+        INFO = "cyan"
+        TEXT = "white"
+        TEXT_DIM = "bright_black"
+    theme = _FallbackTheme()
+# Characters per second for streaming effect
+_CPS = 800
+def _stream(text: str, fg: str | None = None, bold: bool = False):
+    """Print text character-by-character with optional color."""
+    delay = 1.0 / _CPS
+    styled = click.style(text, fg=fg, bold=bold) if (fg or bold) else text
+    i = 0
+    while i < len(styled):
+        if styled[i] == '\x1b':
+            j = i + 1
+            while j < len(styled) and styled[j] != 'm':
+                j += 1
+            sys.stdout.write(styled[i:j + 1])
+            i = j + 1
+        else:
+            sys.stdout.write(styled[i])
+            sys.stdout.flush()
+            time.sleep(delay)
+            i += 1
+    sys.stdout.write('\n')
+    sys.stdout.flush()
+def print_banner():
+    """Print the glyphh-code welcome banner."""
+    click.echo()
+    _stream("        _             _     _             _", fg=theme.PRIMARY)
+    _stream("   __ _| |_   _ _ __ | |__ | |__     __ _(_)", fg=theme.PRIMARY)
+    _stream("  / _` | | | | | '_ \\| '_ \\| '_ \\   / _` | |", fg=theme.PRIMARY)
+    _stream(" | (_| | | |_| | |_) | | | | | | | | (_| | |", fg=theme.ACCENT)
+    _stream("  \\__, |_|\\__, | .__/|_| |_|_| |_|  \\__,_|_|", fg="cyan")
+    _stream("  |___/   |___/|_|                      code", fg="bright_cyan")
+    click.echo()
+    _stream("  codebase intelligence for claude code", fg="bright_cyan")
+    click.echo()
+def print_status(repo: str, port: int, mcp_url: str, file_count: int):
+    """Print init status after setup completes."""
+    dot = click.style("●", fg=theme.SUCCESS)
+    click.echo(f"  {dot} {click.style('ready', fg=theme.SUCCESS)}")
+    click.echo()
+    click.secho(f"  Repo:      {repo}", fg=theme.TEXT_DIM)
+    click.secho(f"  Files:     {file_count} indexed", fg=theme.TEXT_DIM)
+    click.secho(f"  MCP:       {mcp_url}", fg=theme.ACCENT)
+    click.secho(f"  Storage:   SQLite (local)", fg=theme.TEXT_DIM)
+    click.secho(f"  Auth:      none (local mode)", fg=theme.TEXT_DIM)
+    click.echo()

glyphh_code/cli.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""
+glyphh-code CLI entry point.
+Usage:
+    glyphh-code init [path]    Set up Glyphh Code for a repository
+    glyphh-code compile [path] Recompile the index
+    glyphh-code serve [path]   Start the MCP server
+    glyphh-code status         Show current status
+"""
+import click
+from . import __version__
+@click.group()
+@click.version_option(__version__, prog_name="glyphh-code")
+def cli():
+    """Glyphh Code — codebase intelligence for Claude Code."""
+    pass
+# `init` subcommand: PATH must exist (default "."); --port/-p defaults to 8002.
+# The docstring below is the command's help text shown by click.
+@cli.command()
+@click.argument("path", default=".", type=click.Path(exists=True))
+@click.option("--port", "-p", default=8002, type=int, help="Server port (default: 8002)")
+def init(path, port):
+    """Set up Glyphh Code for a repository.
+    Compiles the codebase, starts the MCP server, and configures Claude Code.
+    Everything is local — no account, no Docker, no auth required.
+    """
+    # Imported inside the command so the setup module is only loaded when
+    # this command runs (presumably to keep CLI startup light — confirm).
+    from .setup import run_init
+    run_init(path, port)
+# `compile` subcommand: PATH must exist (default "."). click derives the
+# command name from the function name, which is why this function shadows
+# the builtin compile() inside this module.
+@cli.command()
+@click.argument("path", default=".", type=click.Path(exists=True))
+def compile(path):
+    """Recompile the index for a repository."""
+    # Imported inside the command so the setup module is only loaded when
+    # this command runs.
+    from .setup import run_compile
+    run_compile(path)
+# `serve` subcommand: PATH must exist (default "."); --port/-p defaults to
+# 8002, the same default as `init`.
+@cli.command()
+@click.argument("path", default=".", type=click.Path(exists=True))
+@click.option("--port", "-p", default=8002, type=int, help="Server port (default: 8002)")
+def serve(path, port):
+    """Start the MCP server."""
+    # Imported inside the command so the setup module is only loaded when
+    # this command runs.
+    from .setup import run_serve
+    run_serve(path, port)
+# `status` subcommand: takes no arguments or options.
+@cli.command()
+def status():
+    """Show Glyphh Code status."""
+    # Imported inside the command so the setup module is only loaded when
+    # this command runs.
+    from .setup import run_status
+    run_status()
+def main():
+    """Invoke the ``cli`` click group."""
+    cli()
+# Allow running this module directly as a script.
+if __name__ == "__main__":
+    main()