@pmaddire/gcie 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +256 -0
- package/AGENT_USAGE.md +231 -0
- package/ARCHITECTURE.md +151 -0
- package/CLAUDE.md +69 -0
- package/DEBUGGING_PLAYBOOK.md +160 -0
- package/KNOWLEDGE_INDEX.md +154 -0
- package/POTENTIAL_UPDATES +130 -0
- package/PROJECT.md +141 -0
- package/README.md +371 -0
- package/REPO_DIGITAL_TWIN.md +98 -0
- package/ROADMAP.md +301 -0
- package/SETUP_ANY_REPO.md +85 -0
- package/bin/gcie-init.js +20 -0
- package/bin/gcie.js +45 -0
- package/cli/__init__.py +1 -0
- package/cli/app.py +163 -0
- package/cli/commands/__init__.py +1 -0
- package/cli/commands/cache.py +35 -0
- package/cli/commands/context.py +2426 -0
- package/cli/commands/context_slices.py +617 -0
- package/cli/commands/debug.py +24 -0
- package/cli/commands/index.py +17 -0
- package/cli/commands/query.py +20 -0
- package/cli/commands/setup.py +73 -0
- package/config/__init__.py +1 -0
- package/config/scanner_config.py +82 -0
- package/context/__init__.py +1 -0
- package/context/architecture_bootstrap.py +170 -0
- package/context/architecture_index.py +185 -0
- package/context/architecture_parser.py +170 -0
- package/context/architecture_slicer.py +308 -0
- package/context/context_router.py +70 -0
- package/context/fallback_evaluator.py +21 -0
- package/coverage_integration/__init__.py +1 -0
- package/coverage_integration/coverage_loader.py +55 -0
- package/debugging/__init__.py +12 -0
- package/debugging/bug_localizer.py +81 -0
- package/debugging/execution_path_analyzer.py +42 -0
- package/embeddings/__init__.py +6 -0
- package/embeddings/encoder.py +45 -0
- package/embeddings/faiss_index.py +72 -0
- package/git_integration/__init__.py +1 -0
- package/git_integration/git_miner.py +78 -0
- package/graphs/__init__.py +17 -0
- package/graphs/call_graph.py +70 -0
- package/graphs/code_graph.py +81 -0
- package/graphs/execution_graph.py +35 -0
- package/graphs/git_graph.py +43 -0
- package/graphs/graph_store.py +25 -0
- package/graphs/node_factory.py +21 -0
- package/graphs/test_graph.py +65 -0
- package/graphs/validators.py +28 -0
- package/graphs/variable_graph.py +51 -0
- package/knowledge_index/__init__.py +1 -0
- package/knowledge_index/index_builder.py +60 -0
- package/knowledge_index/models.py +35 -0
- package/knowledge_index/query_api.py +38 -0
- package/knowledge_index/store.py +23 -0
- package/llm_context/__init__.py +6 -0
- package/llm_context/context_builder.py +67 -0
- package/llm_context/snippet_selector.py +57 -0
- package/package.json +14 -0
- package/parser/__init__.py +18 -0
- package/parser/ast_parser.py +216 -0
- package/parser/call_resolver.py +52 -0
- package/parser/models.py +75 -0
- package/parser/tree_sitter_adapter.py +56 -0
- package/parser/variable_extractor.py +31 -0
- package/retrieval/__init__.py +17 -0
- package/retrieval/cache.py +22 -0
- package/retrieval/hybrid_retriever.py +249 -0
- package/retrieval/query_parser.py +38 -0
- package/retrieval/ranking.py +43 -0
- package/retrieval/semantic_retriever.py +39 -0
- package/retrieval/symbolic_retriever.py +80 -0
- package/scanner/__init__.py +5 -0
- package/scanner/file_filters.py +37 -0
- package/scanner/models.py +44 -0
- package/scanner/repository_scanner.py +55 -0
- package/scripts/bootstrap_from_github.ps1 +41 -0
- package/tracing/__init__.py +1 -0
- package/tracing/runtime_tracer.py +60 -0
|
@@ -0,0 +1,617 @@
|
|
|
1
|
+
"""CLI command: context slices."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import subprocess
|
|
7
|
+
import re
|
|
8
|
+
import json
|
|
9
|
+
|
|
10
|
+
from llm_context.snippet_selector import estimate_tokens
|
|
11
|
+
|
|
12
|
+
from context.context_router import route_context
|
|
13
|
+
|
|
14
|
+
from .context import run_context
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Keyword sets used to bias slice selection from the free-text query.
# All membership tests are case-insensitive: callers lower-case the query first.

# Terms suggesting the query concerns UI / client-side code.
_FRONTEND_KEYWORDS = {
    "frontend",
    "ui",
    "ux",
    "component",
    "react",
    "vue",
    "svelte",
    "angular",
    "css",
    "style",
    "layout",
    "toolbar",
    "canvas",
    "page",
    "view",
}

# Terms suggesting the query concerns server-side / data-layer code.
_BACKEND_KEYWORDS = {
    "backend",
    "api",
    "endpoint",
    "server",
    "service",
    "pipeline",
    "worker",
    "job",
    "queue",
    "model",
    "schema",
    "db",
    "database",
    "sql",
    "migration",
    "redis",
    "cache",
    "auth",
    "controller",
    "router",
}

# Terms suggesting the query needs app entry points / routing ("wiring") files.
_WIRING_KEYWORDS = {
    "wiring",
    "route",
    "routes",
    "router",
    "entry",
    "bootstrap",
    "app",
    "main",
    "index",
    "init",
}

# Terms that imply test files should be included in the context.
_TEST_KEYWORDS = {
    "test",
    "tests",
    "spec",
    "pytest",
    "coverage",
    "regression",
}
|
|
79
|
+
|
|
80
|
+
# Named budget profiles consumed by _apply_profile. Each profile overrides the
# explicit CLI budgets wholesale:
#   stage_a_budget — token budget for the cheap first retrieval pass per slice
#   stage_b_budget — larger budget for the retry pass on slices missing roles
#   max_total      — overall token cap for the merged snippet set
#   pin_budget     — budget for the optional pinned-path retrieval
#   include_tests  — whether test files are requested by default
_PROFILE_SETTINGS = {
    "recall": {
        "stage_a_budget": 400,
        "stage_b_budget": 800,
        "max_total": 1200,
        "pin_budget": 300,
        "include_tests": False,
    },
    "low": {
        "stage_a_budget": 300,
        "stage_b_budget": 600,
        "max_total": 800,
        "pin_budget": 200,
        "include_tests": False,
    },
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _slice_path(repo: str, segment: str) -> str:
|
|
99
|
+
return str(Path(repo) / segment)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _frontend_bias(query: str) -> bool:
    """True when the query mentions any frontend-related keyword."""
    lowered = query.lower()
    for keyword in _FRONTEND_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _backend_bias(query: str) -> bool:
    """True when the query mentions any backend-related keyword."""
    lowered = query.lower()
    for keyword in _BACKEND_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _wiring_needed(query: str) -> bool:
    """True when the query mentions entry-point / routing ("wiring") terms."""
    lowered = query.lower()
    for keyword in _WIRING_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _needs_tests(query: str, include_tests: bool) -> bool:
    """True when tests were explicitly requested or the query mentions testing."""
    lowered = query.lower()
    return include_tests or any(term in lowered for term in _TEST_KEYWORDS)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _node_file_path(node_id: str) -> str:
|
|
125
|
+
if node_id.startswith("file:"):
|
|
126
|
+
return node_id[len("file:") :]
|
|
127
|
+
if node_id.startswith("function:"):
|
|
128
|
+
return node_id[len("function:") :].split("::", 1)[0]
|
|
129
|
+
if node_id.startswith("class:"):
|
|
130
|
+
return node_id[len("class:") :].split("::", 1)[0]
|
|
131
|
+
return ""
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _infer_slice_from_path(path: str) -> str:
|
|
135
|
+
lowered = path.replace("\\", "/").lower()
|
|
136
|
+
if "/frontend/" in lowered or lowered.startswith("frontend/"):
|
|
137
|
+
return "frontend"
|
|
138
|
+
if "/backend/" in lowered or lowered.startswith("backend/"):
|
|
139
|
+
return "backend"
|
|
140
|
+
if "/tests/" in lowered or lowered.startswith("tests/"):
|
|
141
|
+
return "tests"
|
|
142
|
+
return "pin"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _is_test_path(path: str) -> bool:
|
|
146
|
+
lowered = path.replace("\\", "/").lower()
|
|
147
|
+
if "/tests/" in lowered or lowered.startswith("tests/"):
|
|
148
|
+
return True
|
|
149
|
+
filename = Path(lowered).name
|
|
150
|
+
return (
|
|
151
|
+
filename.startswith("test_")
|
|
152
|
+
or filename.endswith("_test.py")
|
|
153
|
+
or ".test." in filename
|
|
154
|
+
or ".spec." in filename
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _is_wiring_path(path: str) -> bool:
|
|
159
|
+
lowered = path.replace("\\", "/").lower()
|
|
160
|
+
filename = Path(lowered).name
|
|
161
|
+
wiring_names = {
|
|
162
|
+
"app.py",
|
|
163
|
+
"main.py",
|
|
164
|
+
"server.py",
|
|
165
|
+
"wsgi.py",
|
|
166
|
+
"asgi.py",
|
|
167
|
+
"app.js",
|
|
168
|
+
"app.jsx",
|
|
169
|
+
"app.ts",
|
|
170
|
+
"app.tsx",
|
|
171
|
+
"main.js",
|
|
172
|
+
"main.jsx",
|
|
173
|
+
"main.ts",
|
|
174
|
+
"main.tsx",
|
|
175
|
+
"index.js",
|
|
176
|
+
"index.jsx",
|
|
177
|
+
"index.ts",
|
|
178
|
+
"index.tsx",
|
|
179
|
+
}
|
|
180
|
+
if filename in wiring_names:
|
|
181
|
+
return True
|
|
182
|
+
return any(token in lowered for token in ("/routes/", "/router/"))
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _classify_roles(path: str) -> set[str]:
    """Classify *path* into coarse roles used for slice-coverage checks.

    Test files get the single role ``{"test"}``. Every non-test file counts
    as "implementation", with "wiring" added on top for entry-point/route
    files, so a wiring file satisfies both role requirements at once.
    """
    roles: set[str] = set()
    if _is_test_path(path):
        roles.add("test")
        return roles
    if _is_wiring_path(path):
        roles.add("wiring")
    roles.add("implementation")
    return roles
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _dedupe_by_file(snippets: list[dict]) -> list[dict]:
    """Keep only the highest-scoring snippet per source file.

    Snippets whose node id carries no recognizable file path are dropped.
    The result is sorted by score, best first.
    """
    winners: dict[str, dict] = {}
    for snippet in snippets:
        file_path = _node_file_path(snippet.get("node_id", ""))
        if not file_path:
            continue
        incumbent = winners.get(file_path)
        if incumbent is None or snippet.get("score", 0.0) > incumbent.get("score", 0.0):
            winners[file_path] = snippet
    return sorted(winners.values(), key=lambda s: s.get("score", 0.0), reverse=True)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# ripgrep glob filters for the fallback text search. The leading "!" marks an
# exclusion for rg's -g option (vendored/VCS/cache dirs); includes restrict the
# scan to source and docs extensions we can usefully snippet.
_EXCLUDE_GLOBS = ["!**/.gcie/**", "!**/.git/**", "!**/.venv/**", "!**/node_modules/**"]
_INCLUDE_GLOBS = ["**/*.py", "**/*.md", "**/*.js", "**/*.ts", "**/*.tsx"]
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _rg_top_files(query: str, top_n: int = 5) -> list[str]:
|
|
213
|
+
terms = [t for t in re.split(r"[^A-Za-z0-9_]+", query.lower()) if len(t) >= 3]
|
|
214
|
+
if not terms:
|
|
215
|
+
return []
|
|
216
|
+
pattern = "|".join(re.escape(t) for t in sorted(set(terms)))
|
|
217
|
+
cmd = ["rg", "--count", "-i", pattern]
|
|
218
|
+
for g in _INCLUDE_GLOBS:
|
|
219
|
+
cmd.extend(["-g", g])
|
|
220
|
+
for g in _EXCLUDE_GLOBS:
|
|
221
|
+
cmd.extend(["-g", g])
|
|
222
|
+
cmd.append(".")
|
|
223
|
+
try:
|
|
224
|
+
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
225
|
+
except Exception:
|
|
226
|
+
return []
|
|
227
|
+
counts = {}
|
|
228
|
+
for line in proc.stdout.splitlines():
|
|
229
|
+
if ":" not in line:
|
|
230
|
+
continue
|
|
231
|
+
path, count = line.rsplit(":", 1)
|
|
232
|
+
try:
|
|
233
|
+
counts[path] = int(count.strip())
|
|
234
|
+
except ValueError:
|
|
235
|
+
continue
|
|
236
|
+
ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
|
|
237
|
+
return [path for path, _ in ranked[:top_n]]
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _index_files_for_query(query: str) -> list[str]:
|
|
242
|
+
index_path = Path(".gcie") / "architecture_index.json"
|
|
243
|
+
if not index_path.exists():
|
|
244
|
+
return []
|
|
245
|
+
try:
|
|
246
|
+
data = json.loads(index_path.read_text(encoding="utf-8"))
|
|
247
|
+
except Exception:
|
|
248
|
+
return []
|
|
249
|
+
tokens = [t for t in re.split(r"[^A-Za-z0-9_]+", query.lower()) if len(t) >= 3]
|
|
250
|
+
if not tokens:
|
|
251
|
+
return []
|
|
252
|
+
files: list[str] = []
|
|
253
|
+
for subsystem in data.get("subsystems", []):
|
|
254
|
+
name = (subsystem.get("name") or "").lower()
|
|
255
|
+
if not name:
|
|
256
|
+
continue
|
|
257
|
+
key_files = subsystem.get("key_files", []) or []
|
|
258
|
+
if any(token in name or name in token for token in tokens):
|
|
259
|
+
for path in key_files:
|
|
260
|
+
files.append(path)
|
|
261
|
+
continue
|
|
262
|
+
if any(token in (path.lower()) for path in key_files for token in tokens):
|
|
263
|
+
for path in key_files:
|
|
264
|
+
files.append(path)
|
|
265
|
+
|
|
266
|
+
file_map = data.get("file_map", {})
|
|
267
|
+
for path in file_map.keys():
|
|
268
|
+
lowered = path.lower()
|
|
269
|
+
if any(token in lowered for token in tokens):
|
|
270
|
+
files.append(path)
|
|
271
|
+
|
|
272
|
+
return files
|
|
273
|
+
def _file_snippet(path: Path, max_lines: int = 120) -> str:
|
|
274
|
+
try:
|
|
275
|
+
lines = path.read_text(encoding="utf-8").splitlines()
|
|
276
|
+
except Exception:
|
|
277
|
+
return ""
|
|
278
|
+
return "\n".join(lines[:max_lines]).strip()
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _merge_snippets(existing: list[dict], extra: list[dict], max_total: int) -> list[dict]:
    """Union *existing* and *extra* by node id, then pack under a token budget.

    Existing snippets win id collisions; extras with an empty node id are
    dropped. Candidates are considered best-score first, and any snippet that
    would push the running total past *max_total* is skipped (not truncated),
    so later, smaller snippets may still fit.
    """
    merged: dict[str, dict] = {}
    for item in existing:
        merged[item.get("node_id", "")] = item
    for item in extra:
        key = item.get("node_id", "")
        if key:
            merged.setdefault(key, item)

    candidates = sorted(merged.values(), key=lambda s: s.get("score", 0.0), reverse=True)
    packed: list[dict] = []
    used = 0
    for item in candidates:
        cost = estimate_tokens(item.get("content", ""))
        if used + cost > max_total:
            continue
        packed.append(item)
        used += cost
    return packed
|
|
298
|
+
|
|
299
|
+
def _total_tokens(snippets: list[dict]) -> int:
    """Sum the estimated token cost of every snippet's content."""
    total = 0
    for snippet in snippets:
        total += estimate_tokens(snippet.get("content", ""))
    return total
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _found_roles_by_slice(snippets: list[dict]) -> dict[str, set[str]]:
    """Aggregate the file roles present in *snippets*, keyed by slice name.

    Snippets without a recognizable file path are ignored; snippets without a
    "slice" key are bucketed under "unknown".
    """
    by_slice: dict[str, set[str]] = {}
    for snippet in snippets:
        file_path = _node_file_path(snippet.get("node_id", ""))
        if not file_path:
            continue
        bucket = by_slice.setdefault(snippet.get("slice", "unknown"), set())
        bucket.update(_classify_roles(file_path))
    return by_slice
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _required_roles_for_slice(
    slice_name: str,
    query: str,
    *,
    include_tests: bool,
    pin: str | None,
    slice_names: set[str],
) -> set[str]:
    """Return the file roles *slice_name* must contribute for this query.

    backend/frontend slices always owe "implementation". "wiring" is owed
    whenever the query hints at UI work, mentions wiring terms, or a pin is
    set — and it is owed by the frontend slice when one is being searched,
    otherwise by the backend slice, so exactly one slice carries the wiring
    requirement. The tests slice owes "test" only when tests are requested
    via the flag or query keywords.
    """
    roles: set[str] = set()
    wiring_required = _frontend_bias(query) or _wiring_needed(query) or bool(pin)
    tests_required = _needs_tests(query, include_tests)

    if slice_name in {"backend", "frontend"}:
        roles.add("implementation")
        if wiring_required:
            # Prefer the frontend slice as the wiring owner when it exists.
            if slice_name == "frontend" and "frontend" in slice_names:
                roles.add("wiring")
            elif slice_name == "backend" and "frontend" not in slice_names:
                roles.add("wiring")
    if tests_required and slice_name == "tests":
        roles.add("test")
    return roles
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _missing_required_slices(
    snippets: list[dict],
    slices: list[tuple[str, str]],
    query: str,
    *,
    include_tests: bool,
    pin: str | None,
) -> set[str]:
    """Return names of slices whose required roles are not fully covered.

    A slice is "missing" when it owes roles for this query and the snippets
    gathered so far do not supply all of them; slices with no required roles
    are never reported.
    """
    slice_names = {name for name, _ in slices}
    found_roles = _found_roles_by_slice(snippets)
    missing: set[str] = set()
    for name, _ in slices:
        needed = _required_roles_for_slice(
            name,
            query,
            include_tests=include_tests,
            pin=pin,
            slice_names=slice_names,
        )
        if needed and not needed <= found_roles.get(name, set()):
            missing.add(name)
    return missing
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _trim_to_budget(snippets: list[dict], max_total: int, required_slices: set[str]) -> list[dict]:
    """Pack *snippets* under a token budget while keeping required slices represented.

    The first snippet seen from each required slice is promoted to the front
    and always kept — even when that overshoots *max_total* — so required
    coverage survives trimming. Every other snippet is skipped once it would
    exceed the budget.
    """
    # Ensure at least one snippet per required slice, if available.
    required: list[dict] = []
    remaining: list[dict] = []
    seen_required: set[str] = set()

    for item in snippets:
        slice_name = item.get("slice")
        if slice_name in required_slices and slice_name not in seen_required:
            required.append(item)
            seen_required.add(slice_name)
        else:
            remaining.append(item)

    # Required snippets go first so they are admitted before the budget fills.
    ordered = required + remaining
    out: list[dict] = []
    used = 0
    for item in ordered:
        t = estimate_tokens(item.get("content", ""))
        # Over budget: skip, unless this item belongs to a required slice
        # (required items bypass the cap by design).
        if used + t > max_total and item.get("slice") not in required_slices:
            continue
        out.append(item)
        used += t
        # Stop early once the budget is spent and every required slice that
        # contributed a snippet has been captured up front.
        if used >= max_total and all(s in seen_required for s in required_slices):
            break

    return out
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def _apply_profile(
    profile: str | None,
    *,
    stage_a_budget: int,
    stage_b_budget: int,
    max_total: int,
    pin_budget: int,
    include_tests: bool,
) -> tuple[int, int, int, int, bool, str]:
    """Resolve budget settings, letting a named profile override explicit values.

    Returns ``(stage_a, stage_b, max_total, pin_budget, include_tests, label)``
    where *label* is the lower-cased profile key actually applied, or
    ``"custom"`` when no profile was given or the name is unknown.
    """
    settings = _PROFILE_SETTINGS.get(profile.lower()) if profile else None
    if settings is None:
        # Unknown or absent profile: keep the caller's explicit values.
        return stage_a_budget, stage_b_budget, max_total, pin_budget, include_tests, "custom"
    return (
        settings["stage_a_budget"],
        settings["stage_b_budget"],
        settings["max_total"],
        settings["pin_budget"],
        settings["include_tests"],
        profile.lower(),
    )
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def run_context_slices(
    repo: str,
    query: str,
    *,
    stage_a_budget: int,
    stage_b_budget: int,
    max_total: int,
    intent: str | None,
    pin: str | None,
    pin_budget: int,
    include_tests: bool,
    profile: str | None = None,
) -> dict:
    """Entry point for sliced context retrieval.

    Applies an optional named profile over the explicit budgets, routes the
    query through ``route_context`` (which may invoke the normal sliced path
    via ``run_context_slices_normal``), and — when the normal path returned a
    fallback reason — reruns retrieval directly over the whole repo at a
    doubled budget, augmented with architecture-index files or ripgrep hits.

    Returns the routed payload dict, or a "direct"-mode payload when the
    secondary fallback fired.
    """
    stage_a_budget, stage_b_budget, max_total, pin_budget, include_tests, profile_used = _apply_profile(
        profile,
        stage_a_budget=stage_a_budget,
        stage_b_budget=stage_b_budget,
        max_total=max_total,
        pin_budget=pin_budget,
        include_tests=include_tests,
    )

    payload = route_context(
        repo,
        query,
        intent=intent,
        max_total=max_total,
        profile=profile_used,
        normal_runner=lambda: run_context_slices_normal(
            repo,
            query,
            stage_a_budget=stage_a_budget,
            stage_b_budget=stage_b_budget,
            max_total=max_total,
            intent=intent,
            pin=pin,
            pin_budget=pin_budget,
            include_tests=include_tests,
            profile=profile_used,
        ),
    )

    # Secondary fallback: the normal sliced run completed but flagged a
    # fallback reason, so retry with a direct whole-repo retrieval.
    if payload.get("mode") == "normal" and payload.get("fallback_reason"):
        direct = run_context(repo, query, budget=max_total * 2, intent=intent, top_k=60)
        snippets = direct.get("snippets", [])
        extra = []
        # Prefer files named by the architecture index; fall back to a raw
        # ripgrep scan only when no index entries match the query.
        index_files = _index_files_for_query(query)
        if index_files:
            for rel in index_files:
                path = Path(rel)
                if not path.exists():
                    continue
                content = _file_snippet(path)
                if content:
                    # High score: index-listed files are strong candidates.
                    extra.append({"node_id": f"file:{rel}", "score": 0.9, "content": content})
        else:
            for rel in _rg_top_files(query, top_n=12):
                path = Path(rel)
                if not path.exists():
                    continue
                content = _file_snippet(path)
                if content:
                    # Low score: raw text matches are weak evidence.
                    extra.append({"node_id": f"file:{rel}", "score": 0.2, "content": content})
        snippets = _merge_snippets(snippets, extra, max_total=max_total * 2)

        payload = {
            "query": direct.get("query", query),
            "profile": profile_used,
            "mode": "direct",
            "intent": intent,
            "snippets": snippets,
            "token_estimate": _total_tokens(snippets),
            "fallback_reason": payload.get("fallback_reason"),
            "secondary_fallback": "normal_empty",
        }

    return payload
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def run_context_slices_normal(
    repo: str,
    query: str,
    *,
    stage_a_budget: int,
    stage_b_budget: int,
    max_total: int,
    intent: str | None,
    pin: str | None,
    pin_budget: int,
    include_tests: bool,
    profile: str | None,
) -> dict:
    """Two-stage sliced retrieval over frontend/backend/tests sub-trees.

    Pipeline: optionally retrieve from a pinned sub-path first, then run a
    cheap Stage A retrieval per selected slice; slices whose required roles
    are still missing get a larger Stage B retry. Snippets are de-duplicated
    per file and trimmed to the total token budget (relaxed 4x when required
    roles remain unmet after dedupe). Returns a "normal"-mode payload with
    per-slice results, the trimmed snippets and a token estimate.
    """
    repo_path = Path(repo)

    slices = []
    frontend_bias = _frontend_bias(query)
    backend_bias = _backend_bias(query)
    frontend_path = _slice_path(repo, "frontend")
    backend_path = _slice_path(repo, "backend")
    tests_path = _slice_path(repo, "tests")

    # Select slices: frontend only when the query hints at UI; backend unless
    # the query is frontend-only; tests only when tests are requested.
    if frontend_bias and Path(frontend_path).exists():
        slices.append(("frontend", frontend_path))
    if (backend_bias or not frontend_bias) and Path(backend_path).exists():
        slices.append(("backend", backend_path))
    if _needs_tests(query, include_tests) and Path(tests_path).exists():
        slices.append(("tests", tests_path))

    results: dict[str, dict] = {}
    collected: list[dict] = []

    # Pin first (cheap, high signal)
    if pin:
        pin_path = str(repo_path / pin)
        if Path(pin_path).exists():
            pin_result = run_context(pin_path, query, budget=pin_budget, intent=intent)
            results["pin"] = pin_result
            for item in pin_result.get("snippets", []):
                # Pinned snippets get their slice inferred from the file path
                # since they were not retrieved through a named slice.
                node_path = _node_file_path(item.get("node_id", ""))
                item["slice"] = _infer_slice_from_path(node_path)
                collected.append(item)

    # Stage A
    for name, path in slices:
        if Path(path).exists():
            res = run_context(path, query, budget=stage_a_budget, intent=intent)
            results[name] = res
            for item in res.get("snippets", []):
                item["slice"] = name
                collected.append(item)

    # Stage B only for missing required roles
    missing = _missing_required_slices(
        collected,
        slices,
        query,
        include_tests=include_tests,
        pin=pin,
    )

    for name, path in slices:
        if name in missing and Path(path).exists():
            res = run_context(path, query, budget=stage_b_budget, intent=intent)
            results[f"{name}_retry"] = res
            for item in res.get("snippets", []):
                item["slice"] = name
                collected.append(item)

    deduped = _dedupe_by_file(collected)
    # Slices that owe at least one role for this query must stay represented
    # through budget trimming.
    required_slices = {
        name
        for name, _ in slices
        if _required_roles_for_slice(
            name,
            query,
            include_tests=include_tests,
            pin=pin,
            slice_names={n for n, _ in slices},
        )
    }
    missing_after = _missing_required_slices(
        deduped,
        slices,
        query,
        include_tests=include_tests,
        pin=pin,
    )
    # Relax the budget 4x when required roles are still missing so the trim
    # pass does not drop the few snippets that were found.
    effective_max = max_total if not missing_after else max_total * 4
    trimmed = _trim_to_budget(deduped, max_total=effective_max, required_slices=required_slices)

    return {
        "query": query,
        "profile": profile,
        "mode": "normal",
        "stage_a_budget": stage_a_budget,
        "stage_b_budget": stage_b_budget,
        "max_total_tokens": max_total,
        "intent": intent,
        "results": results,
        "snippets": trimmed,
        "token_estimate": _total_tokens(trimmed),
    }
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""CLI command: debug."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import networkx as nx
|
|
8
|
+
|
|
9
|
+
from debugging.bug_localizer import localize_bug
|
|
10
|
+
from graphs.call_graph import build_call_graph
|
|
11
|
+
from graphs.variable_graph import build_variable_graph
|
|
12
|
+
from parser.ast_parser import parse_python_file
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_debug(path: str, query: str) -> dict[str, list[str]]:
    """Localize a bug described by *query* within the Python file at *path*.

    Parses the file, composes its call and variable graphs into one graph,
    runs the bug localizer over it, and returns the findings as plain lists.
    """
    module = parse_python_file(Path(path))
    call_graph = build_call_graph((module,))
    variable_graph = build_variable_graph((module,))
    report = localize_bug(nx.compose(call_graph, variable_graph), query)
    return {
        "relevant_functions": list(report.relevant_functions),
        "call_chain": list(report.call_chain),
        "variable_modifications": list(report.variable_modifications),
    }
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""CLI command: index."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from scanner.repository_scanner import scan_repository
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def run_index(path: str) -> dict[str, int]:
    """Scan the repository at *path* and summarize its file counts by category."""
    manifest = scan_repository(Path(path))
    summary = {
        "total_files": manifest.total_files,
        "source_files": len(manifest.source_files),
        "test_files": len(manifest.test_files),
        "config_files": len(manifest.config_files),
    }
    return summary
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""CLI command: query."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import networkx as nx
|
|
8
|
+
|
|
9
|
+
from graphs.call_graph import build_call_graph
|
|
10
|
+
from graphs.variable_graph import build_variable_graph
|
|
11
|
+
from parser.ast_parser import parse_python_file
|
|
12
|
+
from retrieval.symbolic_retriever import symbolic_retrieve
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_query(path: str, query: str, max_hops: int = 2) -> list[str]:
    """Return node ids symbolically retrieved for *query* from the file at *path*.

    Builds a combined call/variable graph for the parsed module and walks it
    up to *max_hops* hops from the query's anchor nodes.
    """
    module = parse_python_file(Path(path))
    combined = nx.compose(build_call_graph((module,)), build_variable_graph((module,)))
    matches = symbolic_retrieve(combined, query, max_hops=max_hops)
    return [candidate.node_id for candidate in matches]
|