@pmaddire/gcie 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +6 -2
- package/GCIE_USAGE.md +212 -153
- package/README.md +30 -8
- package/bench_questions.py +69 -0
- package/cli/app.py +198 -162
- package/cli/commands/adaptation.py +341 -0
- package/cli/commands/context.py +682 -34
- package/cli/commands/context_slices.py +1322 -601
- package/cli/commands/setup.py +86 -72
- package/context/architecture_slicer.py +2 -1
- package/llm_context/snippet_selector.py +1 -1
- package/package.json +1 -1
- package/retrieval/hybrid_retriever.py +9 -1
package/cli/commands/setup.py
CHANGED
|
@@ -1,73 +1,87 @@
|
|
|
1
|
-
"""One-command repository setup for GCIE."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
from context.architecture_bootstrap import ensure_initialized
|
|
8
|
-
|
|
9
|
-
from .
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
target.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
1
|
+
"""One-command repository setup for GCIE."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from context.architecture_bootstrap import ensure_initialized
|
|
8
|
+
|
|
9
|
+
from .adaptation import run_post_init_adaptation
|
|
10
|
+
from .index import run_index
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _repo_root() -> Path:
    """Return the root directory of the GCIE package that contains this module.

    This module lives at ``<root>/cli/commands/setup.py``, so the root is the
    third ancestor of the resolved file path (``commands`` -> ``cli`` -> root).
    """
    return Path(__file__).resolve().parents[2]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
    """Copy ``source`` to ``target`` unless the copy is impossible or unwanted.

    Args:
        source: File to copy from.
        target: Destination path; missing parent directories are created.
        force: When true, overwrite ``target`` even if it already exists.

    Returns:
        ``"source_missing"`` when ``source`` is not an existing regular file,
        ``"skipped_existing"`` when ``target`` exists and ``force`` is false,
        and ``"written"`` after a successful copy.
    """
    # A directory (or anything that is not a regular file) cannot be copied as
    # a document, so report it the same way as a missing source.
    if not source.is_file():
        return "source_missing"
    if target.exists() and not force:
        return "skipped_existing"
    target.parent.mkdir(parents=True, exist_ok=True)
    # Copy raw bytes rather than decoding/encoding text: this cannot fail on
    # non-UTF-8 content and leaves line endings exactly as shipped.
    target.write_bytes(source.read_bytes())
    return "written"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run_setup(
    path: str,
    *,
    force: bool = False,
    include_agent_usage: bool = True,
    include_setup_doc: bool = True,
    run_index_pass: bool = True,
    run_adaptation_pass: bool = False,
    adaptation_benchmark_size: int = 10,
    adaptation_efficiency_iterations: int = 5,
) -> dict:
    """Initialize a repository so GCIE can be used immediately.

    Args:
        path: Repository directory to set up; created if it does not exist.
        force: Overwrite documentation files that already exist in the target.
        include_agent_usage: Copy ``GCIE_USAGE.md`` into the repository.
        include_setup_doc: Copy ``SETUP_ANY_REPO.md`` into the repository.
        run_index_pass: Run ``run_index`` on the repository after copying.
        run_adaptation_pass: Run ``run_post_init_adaptation`` after indexing.
        adaptation_benchmark_size: Forwarded as ``benchmark_size``.
        adaptation_efficiency_iterations: Forwarded as
            ``efficiency_iterations``.

    Returns:
        A status dict with the keys ``repo``, ``gcie_dir``,
        ``architecture_initialized``, ``files`` (per-file copy status),
        ``context_config`` (the value returned by ``ensure_initialized``),
        ``index`` and ``adaptation``. A skipped pass is reported as
        ``{"skipped": True}``.
    """
    target = Path(path).resolve()
    target.mkdir(parents=True, exist_ok=True)

    config = ensure_initialized(target)
    gcie_dir = target / ".gcie"

    # Copy the bundled documentation that the caller asked for, recording the
    # outcome of each copy under its file name.
    source_root = _repo_root()
    wanted_docs = (
        (include_agent_usage, "GCIE_USAGE.md"),
        (include_setup_doc, "SETUP_ANY_REPO.md"),
    )
    copied: dict[str, str] = {}
    for wanted, doc_name in wanted_docs:
        if wanted:
            copied[doc_name] = _copy_if_needed(
                source_root / doc_name,
                target / doc_name,
                force=force,
            )

    status: dict[str, object] = {
        "repo": target.as_posix(),
        "gcie_dir": gcie_dir.as_posix(),
        "architecture_initialized": True,
        "files": copied,
        "context_config": config,
    }

    if run_index_pass:
        status["index"] = run_index(target.as_posix())
    else:
        status["index"] = {"skipped": True}

    if run_adaptation_pass:
        # The adaptation pass is always invoked with clear_profile=True here.
        status["adaptation"] = run_post_init_adaptation(
            target.as_posix(),
            benchmark_size=adaptation_benchmark_size,
            efficiency_iterations=adaptation_efficiency_iterations,
            clear_profile=True,
        )
    else:
        status["adaptation"] = {"skipped": True}

    return status
|
|
package/context/architecture_slicer.py
CHANGED
|
@@ -51,6 +51,7 @@ _ARCH_KEYWORDS = {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
|
|
54
|
+
|
|
54
55
|
@dataclass
|
|
55
56
|
class ArchitectureSliceResult:
|
|
56
57
|
query: str
|
|
@@ -100,7 +101,7 @@ def _snippet_from_lines(lines: list[str], max_lines: int) -> str:
|
|
|
100
101
|
def _collect_snippets(repo_path: Path, files: list[str], max_lines: int = 120) -> tuple[list[dict], list[str]]:
|
|
101
102
|
snippets: list[dict] = []
|
|
102
103
|
missing: list[str] = []
|
|
103
|
-
for rel_path in files:
|
|
104
|
+
for idx, rel_path in enumerate(files):
|
|
104
105
|
file_path = repo_path / rel_path
|
|
105
106
|
if not file_path.exists():
|
|
106
107
|
missing.append(rel_path)
|
package/package.json
CHANGED
package/retrieval/hybrid_retriever.py
CHANGED
|
@@ -126,7 +126,14 @@ def _semantic_node_scores(
|
|
|
126
126
|
return {}, ()
|
|
127
127
|
|
|
128
128
|
retriever = SemanticRetriever([text for _, text in entries])
|
|
129
|
-
|
|
129
|
+
# Keep semantic fan-out intentionally bounded to reduce noisy candidates.
|
|
130
|
+
if len(query_terms) >= 10:
|
|
131
|
+
fanout = 3.0
|
|
132
|
+
elif len(query_terms) >= 6:
|
|
133
|
+
fanout = 2.5
|
|
134
|
+
else:
|
|
135
|
+
fanout = 2.0
|
|
136
|
+
semantic_top_k = min(max(int(round(top_k * fanout)), max(12, top_k + 4)), len(entries))
|
|
130
137
|
hits = retriever.retrieve(query, top_k=semantic_top_k)
|
|
131
138
|
|
|
132
139
|
aggregates: dict[str, _SemanticAggregate] = {}
|
|
@@ -247,3 +254,4 @@ def hybrid_retrieve(
|
|
|
247
254
|
out.append(HybridCandidate(node_id=item.node_id, score=item.score, rationale=rationale))
|
|
248
255
|
|
|
249
256
|
return tuple(out)
|
|
257
|
+
|