graperoot 3.9.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graperoot/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ """Dual-graph context engine for AI coding agents."""
2
+ __version__ = "3.9.24"
@@ -0,0 +1,539 @@
1
+ #!/Users/krishnakant/.dual-graph/venv/bin/python3
2
+ """Core context packing module for DGC pre-injection mode (v3.8.37).
3
+
4
+ Generates compact, token-efficient context blocks from the dual-graph
5
+ retrieval results. Output is markdown suitable for injection into
6
+ Claude's system prompt before the first user turn.
7
+
8
+ v3.8.33 changes:
9
+ - Budget default 3000 → 5000
10
+ - Full structured summaries (params, returns, calls) instead of 200-char truncation
11
+ - Code-first priority: inline code gets budget before edges
12
+ - More functions inlined (top 3 per file instead of 2)
13
+
14
+ v3.8.34 changes:
15
+ - Bulletproof Python venv setup (5-level fallback chain)
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import sys
22
+ from math import ceil
23
+ from pathlib import Path
24
+ from typing import Any
25
+
26
+ from graperoot.dg import retrieve as _dg_retrieve, load_graph as _dg_load_graph, classify_intent as _classify_intent
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # 1. load_summaries
31
+ # ---------------------------------------------------------------------------
32
+
33
def load_summaries(project_root: Path) -> dict:
    """Load ``structured_summaries.json`` from the project's ``.dual-graph/`` dir.

    Loading is best-effort: any problem with the file yields an empty dict
    rather than an exception.

    Args:
        project_root: Directory that contains the ``.dual-graph`` folder.

    Returns:
        The parsed JSON object when the file can be read and its top-level
        value is a dict; otherwise ``{}`` (missing file, unreadable file,
        invalid UTF-8, malformed JSON, or a non-dict top-level value).
    """
    path = project_root / ".dual-graph" / "structured_summaries.json"
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file (FileNotFoundError is a
        # subclass).  ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised by read_text() on invalid UTF-8.
        return {}
    return data if isinstance(data, dict) else {}
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # 2. expand_summary (replaces condense_summary)
49
+ # ---------------------------------------------------------------------------
50
+
51
def _as_str_list(value: Any) -> list[str]:
    """Return *value* as a list of strings, or ``[]`` when it is not a list."""
    if not isinstance(value, list):
        return []
    return [str(item) for item in value]


def expand_summary(summary: dict) -> str:
    """Convert a structured summary into a rich multi-line block.

    Emits, in order and only when present: the description, up to 8 function
    signatures (each optionally followed by a calls/decorators detail line),
    up to 5 classes with bases and methods, up to 6 variables, up to 6
    internal call-graph edges and up to 5 imports.

    Malformed pieces of the summary (a ``null`` description, a function or
    class entry that is not a dict, a field that should be a list but is
    not) are treated as empty instead of raising.

    Args:
        summary: One file's entry from ``structured_summaries.json``.

    Returns:
        The lines joined with newlines, or ``""`` when *summary* is empty or
        not a dict.

    Output example:
        Order management API endpoints.
        Functions:
         get_orders(db: Session, status: str = None) -> list[OrderResponse]
         calls: order_to_response, db.query | decorators: @router.get
         create_order(db: Session, data: OrderCreate) -> Order
         calls: validate_menu_items
        Classes: OrderResponse(BaseModel), OrderCreate(BaseModel)
        Vars: router (used by: get_orders, create_order)
    """
    if not summary or not isinstance(summary, dict):
        return ""

    lines: list[str] = []

    # Description (a JSON null or non-string description counts as absent).
    raw_desc = summary.get("description")
    desc = raw_desc.strip() if isinstance(raw_desc, str) else ""
    if desc:
        if not desc.endswith("."):
            desc += "."
        lines.append(desc)

    # Functions — full detail
    funcs = summary.get("functions") or {}
    if isinstance(funcs, dict) and funcs:
        lines.append("Functions:")
        for fname, finfo in list(funcs.items())[:8]:
            if not fname:
                continue
            if not isinstance(finfo, dict):
                # Keep the name visible even when its details are unusable.
                finfo = {}
            params = finfo.get("params") or ""
            if isinstance(params, list):
                params = ", ".join(str(p) for p in params)
            ret = finfo.get("returns", "")
            prefix = " async " if finfo.get("async") else " "
            sig = f"{prefix}{fname}({params})"
            if ret:
                sig += f" -> {ret}"
            lines.append(sig)

            # Sub-details: calls (internal first, then external), decorators
            details: list[str] = []
            all_calls = _as_str_list(finfo.get("calls_internal")) + _as_str_list(
                finfo.get("calls_external")
            )
            if all_calls:
                details.append("calls: " + ", ".join(all_calls[:6]))
            decorators = _as_str_list(finfo.get("decorators"))
            if decorators:
                details.append("decorators: " + ", ".join(decorators[:3]))
            if details:
                lines.append(" " + " | ".join(details))

    # Classes — with bases and methods
    classes = summary.get("classes") or {}
    if isinstance(classes, dict) and classes:
        cls_parts: list[str] = []
        for cname, cinfo in list(classes.items())[:5]:
            if not isinstance(cinfo, dict):
                cinfo = {}
            bases = cinfo.get("bases") or ""
            if isinstance(bases, list):
                bases = ", ".join(str(b) for b in bases)
            label = f"{cname}({bases})" if bases else str(cname)
            methods = _as_str_list(cinfo.get("methods"))
            if methods:
                label += f" [{', '.join(methods[:4])}]"
            cls_parts.append(label)
        lines.append("Classes: " + ", ".join(cls_parts))

    # Variables — with usage info
    variables = summary.get("variables")
    if isinstance(variables, list) and variables:
        var_parts: list[str] = []
        for v in variables[:6]:
            if isinstance(v, dict):
                name = v.get("name", "")
                if not name:
                    continue
                used_by = _as_str_list(v.get("used_by"))
                if used_by:
                    var_parts.append(f"{name} (used by: {', '.join(used_by[:3])})")
                else:
                    var_parts.append(str(name))
            elif isinstance(v, str):
                var_parts.append(v)
        if var_parts:
            lines.append("Vars: " + ", ".join(var_parts))

    # Internal call graph (the edge's "rel" field is not rendered)
    internal_graph = summary.get("internal_graph")
    if isinstance(internal_graph, list) and internal_graph:
        graph_strs: list[str] = []
        for edge in internal_graph[:6]:
            if isinstance(edge, dict):
                frm = edge.get("from", "")
                to = edge.get("to", "")
                if frm and to:
                    graph_strs.append(f"{frm}->{to}")
        if graph_strs:
            lines.append("Call graph: " + ", ".join(graph_strs))

    # Imports
    imports = summary.get("imports")
    if isinstance(imports, list) and imports:
        imp_strs: list[str] = []
        for imp in imports[:5]:
            if not isinstance(imp, dict):
                continue
            mod = imp.get("module", "")
            if not mod:
                continue
            names = _as_str_list(imp.get("names"))
            if names:
                imp_strs.append(f"{mod}({', '.join(names[:4])})")
            else:
                imp_strs.append(str(mod))
        if imp_strs:
            lines.append("Imports: " + ", ".join(imp_strs))

    return "\n".join(lines)
177
+
178
+
179
def condense_summary(summary: dict) -> str:
    """Backward-compatible one-liner summary (used as fallback).

    Joins the description and up to 5 function signatures with ``" | "``.
    A ``null``/non-string description and function entries that are not
    dicts are tolerated rather than raising.

    Args:
        summary: One file's entry from ``structured_summaries.json``.

    Returns:
        The condensed text truncated to 300 characters, or ``""`` when
        *summary* is empty or not a dict.
    """
    if not summary or not isinstance(summary, dict):
        return ""

    parts: list[str] = []

    raw_desc = summary.get("description")
    desc = raw_desc.strip() if isinstance(raw_desc, str) else ""
    if desc:
        parts.append(desc if desc.endswith(".") else desc + ".")

    funcs = summary.get("functions") or {}
    if isinstance(funcs, dict) and funcs:
        sigs: list[str] = []
        for fname, finfo in list(funcs.items())[:5]:
            if not fname:
                continue
            if not isinstance(finfo, dict):
                # Still list the function name when its details are unusable.
                finfo = {}
            params = finfo.get("params") or ""
            if isinstance(params, list):
                params = ", ".join(str(p) for p in params)
            ret = finfo.get("returns", "")
            sig = f"{fname}({params})"
            if ret:
                sig += f" -> {ret}"
            sigs.append(sig)
        if sigs:
            parts.append("Functions: " + ", ".join(sigs))

    # join() already yields "" for no parts and the bare part for one.
    return " | ".join(parts)[:300]
207
+
208
+
209
+ # ---------------------------------------------------------------------------
210
+ # 3. estimate_tokens
211
+ # ---------------------------------------------------------------------------
212
+
213
def estimate_tokens(text: str) -> int:
    """Approximate the token count of *text* as ``ceil(len(text) / 3.2)``.

    Empty (or otherwise falsy) input counts as zero tokens.
    """
    return ceil(len(text) / 3.2) if text else 0
218
+
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # 4. read_symbol_content
222
+ # ---------------------------------------------------------------------------
223
+
224
def read_symbol_content(
    project_root: Path,
    file_id: str,
    functions: dict,
    max_lines: int = 80,
) -> str:
    """Read function bodies from a file for inline inclusion.

    The first three functions (ordered by ``line_start``) are considered.
    Line numbers are treated as 0-based indices into the file and rendered
    1-based in each chunk header.  A missing ``line_end`` defaults to
    ``line_start + 20``.

    v3.8.33: bumped max_lines 60→80, reads top 3 functions instead of 2.

    Args:
        project_root: Directory that *file_id* is resolved against.
        file_id: Path of the source file, relative to *project_root*.
        functions: Mapping of function name to a dict holding ``line_start``
            and optionally ``line_end``.
        max_lines: Upper bound on the total number of source lines returned
            across all chunks; the last chunk is cut short to fit.

    Returns:
        Chunks of the form ``"# <file_id>::<name> (lines a-b)\\n<body>"``
        separated by blank lines, or ``""`` when there is nothing to show
        (no functions, unreadable file, or no function range that falls
        inside the file).
    """
    if not functions:
        return ""

    file_path = project_root / file_id
    try:
        all_lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines()
    except OSError:
        # Missing file, directory, permission problem, ...
        return ""

    last_index = len(all_lines) - 1
    ordered = sorted(
        functions.items(),
        key=lambda kv: kv[1].get("line_start", 0),
    )

    chunks: list[str] = []
    total_lines = 0

    for fn_name, fn_info in ordered[:3]:
        start = fn_info.get("line_start", 0)
        end = fn_info.get("line_end", start + 20)
        start = max(0, start)
        end = min(last_index, end)

        if end < start:
            # The range lies outside the file (stale line info or an empty
            # file).  Emitting it would produce a header with no body and a
            # negative span that corrupts the running line budget.
            continue

        span = end - start + 1
        room = max_lines - total_lines
        if span > room:
            # Cut this function short so the overall cap is respected.
            end = start + room - 1
            if end < start:
                break
            span = end - start + 1

        header = f"# {file_id}::{fn_name} (lines {start + 1}-{end + 1})"
        body = "\n".join(all_lines[start : end + 1])
        chunks.append(f"{header}\n{body}")
        total_lines += span

        if total_lines >= max_lines:
            break

    return "\n\n".join(chunks)
275
+
276
+
277
+ # ---------------------------------------------------------------------------
278
+ # 5. pack (v3.8.33 — reordered priorities)
279
+ # ---------------------------------------------------------------------------
280
+
281
def _file_id(entry: Any) -> str:
    """Return the identifier of a retrieval entry as a string.

    Dict entries contribute their ``"id"`` value (``""`` when the key is
    absent); any other entry is stringified as-is.
    """
    if not isinstance(entry, dict):
        return str(entry)
    identifier = entry.get("id", "")
    return str(identifier)
285
+
286
+
287
def _basename(path: str) -> str:
    """Return the part of *path* after its last ``/`` (all of it if none)."""
    _, _, tail = path.rpartition("/")
    return tail
289
+
290
+
291
def pack(
    retrieve_result: Any,
    summaries: dict,
    graph: dict,
    project_root: Path,
    token_budget: int = 5000,
    max_files: int = 8,
    max_read_targets: int = 8,
) -> str:
    """Build a compact markdown context block from retrieval results.

    v3.8.33 priority order (code-first):
      1. Relevant Files — file path + FULL structured summary
      2. Key Code — inline function bodies (gets priority over edges)
      3. Recommended Reads — function locations with line numbers
      4. Key Relationships — import/call/extends edges (lowest priority)

    The header and section 1 are always emitted.  Sections 2-4 are added only
    while the estimated remaining budget stays above 400, 200 and 150 tokens
    respectively.  Line numbers taken from summaries and graph nodes are
    treated as 0-based and displayed 1-based.

    Malformed input (``None`` file/edge/node lists, summary or function
    entries that are not dicts, edges without a ``get`` method) is skipped
    instead of raising.

    Args:
        retrieve_result: Either a dict or an object exposing ``files`` and
            ``edges``.
        summaries: Mapping of file path to structured summary, as returned by
            ``load_summaries``.
        graph: Graph dict; its ``"nodes"`` list is used as a fallback source
            of summaries and symbol line ranges.
        project_root: Directory that file ids are resolved against when
            reading code.
        token_budget: Estimated-token budget for the whole block.
        max_files: Maximum number of retrieved files to include.
        max_read_targets: Maximum number of "Recommended Reads" entries.

    Returns:
        The assembled markdown, sections separated by blank lines.
    """
    if isinstance(retrieve_result, dict):
        files_list = retrieve_result.get("files") or []
        edges_list = retrieve_result.get("edges") or []
    else:
        files_list = getattr(retrieve_result, "files", None) or []
        edges_list = getattr(retrieve_result, "edges", None) or []

    # Graph nodes are scanned several times below; normalise them once.
    nodes = [n for n in (graph.get("nodes") or []) if isinstance(n, dict)]

    def lookup_summary(fid: str) -> tuple:
        """Return (file path without ``::symbol`` suffix, summary dict)."""
        base = fid.split("::")[0]
        found = summaries.get(base)
        return base, (found if isinstance(found, dict) else {})

    file_ids: list[str] = []
    for entry in files_list[:max_files]:
        fid = _file_id(entry)
        if fid:
            file_ids.append(fid)

    chosen_set = set(file_ids)

    # ── Section 1: Relevant Files (full structured summaries) ────────────
    sec1_lines: list[str] = ["### Relevant Files"]
    for fid in file_ids:
        base_file, summary_obj = lookup_summary(fid)

        expanded = expand_summary(summary_obj) if summary_obj else ""
        if expanded:
            # Indent multi-line summaries under the file path
            sec1_lines.append(f"- **`{fid}`**")
            sec1_lines.extend(f" {sl}" for sl in expanded.split("\n"))
            continue

        # Fall back to graph node summary
        fallback = ""
        for node in nodes:
            if node.get("id") == fid or node.get("id") == base_file:
                fallback = (node.get("summary", "") or "")[:300]
                break
        if fallback:
            sec1_lines.append(f"- **`{fid}`**: {fallback}")
        else:
            sec1_lines.append(f"- **`{fid}`**")

    sec1 = "\n".join(sec1_lines)
    sec1_tokens = estimate_tokens(sec1)

    # ── Header ───────────────────────────────────────────────────────────
    header = "## Project Context (auto-generated by DGC v3.8.37)\n"
    header_tokens = estimate_tokens(header)

    remaining = token_budget - header_tokens - sec1_tokens

    # ── Section 2 (was 4): Key Code — PRIORITY over edges ───────────────
    sec_code = ""
    if remaining > 400:
        code_budget = min(remaining, int(token_budget * 0.45))  # up to 45% for code
        sec_code_parts: list[str] = ["### Key Code (pre-loaded)"]
        code_tokens_used = estimate_tokens(sec_code_parts[0])
        lang_map = {"py": "python", "ts": "typescript", "tsx": "tsx", "js": "javascript", "jsx": "jsx", "go": "go"}

        for fid in file_ids[:4]:  # was [:3]
            if code_budget - code_tokens_used < 200:
                break
            base_file, summary_obj = lookup_summary(fid)
            funcs_map = summary_obj.get("functions") or {}

            fn_dict: dict[str, dict] = {}
            if isinstance(funcs_map, dict) and funcs_map:
                for fn_name, fn_info in list(funcs_map.items())[:3]:  # was [:2]
                    if not fn_name or not isinstance(fn_info, dict):
                        continue
                    lines_arr = fn_info.get("lines", [])
                    if isinstance(lines_arr, list) and len(lines_arr) == 2:
                        fn_dict[fn_name] = {
                            "line_start": lines_arr[0],
                            "line_end": lines_arr[1],
                            "description": fn_info.get("description", ""),
                        }
            else:
                for node in nodes:
                    if node.get("kind") == "symbol" and node.get("path") == base_file:
                        node_start = node.get("line_start", 0)
                        fn_dict[node.get("name", "")] = {
                            "line_start": node_start,
                            # Default relative to the start (same rule as
                            # read_symbol_content) so that a symbol past
                            # line 20 never gets an end before its start.
                            "line_end": node.get("line_end", node_start + 20),
                        }
                        if len(fn_dict) >= 3:
                            break

            if not fn_dict:
                continue

            code = read_symbol_content(project_root, base_file, fn_dict, max_lines=80)
            if not code:
                continue

            ext = base_file.rsplit(".", 1)[-1] if "." in base_file else ""
            lang = lang_map.get(ext, ext)

            block = f"```{lang}\n{code}\n```"
            block_tokens = estimate_tokens(block)

            if code_tokens_used + block_tokens > code_budget:
                # Does not fit: include a truncated version if a useful
                # amount of room is left, then stop adding code either way.
                char_budget = int((code_budget - code_tokens_used) * 3.2)
                if char_budget > 100:
                    truncated = code[:char_budget].rsplit("\n", 1)[0]
                    block = f"```{lang}\n{truncated}\n# ... truncated\n```"
                    sec_code_parts.append(block)
                break

            sec_code_parts.append(block)
            code_tokens_used += block_tokens

        if len(sec_code_parts) > 1:
            sec_code = "\n".join(sec_code_parts)
            remaining -= estimate_tokens(sec_code)

    # ── Section 3: Recommended Reads ─────────────────────────────────────
    sec_reads = ""
    if remaining > 200:
        sec3_lines: list[str] = ["### Recommended Reads"]
        read_targets_added = 0

        for fid in file_ids:
            if read_targets_added >= max_read_targets:
                break
            base_file, summary_obj = lookup_summary(fid)
            funcs = summary_obj.get("functions", [])
            if not funcs:
                # No structured summary: list the file's symbols from the graph.
                for node in nodes:
                    if node.get("kind") == "symbol" and node.get("path") == base_file:
                        raw_start = node.get("line_start", 0)
                        # Default the end to the raw (0-based) start so both
                        # ends are shifted to 1-based exactly once.
                        raw_end = node.get("line_end", raw_start)
                        name = node.get("name", "unknown")
                        sec3_lines.append(
                            f"- `{base_file}` lines {raw_start + 1}-{raw_end + 1}: `{name}()`"
                        )
                        read_targets_added += 1
                        if read_targets_added >= max_read_targets:
                            break
                continue

            funcs_iter = funcs.items() if isinstance(funcs, dict) else []
            for fn_name, fn_info in list(funcs_iter)[:3]:
                if read_targets_added >= max_read_targets:
                    break
                if not fn_name or not isinstance(fn_info, dict):
                    continue
                lines_arr = fn_info.get("lines", [])
                if not (isinstance(lines_arr, list) and len(lines_arr) == 2):
                    continue
                line_s, line_e = lines_arr
                if line_s == 0 and line_e == 0:
                    continue
                display_start = line_s + 1 if line_s >= 0 else line_s
                display_end = line_e + 1 if line_e >= 0 else line_e
                desc = fn_info.get("description", "")
                desc_part = f" -- {desc}" if desc else ""
                sec3_lines.append(
                    f"- `{base_file}` lines {display_start}-{display_end}: `{fn_name}()`{desc_part}"
                )
                read_targets_added += 1

        if read_targets_added > 0:
            sec_reads = "\n".join(sec3_lines)
            remaining -= estimate_tokens(sec_reads)

    # ── Section 4: Key Relationships (lowest priority) ───────────────────
    sec_edges = ""
    if remaining > 150:
        sec4_lines: list[str] = ["### Key Relationships"]
        relevant_rels = {"imports", "calls", "extends", "implements", "requires", "contains", "references"}
        edge_count = 0
        for edge in edges_list:
            if edge_count >= 10:
                break
            if not hasattr(edge, "get"):
                continue
            frm = str(edge.get("from", ""))
            to = str(edge.get("to", ""))
            rel = str(edge.get("rel", ""))
            # Keep only edges that touch a chosen file and carry a known relation.
            if frm not in chosen_set and to not in chosen_set:
                continue
            if rel not in relevant_rels:
                continue
            sec4_lines.append(f"- `{_basename(frm)}` --{rel}--> `{_basename(to)}`")
            edge_count += 1

        if edge_count > 0:
            sec_edges = "\n".join(sec4_lines)

    # ── Assemble final output ────────────────────────────────────────────
    sections = [header, sec1]
    if sec_code:
        sections.append(sec_code)
    if sec_reads:
        sections.append(sec_reads)
    if sec_edges:
        sections.append(sec_edges)

    return "\n\n".join(sections)
501
+
502
+
503
+ # ---------------------------------------------------------------------------
504
+ # 6. pack_for_query
505
+ # ---------------------------------------------------------------------------
506
+
507
def pack_for_query(
    query: str,
    project_root: Path,
    token_budget: int = 5000,
) -> str:
    """Run retrieval for *query* and pack the result into a context block.

    Loads the graph, retrieves up to 8 files and 40 edges for the query,
    loads the project's structured summaries and hands everything to
    ``pack`` with the given token budget.
    """
    dual_graph = _dg_load_graph()
    return pack(
        retrieve_result=_dg_retrieve(dual_graph, query, top_files=8, top_edges=40),
        summaries=load_summaries(project_root),
        graph=dual_graph,
        project_root=project_root,
        token_budget=token_budget,
    )
523
+
524
+
525
+ # ---------------------------------------------------------------------------
526
+ # __main__
527
+ # ---------------------------------------------------------------------------
528
+
529
def main() -> None:
    """Command-line entry point: print the packed context for a query.

    Expects the query as the first argument and an optional project root as
    the second (defaults to the current directory).  With no arguments a
    usage line is written to stderr and the process exits with status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: context-packer \"your query\" [project_root]", file=sys.stderr)
        raise SystemExit(1)

    query = cli_args[0]
    root = Path(cli_args[1]) if len(cli_args) > 1 else Path(".")
    print(pack_for_query(query, root.resolve()))


if __name__ == "__main__":
    main()