chronicle-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. chronicle_sdk-0.1.0/PKG-INFO +398 -0
  2. chronicle_sdk-0.1.0/README.md +379 -0
  3. chronicle_sdk-0.1.0/pyproject.toml +30 -0
  4. chronicle_sdk-0.1.0/setup.cfg +4 -0
  5. chronicle_sdk-0.1.0/src/chronicle/__init__.py +46 -0
  6. chronicle_sdk-0.1.0/src/chronicle/api.py +1381 -0
  7. chronicle_sdk-0.1.0/src/chronicle/cli.py +622 -0
  8. chronicle_sdk-0.1.0/src/chronicle/core/__init__.py +51 -0
  9. chronicle_sdk-0.1.0/src/chronicle/core/config.py +51 -0
  10. chronicle_sdk-0.1.0/src/chronicle/core/errors.py +10 -0
  11. chronicle_sdk-0.1.0/src/chronicle/core/interfaces.py +94 -0
  12. chronicle_sdk-0.1.0/src/chronicle/core/logging.py +13 -0
  13. chronicle_sdk-0.1.0/src/chronicle/core/models.py +311 -0
  14. chronicle_sdk-0.1.0/src/chronicle/core/pydantic_compat.py +72 -0
  15. chronicle_sdk-0.1.0/src/chronicle/eval/__init__.py +4 -0
  16. chronicle_sdk-0.1.0/src/chronicle/eval/ab_tests.py +7 -0
  17. chronicle_sdk-0.1.0/src/chronicle/eval/accuracy_eval.py +7 -0
  18. chronicle_sdk-0.1.0/src/chronicle/eval/benchmarks.py +8 -0
  19. chronicle_sdk-0.1.0/src/chronicle/eval/retrieval_quality.py +9 -0
  20. chronicle_sdk-0.1.0/src/chronicle/eval/token_savings.py +71 -0
  21. chronicle_sdk-0.1.0/src/chronicle/indexer/__init__.py +16 -0
  22. chronicle_sdk-0.1.0/src/chronicle/indexer/ast_parser.py +38 -0
  23. chronicle_sdk-0.1.0/src/chronicle/indexer/call_graph_builder.py +49 -0
  24. chronicle_sdk-0.1.0/src/chronicle/indexer/dependency_graph_builder.py +25 -0
  25. chronicle_sdk-0.1.0/src/chronicle/indexer/git_evolution_analyzer.py +119 -0
  26. chronicle_sdk-0.1.0/src/chronicle/indexer/repo_scanner.py +26 -0
  27. chronicle_sdk-0.1.0/src/chronicle/indexer/symbol_extractor.py +92 -0
  28. chronicle_sdk-0.1.0/src/chronicle/integrations/__init__.py +4 -0
  29. chronicle_sdk-0.1.0/src/chronicle/integrations/langgraph.py +5 -0
  30. chronicle_sdk-0.1.0/src/chronicle/integrations/langgraph_node.py +33 -0
  31. chronicle_sdk-0.1.0/src/chronicle/integrations/mcp_server.py +62 -0
  32. chronicle_sdk-0.1.0/src/chronicle/llm/__init__.py +6 -0
  33. chronicle_sdk-0.1.0/src/chronicle/llm/guardrails.py +39 -0
  34. chronicle_sdk-0.1.0/src/chronicle/llm/prompts.py +37 -0
  35. chronicle_sdk-0.1.0/src/chronicle/llm/providers.py +17 -0
  36. chronicle_sdk-0.1.0/src/chronicle/llm/router.py +109 -0
  37. chronicle_sdk-0.1.0/src/chronicle/llm/token_usage.py +25 -0
  38. chronicle_sdk-0.1.0/src/chronicle/memory/__init__.py +15 -0
  39. chronicle_sdk-0.1.0/src/chronicle/memory/agent_bus_store.py +301 -0
  40. chronicle_sdk-0.1.0/src/chronicle/memory/migrations.py +1 -0
  41. chronicle_sdk-0.1.0/src/chronicle/memory/schema.py +18 -0
  42. chronicle_sdk-0.1.0/src/chronicle/memory/session_store.py +240 -0
  43. chronicle_sdk-0.1.0/src/chronicle/memory/sqlite_store.py +62 -0
  44. chronicle_sdk-0.1.0/src/chronicle/memory/store.py +26 -0
  45. chronicle_sdk-0.1.0/src/chronicle/memory/vector_store.py +15 -0
  46. chronicle_sdk-0.1.0/src/chronicle/models.py +1 -0
  47. chronicle_sdk-0.1.0/src/chronicle/ollama.py +118 -0
  48. chronicle_sdk-0.1.0/src/chronicle/pipeline.py +10 -0
  49. chronicle_sdk-0.1.0/src/chronicle/remote_repo.py +78 -0
  50. chronicle_sdk-0.1.0/src/chronicle/retrieval/__init__.py +13 -0
  51. chronicle_sdk-0.1.0/src/chronicle/retrieval/call_chain.py +112 -0
  52. chronicle_sdk-0.1.0/src/chronicle/retrieval/context_builder.py +65 -0
  53. chronicle_sdk-0.1.0/src/chronicle/retrieval/context_compressor.py +158 -0
  54. chronicle_sdk-0.1.0/src/chronicle/retrieval/graph_ranker.py +18 -0
  55. chronicle_sdk-0.1.0/src/chronicle/retrieval/patch_context.py +197 -0
  56. chronicle_sdk-0.1.0/src/chronicle/retrieval/provenance.py +28 -0
  57. chronicle_sdk-0.1.0/src/chronicle/retrieval/query_planner.py +141 -0
  58. chronicle_sdk-0.1.0/src/chronicle/retrieval/retrieval_orchestrator.py +804 -0
  59. chronicle_sdk-0.1.0/src/chronicle/retrieval/symbol_ranker.py +121 -0
  60. chronicle_sdk-0.1.0/src/chronicle/retrieval/token_budget.py +17 -0
  61. chronicle_sdk-0.1.0/src/chronicle/service/__init__.py +5 -0
  62. chronicle_sdk-0.1.0/src/chronicle/service/app.py +1569 -0
  63. chronicle_sdk-0.1.0/src/chronicle/validation/__init__.py +3 -0
  64. chronicle_sdk-0.1.0/src/chronicle/validation/grounding_checker.py +100 -0
  65. chronicle_sdk-0.1.0/src/chronicle/validation/hallucination_checker.py +12 -0
  66. chronicle_sdk-0.1.0/src/chronicle/validation/output_validator.py +38 -0
  67. chronicle_sdk-0.1.0/src/chronicle/validation/patch_validator.py +17 -0
  68. chronicle_sdk-0.1.0/src/chronicle_sdk.egg-info/PKG-INFO +398 -0
  69. chronicle_sdk-0.1.0/src/chronicle_sdk.egg-info/SOURCES.txt +75 -0
  70. chronicle_sdk-0.1.0/src/chronicle_sdk.egg-info/dependency_links.txt +1 -0
  71. chronicle_sdk-0.1.0/src/chronicle_sdk.egg-info/entry_points.txt +3 -0
  72. chronicle_sdk-0.1.0/src/chronicle_sdk.egg-info/requires.txt +14 -0
  73. chronicle_sdk-0.1.0/src/chronicle_sdk.egg-info/top_level.txt +1 -0
  74. chronicle_sdk-0.1.0/tests/test_graph.py +66 -0
  75. chronicle_sdk-0.1.0/tests/test_reasoning.py +883 -0
  76. chronicle_sdk-0.1.0/tests/test_remote_repo.py +56 -0
  77. chronicle_sdk-0.1.0/tests/test_scanner.py +30 -0
@@ -0,0 +1,398 @@
1
+ Metadata-Version: 2.4
2
+ Name: chronicle-sdk
3
+ Version: 0.1.0
4
+ Summary: Python SDK for grounded context compression and LLM routing in coding workflows.
5
+ Author: Animesh Dutta
6
+ Requires-Python: >=3.11
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: GitPython>=3.1.43
9
+ Requires-Dist: numpy>=1.26
10
+ Requires-Dist: pydantic>=2.8
11
+ Provides-Extra: faiss
12
+ Requires-Dist: faiss-cpu>=1.8.0; extra == "faiss"
13
+ Provides-Extra: hosted
14
+ Requires-Dist: fastapi>=0.115; extra == "hosted"
15
+ Requires-Dist: uvicorn>=0.30; extra == "hosted"
16
+ Provides-Extra: treesitter
17
+ Requires-Dist: tree-sitter>=0.25.0; extra == "treesitter"
18
+ Requires-Dist: tree-sitter-python>=0.23.0; extra == "treesitter"
19
+
20
+ # Chronicle
21
+
22
+ Chronicle is an **AI Context Operating System for Coding Agents**.
23
+
24
+ It indexes repository structure, ranks the smallest useful context for a coding task, compresses that context to fit a token budget, records provenance for every chunk, and decides whether an LLM is needed at all.
25
+
26
+ ## What Chronicle optimizes
27
+
28
+ - Accuracy per token
29
+ - Deterministic retrieval before probabilistic reasoning
30
+ - Grounded context with file and symbol provenance
31
+ - Lower token spend without lowering answer quality
32
+
33
+ ## Current MVP capabilities
34
+
35
+ - Python AST indexing for functions, classes, and methods
36
+ - Symbol graph and import dependency graph construction
37
+ - Git evolution summaries for churn, risky changes, and symbol-to-file history
38
+ - Deterministic query planning and context ranking
39
+ - Ownership-aware context shaping that separates direct behavior, runtime wiring, and adjacent helpers
40
+ - Token-budget-aware context compression
41
+ - LLM routing decisions without forcing an LLM call
42
+ - Output validation for grounded file and symbol references
43
+ - Secret redaction guardrails before external LLM calls
44
+ - SQLite-backed snapshot persistence with JSON compatibility
45
+ - SQLite-backed session memory for multi-turn context recall
46
+ - Patch-aware contexting for edited symbols, callers/callees, tests, and interfaces
47
+ - Multi-agent context bus for planner/coder/reviewer/critic handoffs
48
+ - Confidence gating to block low-signal LLM calls before they waste tokens
49
+ - Grounded repair loop to retry weak answers using the same validated context
50
+ - MCP-compatible integration scaffold for external agent systems
51
+ - Evaluation metrics with benchmark confidence and recommendation output
52
+
53
+ ## Current language support
54
+
55
+ - Production-usable now: Python `.py` repositories
56
+ - Partial only: notebook-heavy Python repos where most logic lives in `.ipynb`
57
+ - Planned next: Go, Rust, TypeScript / JavaScript
58
+ - Later: C / C++
59
+
60
+ Chronicle should not be treated as benchmark-grade on non-Python repos until symbol extraction, dependency understanding, patch-aware retrieval, and grounding are implemented and validated for that language.
61
+
62
+ ## Python SDK
63
+
64
+ Chronicle ships as a public Python package, but it runs in your own environment against private codebases and prepares context before any LLM call is made.
65
+
66
+ ### Install from PyPI
67
+
68
+ ```bash
69
+ pip install chronicle-sdk
70
+ ```
71
+
72
+ ## Quick start
73
+
74
+ ```bash
75
+ python3 -m venv .venv
76
+ source .venv/bin/activate
77
+ pip install -e .
78
+ ```
79
+
80
+ Run the hosted alpha API locally:
81
+
82
+ ```bash
83
+ pip install -e '.[hosted]'
84
+ chronicle-api
85
+ ```
86
+
87
+ Index a repository:
88
+
89
+ ```bash
90
+ chronicle index --repo /path/to/repo
91
+ ```
92
+
93
+ Retrieve grounded context:
94
+
95
+ ```bash
96
+ chronicle context "Where is auth token refresh handled?" --repo /path/to/repo --token-budget 3000
97
+ ```
98
+
99
+ Get the full machine-auditable payload when needed:
100
+
101
+ ```bash
102
+ chronicle context "Where is auth token refresh handled?" --repo /path/to/repo --token-budget 3000 --view full
103
+ ```
104
+
105
+ Evaluate Chronicle against a baseline:
106
+
107
+ ```bash
108
+ chronicle evaluate "Where is auth token refresh handled?" --repo /path/to/repo --token-budget 3000
109
+ ```
110
+
111
+ Diagnose whether a repo indexed correctly:
112
+
113
+ ```bash
114
+ chronicle doctor --repo /path/to/repo --query "Where is RequestContext defined?" --token-budget 2500
115
+ ```
116
+
117
+ Run an end-to-end token-savings demo:
118
+
119
+ ```bash
120
+ chronicle demo "Where is RequestContext defined?" --repo-url https://github.com/pallets/flask.git --token-budget 2500
121
+ ```
122
+
123
+ Run an A/B LLM comparison with and without Chronicle context:
124
+
125
+ ```bash
126
+ chronicle ab-test "Where is full_dispatch_request defined?" \
127
+ --repo-url https://github.com/pallets/flask.git \
128
+ --token-budget 2500 \
129
+ --baseline-token-budget 12000 \
130
+ --model qwen2.5:14b-instruct
131
+ ```
132
+
133
+ Render a functional call chain as text plus Mermaid:
134
+
135
+ ```bash
136
+ chronicle call-chain "Trace how ManagerAgent.run reaches retry logic" \
137
+ --repo /path/to/repo \
138
+ --token-budget 2200
139
+ ```
140
+
141
+ Use patch-aware contexting after local code changes:
142
+
143
+ ```bash
144
+ chronicle doctor \
145
+ --repo /path/to/repo \
146
+ --query "Enhance ManagerAgent.run to support retry and update impacted tests" \
147
+ --token-budget 2200
148
+ ```
149
+
150
+ Start a reusable Chronicle session for multi-turn memory:
151
+
152
+ ```bash
153
+ chronicle session-start --repo /path/to/repo
154
+ ```
155
+
156
+ Use session-aware contexting across turns:
157
+
158
+ ```bash
159
+ chronicle context "Where is ManagerAgent.run defined?" \
160
+ --repo /path/to/repo \
161
+ --token-budget 2200 \
162
+ --session-id session-abc123
163
+
164
+ chronicle context "How does ManagerAgent.run call retry logic?" \
165
+ --repo /path/to/repo \
166
+ --token-budget 2200 \
167
+ --session-id session-abc123
168
+ ```
169
+
170
+ Inspect recorded session memory:
171
+
172
+ ```bash
173
+ chronicle session-show --repo /path/to/repo --session-id session-abc123
174
+ ```
175
+
176
+ Create and use a shared multi-agent context bus:
177
+
178
+ ```bash
179
+ chronicle bus-start "Improve ManagerAgent.run flow" --repo /path/to/repo --bus-id feature-bus
180
+
181
+ chronicle bus-context "Plan ManagerAgent.run enhancement" \
182
+ --repo /path/to/repo \
183
+ --bus-id feature-bus \
184
+ --role planner \
185
+ --token-budget 2200
186
+
187
+ chronicle bus-handoff \
188
+ --repo /path/to/repo \
189
+ --bus-id feature-bus \
190
+ --from-role planner \
191
+ --to-role coder \
192
+ --reason "Context is grounded"
193
+
194
+ chronicle bus-show --repo /path/to/repo --bus-id feature-bus
195
+ ```
196
+
197
+ Python SDK:
198
+
199
+ ```python
200
+ from chronicle import Chronicle
201
+
202
+ chronicle = Chronicle(repo_path="./repo")
203
+ chronicle.index()
204
+
205
+ context = chronicle.context(
206
+ query="Where is auth token refresh handled?",
207
+ token_budget=3000,
208
+ )
209
+
210
+ print(context.to_markdown())
211
+ ```
212
+
213
+ SDK packet for your own LLM call:
214
+
215
+ ```python
216
+ from chronicle import Chronicle
217
+
218
+ chronicle = Chronicle(repo_path="./repo")
219
+ packet = chronicle.prepare_prompt_packet(
220
+ query="How should I refactor the retry path?",
221
+ token_budget=3000,
222
+ )
223
+
224
+ if packet.should_call_llm and packet.prompt:
225
+ prompt = packet.prompt
226
+ else:
227
+ prompt = packet.compressed_context
228
+ ```
229
+
230
+ The SDK packet gives you:
231
+
232
+ - `compressed_context` for the smallest grounded repo slice
233
+ - `response_policy` for output length and format control
234
+ - `should_call_llm` to block weak model calls
235
+ - behavior boundaries inside the context pack, so LLMs can avoid misattributing nearby code
236
+
237
+ ## Retrieval architecture
238
+
239
+ Chronicle’s retrieval path is now deliberately quality-first for synthesis queries:
240
+
241
+ 1. **Intent and concept planning**
242
+ - classify the query (`locator`, `performance`, `dataflow`, `refactor`, etc.)
243
+ - extract stable query concepts, not just raw keywords
244
+
245
+ 2. **Deterministic ranking**
246
+ - score symbols using exact matches, normalized concept matches, file proximity, graph proximity, patch hints, and session memory
247
+
248
+ 3. **Coverage-aware diversification**
249
+ - avoid collapsing onto only one strong symbol
250
+ - preserve cross-concept coverage when the question spans multiple behaviors or layers
251
+
252
+ 4. **Ownership-aware enrichment**
253
+ - expand anchor classes into key methods
254
+ - surface helper evidence from selected execution paths
255
+ - add boundary notes that distinguish direct ownership from adjacent runtime wiring
256
+
257
+ 5. **Focused compression**
258
+ - keep fuller bodies for anchor surfaces
259
+ - use query-aware excerpts for long support methods so relevant branches survive the token cut
260
+
261
+ This means Chronicle is no longer just “top-k symbols under a budget.” It is trying to preserve the smallest grounded packet that still keeps behavior boundaries intact.
262
+
263
+ ## Comparing Chronicle vs baseline
264
+
265
+ When you run `chronicle ab-test` or the sample Ollama comparison, read the results in this order:
266
+
267
+ 1. `Winner summary`
268
+ 2. `Grounded` / `Both grounded`
269
+ 3. `Input token reduction`
270
+ 4. `Answer similarity`
271
+
272
+ High token reduction alone is not a quality win. Chronicle should only be treated as better when the answer stays on-task, grounded, and materially as useful as the baseline.
273
+ - The SDK packet also exposes `prompt` when Chronicle recommends a model call
274
+ - The SDK packet also exposes `selected_symbols` and `selected_files` for tracing and logging
275
+
276
+ Run the local SDK example against the Nudge repo with Ollama:
277
+
278
+ ```bash
279
+ PYTHONPATH=src python3 examples/sample_nudge_sdk_ollama.py \
280
+ --repo /path/to/Nudge \
281
+ --model qwen2.5:14b-instruct
282
+ ```
283
+
284
+ Run the same example in comparison mode to print baseline vs Chronicle token usage and both model responses:
285
+
286
+ ```bash
287
+ PYTHONPATH=src python3 examples/sample_nudge_sdk_ollama.py \
288
+ --repo /path/to/Nudge \
289
+ --model qwen2.5:14b-instruct \
290
+ --compare
291
+ ```
292
+
293
+ LangGraph-style integration:
294
+
295
+ ```python
296
+ from chronicle.integrations.langgraph_node import ChronicleContextNode
297
+
298
+ node = ChronicleContextNode(repo_path="./repo", token_budget=4000)
299
+ result = node({"query": "Trace checkout retries"})
300
+ ```
301
+
302
+ ## Hosted alpha deployment
303
+
304
+ Chronicle now includes a minimal FastAPI service for a Python-only hosted alpha.
305
+
306
+ For the full deployment runbook, see `chronicle/DEPLOYMENT.md`.
307
+ For the shortest shipping path for founders, see `chronicle/LAUNCH_CHECKLIST.md`.
308
+
309
+ Available endpoints:
310
+
311
+ - `GET /health`
312
+ - `POST /index`
313
+ - `POST /doctor`
314
+ - `POST /demo`
315
+ - `POST /context`
316
+ - `POST /evaluate`
317
+ - `POST /call-chain`
318
+
319
+ Customers can use Chronicle either from the landing-page demo form or through direct API calls.
320
+ If you set `CHRONICLE_API_KEY`, hosted endpoints require the `X-API-Key` header.
321
+
322
+ ### Fastest free-host path
323
+
324
+ #### Render
325
+
326
+ - Push this repo to GitHub
327
+ - Create a new Web Service on Render
328
+ - Render can use `render.yaml` directly
329
+ - Build command: `pip install -e '.[hosted]'`
330
+ - Start command: `chronicle-api`
331
+ - Set environment variable: `CHRONICLE_API_KEY=replace-with-a-secret-key`
332
+
333
+ #### Railway
334
+
335
+ - Create a new project from the repo
336
+ - Use the same commands:
337
+ - build: `pip install -e '.[hosted]'`
338
+ - start: `chronicle-api`
339
+
340
+ #### Fly.io
341
+
342
+ - Use the included `Dockerfile`
343
+ - Then deploy with normal Fly Docker flow
344
+
345
+ ### Example request
346
+
347
+ ```bash
348
+ curl -X POST http://localhost:8000/doctor \
349
+ -H "Content-Type: application/json" \
350
+ -d '{
351
+ "repo_url": "https://github.com/pallets/flask.git",
352
+ "query": "Where is full_dispatch_request defined?",
353
+ "token_budget": 2500
354
+ }'
355
+ ```
356
+
357
+ ## Architecture
358
+
359
+ ```text
360
+ User Query
361
+
362
+ Query Planner
363
+
364
+ Repository Intelligence Layer
365
+ ├── AST Index
366
+ ├── Symbol Graph
367
+ ├── Dependency Graph
368
+ └── Git Evolution Map
369
+
370
+ Retrieval Orchestrator
371
+
372
+ Context Compression Engine
373
+
374
+ Token Budget Manager
375
+
376
+ LLM Router
377
+
378
+ Output Validator
379
+
380
+ Evaluation Layer
381
+ ```
382
+
383
+ ## Notes
384
+
385
+ - Chronicle is local-first and does not send repository code anywhere by default.
386
+ - Index artifacts are stored in `.chronicle/index.sqlite3` and mirrored to `.chronicle/index.json` unless `index_dir` is overridden.
387
+ - Session memory is stored locally in `.chronicle/sessions.sqlite3`.
388
+ - Remote repos cloned via `--repo-url` are stored in `.chronicle/repos/` by default.
389
+ - For best MVP retrieval, prefer exact symbol queries like `Where is RequestContext defined?` over broad semantic questions.
390
+ - CLI output is wrapped in a stable envelope with `status`, `command`, `generated_at`, and `data`.
391
+ - Compact CLI output is the default; use `--view full` when you want the full machine/audit payload.
392
+ - `evaluate` now includes `benchmark_confidence` and `recommendation` so token-savings reports are easier to trust.
393
+ - Session-aware `context`, `doctor`, `demo`, `evaluate`, and `ab-test` calls can reuse prior retrieved symbols, files, and validated facts.
394
+ - Flow, trace, and edit-style queries can include a compact functional call chain in the grounded context before an LLM call.
395
+ - Edit and enhancement queries can automatically include context priorities, a coverage checklist, patch-aware summaries, and an LLM task brief so the model sees changed symbols, related tests, interfaces, and likely flow with fewer tokens.
396
+ - If retrieval confidence is weak, Chronicle can recommend skipping the LLM call instead of paying for speculation.
397
+ - If an LLM answer is weak or ungrounded, Chronicle can run a grounded repair pass using the same validated context before returning the result.
398
+ - Multi-agent workflows can persist shared grounded context, deterministic handoffs, and per-phase validation in `.chronicle/agent_bus.sqlite3`.