crewlore 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: crewlore
3
+ Version: 0.1.0
4
+ Summary: An open, local-first, harness-agnostic compiler that turns AI-coding-agent sessions into a versioned, plaintext team tribal-knowledge layer in your own git repo.
5
+ Author: crewlore Contributors
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 crewlore Contributors
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ License-File: LICENSE
28
+ Keywords: agents,ai,cli,compiler,knowledge,local-first,mcp,tribal-knowledge
29
+ Classifier: Development Status :: 3 - Alpha
30
+ Classifier: Environment :: Console
31
+ Classifier: Intended Audience :: Developers
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.10
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Programming Language :: Python :: 3.13
38
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
39
+ Requires-Python: >=3.10
40
+ Requires-Dist: anthropic>=0.39
41
+ Requires-Dist: pydantic<3.0,>=2.0
42
+ Requires-Dist: pyyaml<7.0,>=6.0
43
+ Requires-Dist: rich<16.0,>=13.0
44
+ Requires-Dist: typer<1.0,>=0.12
45
+ Provides-Extra: dev
46
+ Requires-Dist: openai>=1.0; extra == 'dev'
47
+ Requires-Dist: pytest-cov; extra == 'dev'
48
+ Requires-Dist: pytest>=8.0; extra == 'dev'
49
+ Requires-Dist: ruff; extra == 'dev'
50
+ Provides-Extra: openai
51
+ Requires-Dist: openai>=1.0; extra == 'openai'
52
+ Provides-Extra: serve
53
+ Requires-Dist: mcp>=1.0; extra == 'serve'
54
+ Description-Content-Type: text/markdown
55
+
56
+ # crewlore
57
+
58
+ [![CI](https://github.com/srijansk/crewlore/actions/workflows/ci.yml/badge.svg)](https://github.com/srijansk/crewlore/actions/workflows/ci.yml)
59
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
60
+ [![fidelity 100%](https://img.shields.io/badge/fidelity-100%25-success.svg)](docs/examples/pydantic-ai/)
61
+ [![claims compiled 18](https://img.shields.io/badge/claims_compiled-18-informational.svg)](docs/examples/pydantic-ai/)
62
+
63
+ > **Your coding agents keep relearning what your team already figured out.**
64
+ > `crewlore` compiles agent sessions into a citable, plaintext team-knowledge layer that lives in your git repo. Local-first.
65
+
66
+ <p align="center">
67
+ <img src="docs/assets/demo.gif" alt="crewlore in action — sessions compiled into a citable team knowledge book" />
68
+ </p>
69
+
70
+ ```bash
71
+ pipx install git+https://github.com/srijansk/crewlore.git
72
+ ```
73
+
74
+ > **Validated on [`pydantic/pydantic-ai`](https://github.com/pydantic/pydantic-ai)** (17.3k ⭐) · 3 sessions · 18 claims · 100% fidelity · [see receipts →](docs/examples/pydantic-ai/)
75
+
76
+ ## Quickstart
77
+
78
+ ```bash
79
+ cd my-repo
80
+ lore init # create .lore/ in your repo
81
+ lore watch # automatic: read agent transcripts, scrub secrets,
82
+ # compile to claims, prune — on an interval
83
+ lore query "billing webhook" # ask the knowledge layer anything, anytime
84
+ ```
85
+
86
+ That's it — engineers keep working in whatever agent they use; `lore` keeps the knowledge layer fresh in the background. Commit `.lore/knowledge` and `.lore/claims` and your teammates inherit it on the next `git pull`.
87
+
88
+ <details>
89
+ <summary>Trouble installing?</summary>
90
+
91
+ If `pipx` fails with `Broken Python installation, platform.mac_ver() returned an empty value`, your default Python is a broken install (sometimes seen with very recent Homebrew Python 3.14 builds). Pin a known-working interpreter:
92
+
93
+ ```bash
94
+ pipx install --python python3.13 git+https://github.com/srijansk/crewlore.git
95
+ ```
96
+
97
+ To make pipx default to Python 3.13 going forward: `export PIPX_DEFAULT_PYTHON=$(which python3.13)`.
98
+
99
+ </details>
100
+
101
+ ### Try it in 30 seconds — no API key
102
+
103
+ ```bash
104
+ git clone https://github.com/srijansk/crewlore.git
105
+ cd crewlore && uv run python scripts/demo.py
106
+ ```
107
+
108
+ The demo runs the full loop on bundled public-safe sessions and prints what it found:
109
+
110
+ > [!NOTE]
111
+ > **Fidelity — 100%.** Every claim's citation resolves verbatim back to its source.
112
+ > **Conflicts surfaced — 1.** A real disagreement kept with both provenances, not silently merged.
113
+ > **Preventable rediscovery — 2 of 3.** Two of the three held-out follow-up sessions re-derived knowledge the layer already had. (Illustrative demo data — n=3, not a benchmark.)
114
+
115
+ ## See it run on a real codebase: pydantic-ai (17.3k ⭐)
116
+
117
+ [`docs/examples/pydantic-ai/`](docs/examples/pydantic-ai/) is a committed snapshot of `crewlore` compiled on the public [`pydantic/pydantic-ai`](https://github.com/pydantic/pydantic-ai) repo — 3 Claude Code sessions on real issues, no synthetic data.
118
+
119
+ - **18 claims** compiled across 9 scope groupings (UI adapters, decorator introspection, durable-execution threat modeling, toolsets, tests, version policy)
120
+ - **100% fidelity** under the explicit [canonical-form contract](docs/anchors.md) — every anchor's quote canonically resolves to a substring of its source session. (Fidelity certifies the *citation* is real, not that the model's *statement* is fully entailed by it — that's what human/PR review of the book is for.)
121
+ - **0 conflicts** because the three sessions covered disjoint scopes — the conflict detector wasn't given anything to flag
122
+ - **Receipts:** the rendered [`book.md`](docs/examples/pydantic-ai/book.md), the raw [`claims.jsonl`](docs/examples/pydantic-ai/claims.jsonl), and full [`provenance.md`](docs/examples/pydantic-ai/provenance.md) (session ids, commit hashes, compile cost, scrub redactions, five real-data bugs the capture surfaced and we fixed before publishing)
123
+
124
+ ## What you get
125
+
126
+ Raw, messy sessions go in. Out comes a structured, citable **compiled claim** — every one carrying its kind, its scope, the action it implies for future work, and a verbatim **anchor** back to the moment it was discovered:
127
+
128
+ > **`[gotcha]`** · *services/billing*
129
+ >
130
+ > Billing webhook handler lacks an idempotency check, causing duplicate charges when Stripe retries webhooks.
131
+ >
132
+ > **Do** — dedupe on the Stripe idempotency key before processing.
133
+ >
134
+ > > *anchor* — "the handler has no idempotency check, so when Stripe retries a webhook the charge is processed again."
135
+
136
+ A human can verify it (the anchor points back to the exact session line); an agent can trust it (the citation is real, not hallucinated). Claims roll up into a knowledge book at `.lore/knowledge/README.md`, grouped by area and committed to your repo alongside your code:
137
+
138
+ ```markdown
139
+ # Team knowledge (compiled by crewlore)
140
+
141
+ ## services/billing
142
+
143
+ - **[gotcha]** Billing webhook handler lacks an idempotency check; dedupe on the Stripe key.
144
+ - *Do:* Dedupe on the Stripe idempotency key before processing.
145
+ - _anchor_ `ses_1#1`: "the handler has no idempotency check, so when Stripe retries a webhook the charge is processed again."
146
+
147
+ ## deployment
148
+
149
+ - **[procedure]** Run migrations before deploy to prevent missing columns.
150
+ - *Do:* Run `make migrate` before every deploy.
151
+ ```
152
+
153
+ ## How it works
154
+
155
+ ```mermaid
156
+ flowchart LR
157
+ S["coding agent<br/>sessions"] --> I["ingest + scrub<br/>(transcripts → NSF,<br/>secrets redacted)"]
158
+ I --> C["compile<br/>(NSF → claims,<br/>verbatim anchors)"]
159
+ C --> R["<b>.lore/</b> in your repo<br/>(knowledge book + claims,<br/>plaintext, git-versioned)"]
160
+ R --> SV["serve<br/>(files + MCP query)"]
161
+ SV --> N["next agent session<br/>inherits the knowledge"]
162
+
163
+ SV -. "usage signal" .-> AL["actuation loop<br/>(decay · reinforce · retire)"]
164
+ AL -. "lifecycle update" .-> R
165
+
166
+ classDef engine fill:#4a5d9e,stroke:#1a2c4d,color:#fff,stroke-width:2px
167
+ classDef artifact fill:#2d6a4f,stroke:#1b4332,color:#fff,stroke-width:2px
168
+ class C engine
169
+ class R artifact
170
+ ```
171
+
172
+ > `lore watch` runs ingest → compile → prune automatically, on an interval.
173
+
174
+ - **Ingest + scrub** — reads the coding agent's existing on-disk transcripts and redacts a curated set of secret patterns (Anthropic / OpenAI / generic `sk-*` API keys, AWS keys, GitHub classic + fine-grained PATs, Google API keys, Slack tokens, HuggingFace tokens, JWTs, connection-string passwords, private-key blocks, and `password=…` assignment shapes) *before* anything is stored or sent to a model. The pattern set is documented in [`docs/scrub.md`](docs/scrub.md).
175
+ - **Compile** — extracts atomic claims, deduplicates them, records disagreements instead of silently overwriting, scores authority by how often a claim recurs, and drops any claim whose citation doesn't resolve verbatim.
176
+ - **Serve** — writes a human- and agent-readable knowledge book to `.lore/knowledge/`, and exposes a query tool (including an optional MCP server) so any agent can pull the relevant slice on demand.
177
+ - **Actuation loop** — every retrieval is recorded, and that usage drives a lifecycle: unused claims decay and archive, contradicted claims are retired, useful claims are reinforced. The store stays small and fresh instead of growing into a pile nobody reads.
178
+
179
+ The intelligence is in **compile**; ingest and serve are deliberately thin, so supporting another coding agent is a small adapter, not a rewrite. To be precise about the word "compile": extraction is an LLM step (the only non-deterministic part), wrapped in deterministic stages — verbatim-anchor verification, content-addressed dedup, conflict recording, and authority scoring. "Compile" means the repeatable session → claims transform, not that an LLM is absent.
180
+
181
+ ## How it differs
182
+
183
+ - **vs. hosted memory (Letta, mem0)** — their store lives in someone else's cloud and you can't `git log` it; `crewlore`'s lives in your repo as plaintext.
184
+ - **vs. per-IDE memory (Cursor rules, Claude memory, Continue, Cody)** — tied to one developer, one IDE; `crewlore` is a *team* artifact, committed and reviewed like code.
185
+ - **vs. hand-curated `CLAUDE.md` / `.cursorrules`** — humans write those by hand and they go stale; `crewlore` compiles + reinforces from real sessions and retires what stops being used.
186
+ - **vs. RAG over a vector DB** — RAG retrieves *document chunks*; `crewlore` compiles atomic, citable *claims* with verbatim anchors, so a human or agent can verify the cited source in seconds. (Retrieval today is deterministic lexical overlap, not embeddings — simpler and dependency-free; semantic ranking is on the roadmap.)
187
+
188
+ ## Why this exists
189
+
190
+ Knowledge discovered inside an agent session is private by default and lost by default. It lives in one developer's transcript, so the next engineer — and every future agent run — re-reads the same files, re-learns the same gotcha, and re-makes a decision the team already made. There's no shared layer that both humans and agents read from, so decisions drift and bugs resurface.
191
+
192
+ `crewlore` makes that knowledge a first-class, versioned artifact in the place your team already trusts: your git repo.
193
+
194
+ **What it is:** a compiler that turns sessions into accurate, deduplicated, conflict-aware, provenance-carrying team knowledge, served back to any agent.
195
+
196
+ **What it isn't:** a hosted service, a vector database, or a personal-memory layer for a single IDE. There's no account, no cloud, and no proprietary store — the compiled knowledge is plaintext you own.
197
+
198
+ ## Your data stays yours
199
+
200
+ - **Local-first.** Capture, compile, and serve all run on infrastructure you control. Point the compiler at your own model provider or a local OpenAI-compatible model (Ollama, LM Studio, vLLM) via `provider: local` — nothing routes through any `crewlore`-operated service, because there is none.
201
+ - **Plaintext, in your repo.** The knowledge layer is human-readable Markdown and JSONL under `.lore/`, versioned by git. `git log .lore/` is your audit trail.
202
+ - **Secrets never travel.** Scrubbing — of both message content and tool-call arguments — happens at ingest, before storage or any model call. It's a high-precision pattern set (a floor, not a DLP guarantee; see [`docs/scrub.md`](docs/scrub.md)), and raw session captures are git-ignored by default regardless.
203
+
204
+ ## CLI
205
+
206
+ | Command | What it does |
207
+ |---|---|
208
+ | `lore init` | Create the `.lore/` layout in your repo. |
209
+ | `lore watch` | Automatically ingest → compile → prune on an interval (`--once` for cron/CI). |
210
+ | `lore compile` | Run a single ingest-and-compile pass manually. |
211
+ | `lore query "<task>"` | Retrieve the claims most relevant to a task (records usage). |
212
+ | `lore status` | Show claim/conflict counts and how much of the layer is actually being used. |
213
+ | `lore serve --mcp` | Start an MCP server exposing query-time retrieval to any MCP-speaking agent (Claude Desktop, Cursor, …). Requires `pip install 'crewlore[serve]'`. See [`docs/mcp.md`](docs/mcp.md) for wiring snippets. |
214
+
215
+ ## Configuration
216
+
217
+ `.lore/config.yaml`:
218
+
219
+ ```yaml
220
+ model:
221
+ provider: anthropic # anthropic | openai | local
222
+ name: claude-sonnet-4-6
223
+ # For provider: local — point at any OpenAI-compatible endpoint you run:
224
+ # base_url: http://localhost:11434/v1 # e.g. Ollama, LM Studio, vLLM
225
+ capture:
226
+ transcripts: ~/.claude/projects
227
+ compile:
228
+ cadence: auto # `lore watch` interval below
229
+ watch_interval_seconds: 300
230
+ ```
231
+
232
+ Bring your own key (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`); `crewlore` never ships keys anywhere. The default Anthropic provider works out of the box. For OpenAI or a local OpenAI-compatible model, add the SDK: `pipx inject crewlore openai` (or `pip install 'crewlore[openai]'`). With `provider: local` nothing leaves your machine at all — the compile call hits your own endpoint.
233
+
234
+ ## Roadmap & limitations
235
+
236
+ > [!NOTE]
237
+ > **Status: alpha.** The core is stable and tested end to end. The on-disk schema may change before 1.0 — and because everything is plaintext and git-versioned, breaking format changes will ship with migrations.
238
+
239
+ - **Stable today:** capture, secret scrubbing, the compile pipeline, retrieval, the actuation loop, and the `.lore/` plaintext format.
240
+ - **In flight:** cross-session conflict alignment — real disagreements are surfaced today, but reliably aligning claims about the same question across independently-compiled sessions is an active area of work.
241
+ - **Planned:** an explicit human approve-before-serve gate (secret scrubbing is already automated), more capture adapters beyond Claude Code, and a real-time capture hook.
242
+
243
+ ## Contributing
244
+
245
+ Issues, discussions, and PRs welcome. New here? Start a [discussion](https://github.com/srijansk/crewlore/discussions) — adding a capture adapter for another coding agent is the most valuable first contribution and is intentionally small. See [CONTRIBUTING.md](CONTRIBUTING.md) for local setup and the dev loop.
246
+
247
+ Tests are fully deterministic — no real API calls during `pytest`.
248
+
249
+ ## License
250
+
251
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,25 @@
1
+ lore/__init__.py,sha256=hXh2s7JVltYCqqA4V434frRg8_TCCwJXL2cmf0GKiA0,116
2
+ lore/actuation.py,sha256=zISvBTvkDWA90zA-8Or0-XJBtfBSavHW7En93dtXfFg,2092
3
+ lore/cli.py,sha256=JwLwmTzkbGnpPKZCtdMm6GOzfulBqDWuJ6W405ctgkQ,6381
4
+ lore/replay.py,sha256=DbkHqp-qy7ittIZjSQjfwcTvFlt_RKMwy1prl0p1Tbk,2343
5
+ lore/schemas.py,sha256=uUqPIuyb9p2Aeyc_Qr9wv1IigS_t21-3idBb1wf_2WY,4472
6
+ lore/scrub.py,sha256=cEEcC2R3km0QYZrn4qZHf96hAK6M75LAJb7YjfoN8PA,5164
7
+ lore/store.py,sha256=vqnk4HlEAajrL1rRM0Nhru0qq1x-mz8PXVglv4FoxD0,6426
8
+ lore/capture/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ lore/capture/ingest.py,sha256=rYOqOeDMFI6t6t6_gPHW3iBhGMUE2EpWjeGfci4oj48,1580
10
+ lore/capture/signals.py,sha256=aH4Z1D7tkN25MgeRCaeiKU6yFVDlFG9O-S661C63qtA,2545
11
+ lore/capture/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ lore/capture/adapters/claude_code.py,sha256=sIe7QISaFC3VODOnVv5JQcajZCK1ZlHd55bmIHXlp1o,4856
13
+ lore/compile/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ lore/compile/extractor.py,sha256=2d4f6eanYqhUCnZpZ0zebFtqEyyoyNfJRT7nstfu664,7644
15
+ lore/compile/llm.py,sha256=lILzulwGi66C1Tp8G2_zflReOARROF1l5X_wwFXEMW4,4539
16
+ lore/compile/pipeline.py,sha256=_MPlJ1aesoDpc8XfEAJHhVvDKmrg5ST9vNZh_kQNE2o,5291
17
+ lore/compile/run.py,sha256=QC5yLDNNE_pmn1EJVvmTZaVmaUnjSYTvwUJrBGXzDIM,3643
18
+ lore/serve/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ lore/serve/mcp_server.py,sha256=YL5Tj1RDvZkeY17N-Ke-Vi31sPdg72_grSL2f4LZMHY,1210
20
+ lore/serve/server.py,sha256=A-l0vX0lyzSKgz30YVM1j7X3kzCe5BLN_0lEkpnZL2E,4453
21
+ crewlore-0.1.0.dist-info/METADATA,sha256=tJiPpabgjFe0_W8BBSCBMmE3bR1TqSy-SO-Ctm0ODBg,15788
22
+ crewlore-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
23
+ crewlore-0.1.0.dist-info/entry_points.txt,sha256=dr_gXSDsmfXndWIangQItQuuwPbyRRYNGCZGCyNZ5h0,38
24
+ crewlore-0.1.0.dist-info/licenses/LICENSE,sha256=4zTEbmIxkckIfU69ENjzoqoniXkg91RU8KdbpeldYu0,1078
25
+ crewlore-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ lore = lore.cli:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 crewlore Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
lore/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """crewlore: a local-first compiler for team tribal knowledge from coding-agent sessions."""
2
+
3
+ __version__ = "0.1.0"
lore/actuation.py ADDED
@@ -0,0 +1,64 @@
1
+ """The actuation loop's lifecycle (C0 lever 2).
2
+
3
+ A knowledge layer's value is an actuation property, not a storage property. Left
4
+ alone, a compiled store grows monotonically and rots into a dumpyard. Usage —
5
+ recorded by the serve layer — drives a homeostatic lifecycle so the *active* set
6
+ plateaus and churns:
7
+
8
+ - never-served claims past a staleness window decay to `archived`;
9
+ - claims overridden in real use (wrong/stale) are retired (their status also becomes `archived`);
10
+ - claims that proved influential are reinforced (authority up).
11
+
12
+ Run this periodically (e.g. after each compile, or on a cron) over the store.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from datetime import datetime, timedelta, timezone
18
+
19
+ from lore.schemas import Claim
20
+
21
# Authority added per recorded influential use when an active claim is reinforced;
# `_step` multiplies this by `usage.times_influential` and caps the result at 1.0.
_REINFORCE_PER_INFLUENCE = 0.1
22
+
23
+
24
def _as_utc(dt: datetime) -> datetime:
    """Return ``dt`` untouched when it is timezone-aware, otherwise tag it as UTC.

    Naive values are treated as already being expressed in UTC; only the zone is
    attached, the clock fields are not shifted. This keeps a later subtraction of
    an aware and a naive datetime from raising ``TypeError``.
    """
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt
27
+
28
+
29
def apply_lifecycle(
    claims: list[Claim],
    *,
    now: datetime,
    max_unused_age: timedelta,
    override_threshold: int = 2,
) -> list[Claim]:
    """Run one lifecycle step over every claim and return the results in input order.

    Args:
        claims: The claims to evaluate; the input list itself is not mutated.
        now: Reference instant used to measure how long a claim has gone unused.
        max_unused_age: Staleness window for claims that were never served.
        override_threshold: Minimum number of overrides before a claim can be
            retired by contradiction.

    Returns:
        A new list with one entry per input claim, each produced by ``_step``.
    """
    return [_step(claim, now, max_unused_age, override_threshold) for claim in claims]
40
+
41
+
42
def _step(c: Claim, now: datetime, max_unused_age: timedelta, override_threshold: int) -> Claim:
    """Apply the lifecycle rules to a single claim and return the resulting claim.

    Rules are checked in order and the first match wins:

    1. A claim whose status is not ``"active"`` is returned unchanged.
    2. A claim overridden at least ``override_threshold`` times, and more often
       than it was influential, gets status ``"archived"``.
    3. A claim that was never served and whose ``observed_at`` is older than
       ``max_unused_age`` (relative to ``now``) gets status ``"archived"``.
    4. A claim that was influential at least once gets its authority raised by
       ``_REINFORCE_PER_INFLUENCE`` per influential use, capped at 1.0.
    5. Anything else is returned unchanged.

    Changes are made on a copy (``model_copy``); ``c`` itself is not modified.
    """
    if c.status != "active":
        return c

    usage = c.usage
    overridden = usage.times_overridden
    influential = usage.times_influential

    # Retired by real-use contradiction: overridden often enough, and more often
    # than the claim actually helped.
    contradicted = overridden >= override_threshold and overridden > influential
    if contradicted:
        return c.model_copy(update={"status": "archived"})

    # Never used and stale -> decay out of the active set. A claim without an
    # observation time can never be judged stale, so it is left alone here.
    if usage.times_served == 0 and c.observed_at is not None:
        idle_for = _as_utc(now) - _as_utc(c.observed_at)
        if idle_for > max_unused_age:
            return c.model_copy(update={"status": "archived"})

    if influential <= 0:
        return c

    # Used and valued -> reinforce.
    # NOTE(review): the boost is added to the claim's *current* authority, so if the
    # result is persisted and the lifecycle runs again with the same usage counters
    # the boost is applied again — confirm that compounding is intended.
    boosted = min(1.0, c.authority + _REINFORCE_PER_INFLUENCE * influential)
    return c.model_copy(update={"authority": boosted})
File without changes
File without changes
@@ -0,0 +1,128 @@
1
+ """Claude Code capture adapter.
2
+
3
+ Maps Claude Code's per-session transcript records into the Normalized Session
4
+ Format (NSF). Capture is deliberately thin: it normalizes faithfully and knows
5
+ nothing about compile/serve. Adding another harness is a sibling module with the
6
+ same shape — that is what makes `lore` harness-neutral by construction.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from datetime import datetime, timezone
12
+ from pathlib import Path
13
+
14
+ from lore.schemas import NSFEvent
15
+
16
# Static descriptor for this harness; the adapter class exposes it as `manifest`.
MANIFEST = {
    "harness": "claude-code",
    # Where Claude Code keeps per-project session transcripts.
    "log_location": "~/.claude/projects/",
    "session_hook": "PostToolUse/Stop hooks write transcript JSONL (default)",
}
22
+
23
+
24
def _parse_ts(raw: str | None) -> datetime:
    """Parse a transcript timestamp into a timezone-aware UTC datetime.

    A transcript record may omit `timestamp` (older/edited/third-party files), a
    present timestamp may lack a zone, and it may carry a non-UTC offset. All of
    these must yield an aware datetime *in UTC*, or the actuation lifecycle (which
    subtracts `now` in UTC) crashes with a naive-vs-aware TypeError downstream.

    Args:
        raw: ISO-8601 timestamp text; a trailing ``Z`` is accepted as UTC.

    Returns:
        The parsed instant converted to UTC. Naive timestamps are assumed to
        already be UTC. A missing, empty, non-string, or unparseable value maps
        to the Unix epoch, so one bad record cannot abort parsing of the whole
        transcript.
    """
    epoch = datetime.fromtimestamp(0, tz=timezone.utc)
    if not raw or not isinstance(raw, str):
        return epoch
    try:
        # `fromisoformat` on older Pythons rejects the "Z" suffix, so spell it out.
        dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    except ValueError:
        return epoch
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    # Same instant, normalized so every stored timestamp shares one zone.
    return dt.astimezone(timezone.utc)
36
+
37
+
38
class ClaudeCodeAdapter:
    """Capture adapter that maps Claude Code transcript records to NSF events.

    Only ``user`` and ``assistant`` records produce events; every other record
    type (summaries, system/meta lines) is ignored. Malformed records or content
    blocks are skipped individually rather than aborting the whole transcript.
    """

    name = "claude-code"
    manifest = MANIFEST

    def parse_records(self, records, session: str | None = None) -> list[NSFEvent]:
        """Convert already-decoded transcript records into a flat list of events.

        Args:
            records: Iterable of decoded transcript records (normally dicts).
            session: Session id to stamp on every event; when omitted, each
                record's own ``sessionId`` is used, falling back to ``"unknown"``.
        """
        events: list[NSFEvent] = []
        for rec in records:
            events.extend(self._record_to_events(rec, session))
        return events

    def parse_transcript(self, path: Path | str, session: str | None = None) -> list[NSFEvent]:
        """Read a JSON Lines transcript file and convert it to events.

        Raises:
            OSError: If the file cannot be read.
            ValueError: If the file is not valid UTF-8 or a line is not valid JSON.
        """
        import json

        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        text = Path(path).read_text(encoding="utf-8")
        # Split on "\n" only: str.splitlines() also breaks on U+2028/U+2029/NEL,
        # which may legally appear unescaped inside a JSON string and would cut
        # a record in half. A trailing "\r" is whitespace and json.loads accepts it.
        records = [json.loads(ln) for ln in text.split("\n") if ln.strip()]
        return self.parse_records(records, session=session)

    # --- internals ---
    def _record_to_events(self, rec: dict, session: str | None) -> list[NSFEvent]:
        """Dispatch one transcript record to the matching per-role converter."""
        if not isinstance(rec, dict):
            return []  # a stray scalar/array line carries no session knowledge
        rtype = rec.get("type")
        if rtype not in ("user", "assistant"):
            # summaries, system/meta lines, anything else: not session knowledge.
            return []

        sid = session or rec.get("sessionId") or "unknown"
        ts = _parse_ts(rec.get("timestamp"))
        message = rec.get("message")
        content = message.get("content") if isinstance(message, dict) else None

        if rtype == "user":
            return self._user_events(content, sid, ts)
        return self._assistant_events(content, sid, ts)

    def _user_events(self, content, sid: str, ts: datetime) -> list[NSFEvent]:
        """Convert the content of a ``user`` record.

        A user turn is either a plain string (a real message) or a list of
        content blocks. In list form, ``text`` blocks are real user messages and
        ``tool_result`` blocks are tool output fed back to the model; other block
        types are ignored.
        """
        if isinstance(content, str):
            return [
                NSFEvent(
                    session=sid, actor="user", kind="user_message", timestamp=ts, content=content
                )
            ]
        events: list[NSFEvent] = []
        for block in content if isinstance(content, list) else []:
            if not isinstance(block, dict):
                continue
            btype = block.get("type")
            if btype == "text":
                events.append(
                    NSFEvent(
                        session=sid, actor="user", kind="user_message", timestamp=ts,
                        content=block.get("text", ""),
                    )
                )
            elif btype == "tool_result":
                events.append(
                    NSFEvent(
                        session=sid,
                        actor="system",
                        kind="tool_result",
                        timestamp=ts,
                        content=_stringify(block.get("content", "")),
                        meta={"tool_use_id": block.get("tool_use_id")},
                    )
                )
        return events

    def _assistant_events(self, content, sid: str, ts: datetime) -> list[NSFEvent]:
        """Convert the content of an ``assistant`` record.

        ``text`` blocks become agent messages and ``tool_use`` blocks become tool
        calls (tool name as content, arguments and id in ``meta``); other block
        types are ignored.
        """
        if isinstance(content, str):
            return [
                NSFEvent(
                    session=sid, actor="agent", kind="agent_message", timestamp=ts, content=content
                )
            ]
        events: list[NSFEvent] = []
        for block in content if isinstance(content, list) else []:
            if not isinstance(block, dict):
                continue
            btype = block.get("type")
            if btype == "text":
                events.append(
                    NSFEvent(
                        session=sid, actor="agent", kind="agent_message", timestamp=ts,
                        content=block.get("text", ""),
                    )
                )
            elif btype == "tool_use":
                events.append(
                    NSFEvent(
                        session=sid, actor="agent", kind="tool_call", timestamp=ts,
                        content=block.get("name", ""),
                        meta={"input": block.get("input", {}), "tool_use_id": block.get("id")},
                    )
                )
        return events
120
+
121
+
122
def _stringify(content) -> str:
    """Flatten a tool-result payload into plain text.

    Args:
        content: A string, a list of content blocks (dicts with an optional
            ``text`` field) and/or plain values, ``None``, or any other value.

    Returns:
        - ``content`` itself when it is a string;
        - ``""`` when it is ``None`` (rather than the literal text ``"None"``);
        - for a list, the non-empty ``text`` of each dict block and the ``str()``
          of each other non-``None`` item, joined with newlines;
        - ``str(content)`` for anything else.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    pieces: list[str] = []
    for block in content:
        if isinstance(block, dict):
            piece = block.get("text")
        elif block is None:
            piece = None
        else:
            piece = str(block)
        if piece:
            # Coerce so a non-string `text` value cannot break the join below.
            pieces.append(str(piece))
    return "\n".join(pieces)
lore/capture/ingest.py ADDED
@@ -0,0 +1,47 @@
1
+ """Capture-from-transcripts.
2
+
3
+ Coding agents already write session transcripts to disk, so for MVP `lore` does
4
+ not need a live hook: it reads the existing transcripts, scrubs secrets, and
5
+ stores them as NSF. Ingestion is incremental (skips already-captured sessions),
6
+ so it is safe to run on a schedule (`lore watch` / cron) — which is what makes
7
+ compilation automatic rather than a chore.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+ from lore.scrub import scrub_events
15
+ from lore.store import LoreStore
16
+
17
+
18
def discover_transcripts(root: Path | str) -> list[Path]:
    """Return every ``*.jsonl`` file under ``root`` (recursively), sorted by path.

    Args:
        root: Directory to search. A leading ``~`` is expanded to the user's home
            directory, so a path such as ``~/.claude/projects`` works as written.

    Returns:
        Sorted list of transcript file paths; empty when ``root`` does not exist
        or is not a directory. Directories whose name happens to end in
        ``.jsonl`` are not returned.
    """
    root = Path(root).expanduser()
    if not root.is_dir():
        return []
    return sorted(p for p in root.rglob("*.jsonl") if p.is_file())
23
+
24
+
25
def ingest_transcripts(
    store: LoreStore, adapter, transcript_dir: Path | str, scrub: bool = True
) -> dict:
    """Ingest every not-yet-captured transcript under ``transcript_dir`` into ``store``.

    The session id is the transcript's file stem. Sessions already present in the
    store are skipped, as are transcripts that fail to parse or yield no events.

    Args:
        store: Destination store; must provide ``list_sessions()`` and
            ``write_session(session_id, events)``.
        adapter: Harness adapter providing ``parse_transcript(path, session=...)``.
        transcript_dir: Directory searched recursively for ``*.jsonl`` files.
        scrub: When true, events pass through ``scrub_events`` before storage.

    Returns:
        A dict with the counts ``discovered``, ``ingested`` and ``redactions``.
    """
    import logging

    log = logging.getLogger(__name__)
    discovered = discover_transcripts(transcript_dir)
    existing = set(store.list_sessions())
    ingested = 0
    redactions = 0
    for path in discovered:
        session_id = path.stem
        if session_id in existing:
            continue  # incremental: never re-ingest a captured session
        try:
            events = adapter.parse_transcript(path, session=session_id)
        except Exception as exc:
            # Best-effort by design: one corrupt file must not stop the run. Debug
            # level, because a transcript still being written fails on every pass.
            log.debug("skipping unparseable transcript %s: %s", path, exc)
            continue
        if not events:
            continue
        if scrub:
            events, n = scrub_events(events)
            redactions += n
        store.write_session(session_id, events)
        # Record the id right away: two files with the same stem in different
        # directories would otherwise overwrite each other and be counted twice
        # within a single run.
        existing.add(session_id)
        ingested += 1
    return {"discovered": len(discovered), "ingested": ingested, "redactions": redactions}
@@ -0,0 +1,56 @@
1
+ """Selective ingestion (C0 lever 3).
2
+
3
+ The dumpyard starts at the input: compiling every session yields mostly noise.
4
+ We gate compilation on *friction and resolution* signals — corrections,
5
+ tool errors, deliberated decisions, gotchas — rather than compiling all sessions.
6
+
7
+ This is a deliberately simple, recall-favoring keyword heuristic for v0.1: when in
8
+ doubt it includes the session, because the actuation loop (usage-driven decay)
9
+ prunes anything that turns out to be unused. The lab's own research warns that
10
+ keyword guidance is a weak lever, so this gate is intentionally only a coarse
11
+ trivial-session filter, not a quality judge.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from lore.schemas import NSFEvent
17
+
18
# Lower-case substrings that mark each signal category. Matching is plain substring
# containment against the lower-cased event text.
_CORRECTION = (
    "no,",
    "no.",
    "not right",
    "that's wrong",
    "that is wrong",
    "actually",
    "instead",
    "don't",
    "do not",
    "revert",
    "undo",
)
_ERROR = (
    "error",
    "failed",
    "failure",
    "exception",
    "traceback",
    "no such",
    "denied",
    "not found",
    "cannot",
    "can't",
)
_DECISION = (
    "let's go with",
    "lets go with",
    "we decided",
    "we chose",
    "decided to",
    "go with",
    "should we use",
    "we'll use",
    "we will use",
)
_GOTCHA = (
    "careful",
    "watch out",
    "gotcha",
    "fires twice",
    "double-fire",
    "race condition",
    "edge case",
    "footgun",
    "subtle",
    "beware",
)
# Procedures and team norms carry no friction words but are core tribal knowledge;
# without these the gate silently drops "how we do X" / "the rule is Y" sessions.
_CONVENTION = (
    "always",
    "never",
    "must",
    "convention",
    "policy",
    "the rule is",
    "team rule",
    "going forward",
    "make sure to",
    "we write",
    "we always",
    "we never",
    "reviewers reject",
    "by convention",
    "standard practice",
)

# Insertion order here is the order in which categories are reported.
_MARKERS = {
    "correction": _CORRECTION,
    "error": _ERROR,
    "decision": _DECISION,
    "gotcha": _GOTCHA,
    "convention": _CONVENTION,
}


def signal_reasons(events: list[NSFEvent]) -> list[str]:
    """List the signal categories that occur anywhere in a session.

    Each event's content is lower-cased and searched for every category's marker
    substrings. The ``error`` category is only credited to ``tool_result`` events,
    so casual mentions of errors in conversation do not count.

    Returns:
        The distinct categories found, ordered as in ``_MARKERS``.
    """
    seen: set[str] = set()
    for event in events:
        lowered = event.content.lower()
        from_tool_output = event.kind == "tool_result"
        for category, needles in _MARKERS.items():
            # Tool errors only count from tool output, not casual mentions.
            if category == "error" and not from_tool_output:
                continue
            for needle in needles:
                if needle in lowered:
                    seen.add(category)
                    break
    return [category for category in _MARKERS if category in seen]
53
+
54
+
55
def session_has_signal(events: list[NSFEvent]) -> bool:
    """Report whether the session contains at least one signal category."""
    return len(signal_reasons(events)) > 0