PyPI - crewlore - Versions diffs - 0.1.0__py3-none-any.whl - Mend

crewlore 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

crewlore-0.1.0.dist-info/METADATA +251 -0
crewlore-0.1.0.dist-info/RECORD +25 -0
crewlore-0.1.0.dist-info/WHEEL +4 -0
crewlore-0.1.0.dist-info/entry_points.txt +2 -0
crewlore-0.1.0.dist-info/licenses/LICENSE +21 -0
lore/__init__.py +3 -0
lore/actuation.py +64 -0
lore/capture/__init__.py +0 -0
lore/capture/adapters/__init__.py +0 -0
lore/capture/adapters/claude_code.py +128 -0
lore/capture/ingest.py +47 -0
lore/capture/signals.py +56 -0
lore/cli.py +184 -0
lore/compile/__init__.py +0 -0
lore/compile/extractor.py +196 -0
lore/compile/llm.py +109 -0
lore/compile/pipeline.py +145 -0
lore/compile/run.py +84 -0
lore/replay.py +63 -0
lore/schemas.py +129 -0
lore/scrub.py +117 -0
lore/serve/__init__.py +0 -0
lore/serve/mcp_server.py +39 -0
lore/serve/server.py +119 -0
lore/store.py +161 -0

crewlore-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,251 @@
+Metadata-Version: 2.4
+Name: crewlore
+Version: 0.1.0
+Summary: An open, local-first, harness-agnostic compiler that turns AI-coding-agent sessions into a versioned, plaintext team tribal-knowledge layer in your own git repo.
+Author: crewlore Contributors
+License: MIT License
+        Copyright (c) 2026 crewlore Contributors
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Keywords: agents,ai,cli,compiler,knowledge,local-first,mcp,tribal-knowledge
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
+Requires-Python: >=3.10
+Requires-Dist: anthropic>=0.39
+Requires-Dist: pydantic<3.0,>=2.0
+Requires-Dist: pyyaml<7.0,>=6.0
+Requires-Dist: rich<16.0,>=13.0
+Requires-Dist: typer<1.0,>=0.12
+Provides-Extra: dev
+Requires-Dist: openai>=1.0; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Requires-Dist: ruff; extra == 'dev'
+Provides-Extra: openai
+Requires-Dist: openai>=1.0; extra == 'openai'
+Provides-Extra: serve
+Requires-Dist: mcp>=1.0; extra == 'serve'
+Description-Content-Type: text/markdown
+# crewlore
+[![CI](https://github.com/srijansk/crewlore/actions/workflows/ci.yml/badge.svg)](https://github.com/srijansk/crewlore/actions/workflows/ci.yml)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![fidelity 100%](https://img.shields.io/badge/fidelity-100%25-success.svg)](docs/examples/pydantic-ai/)
+[![claims compiled 18](https://img.shields.io/badge/claims_compiled-18-informational.svg)](docs/examples/pydantic-ai/)
+> **Your coding agents keep relearning what your team already figured out.**
+> `crewlore` compiles agent sessions into a citable, plaintext team-knowledge layer that lives in your git repo. Local-first.
+<p align="center">
+  <img src="docs/assets/demo.gif" alt="crewlore in action — sessions compiled into a citable team knowledge book" />
+</p>
+```bash
+pipx install git+https://github.com/srijansk/crewlore.git
+```
+> **Validated on [`pydantic/pydantic-ai`](https://github.com/pydantic/pydantic-ai)** (17.3k ⭐) · 3 sessions · 18 claims · 100% fidelity · [see receipts →](docs/examples/pydantic-ai/)
+## Quickstart
+```bash
+cd my-repo
+lore init                      # create .lore/ in your repo
+lore watch                     # automatic: read agent transcripts, scrub secrets,
+                               #   compile to claims, prune — on an interval
+lore query "billing webhook"   # ask the knowledge layer anything, anytime
+```
+That's it — engineers keep working in whatever agent they use; `lore` keeps the knowledge layer fresh in the background. Commit `.lore/knowledge` and `.lore/claims` and your teammates inherit it on the next `git pull`.
+<details>
+<summary>Trouble installing?</summary>
+If `pipx` fails with `Broken Python installation, platform.mac_ver() returned an empty value`, your default Python is a broken install (sometimes seen with very recent Homebrew Python 3.14 builds). Pin a known-working interpreter:
+```bash
+pipx install --python python3.13 git+https://github.com/srijansk/crewlore.git
+```
+To make pipx default to Python 3.13 going forward: `export PIPX_DEFAULT_PYTHON=$(which python3.13)`.
+</details>
+### Try it in 30 seconds — no API key
+```bash
+git clone https://github.com/srijansk/crewlore.git
+cd crewlore && uv run python scripts/demo.py
+```
+The demo runs the full loop on bundled public-safe sessions and prints what it found:
+> [!NOTE]
+> **Fidelity — 100%.** Every claim's citation resolves verbatim back to its source.
+> **Conflicts surfaced — 1.** A real disagreement kept with both provenances, not silently merged.
+> **Preventable rediscovery — 2 of 3.** Two of the three held-out follow-up sessions re-derived knowledge the layer already had. (Illustrative demo data — n=3, not a benchmark.)
+## See it run on a real codebase: pydantic-ai (17.3k ⭐)
+[`docs/examples/pydantic-ai/`](docs/examples/pydantic-ai/) is a committed snapshot of `crewlore` compiled on the public [`pydantic/pydantic-ai`](https://github.com/pydantic/pydantic-ai) repo — 3 Claude Code sessions on real issues, no synthetic data.
+- **18 claims** compiled across 9 scope groupings (UI adapters, decorator introspection, durable-execution threat modeling, toolsets, tests, version policy)
+- **100% fidelity** under the explicit [canonical-form contract](docs/anchors.md) — every anchor's quote canonically resolves to a substring of its source session. (Fidelity certifies the *citation* is real, not that the model's *statement* is fully entailed by it — that's what human/PR review of the book is for.)
+- **0 conflicts** because the three sessions covered disjoint scopes — the conflict detector wasn't given anything to flag
+- **Receipts:** the rendered [`book.md`](docs/examples/pydantic-ai/book.md), the raw [`claims.jsonl`](docs/examples/pydantic-ai/claims.jsonl), and full [`provenance.md`](docs/examples/pydantic-ai/provenance.md) (session ids, commit hashes, compile cost, scrub redactions, five real-data bugs the capture surfaced and we fixed before publishing)
+## What you get
+Raw, messy sessions go in. Out comes a structured, citable **compiled claim** — every one carrying its kind, its scope, the action it implies for future work, and a verbatim **anchor** back to the moment it was discovered:
+> **`[gotcha]`** · *services/billing*
+>
+> Billing webhook handler lacks an idempotency check, causing duplicate charges when Stripe retries webhooks.
+>
+> **Do** — dedupe on the Stripe idempotency key before processing.
+>
+> > *anchor* — "the handler has no idempotency check, so when Stripe retries a webhook the charge is processed again."
+A human can verify it (the anchor points back to the exact session line); an agent can trust it (the citation is real, not hallucinated). Claims roll up into a knowledge book at `.lore/knowledge/README.md`, grouped by area and committed to your repo alongside your code:
+```markdown
+# Team knowledge (compiled by crewlore)
+## services/billing
+- **[gotcha]** Billing webhook handler lacks an idempotency check; dedupe on the Stripe key.
+  - *Do:* Dedupe on the Stripe idempotency key before processing.
+  - _anchor_ `ses_1#1`: "the handler has no idempotency check, so when Stripe retries a webhook the charge is processed again."
+## deployment
+- **[procedure]** Run migrations before deploy to prevent missing columns.
+  - *Do:* Run `make migrate` before every deploy.
+```
+## How it works
+```mermaid
+flowchart LR
+    S["coding agent<br/>sessions"] --> I["ingest + scrub<br/>(transcripts → NSF,<br/>secrets redacted)"]
+    I --> C["compile<br/>(NSF → claims,<br/>verbatim anchors)"]
+    C --> R["<b>.lore/</b> in your repo<br/>(knowledge book + claims,<br/>plaintext, git-versioned)"]
+    R --> SV["serve<br/>(files + MCP query)"]
+    SV --> N["next agent session<br/>inherits the knowledge"]
+    SV -. "usage signal" .-> AL["actuation loop<br/>(decay · reinforce · retire)"]
+    AL -. "lifecycle update" .-> R
+    classDef engine fill:#4a5d9e,stroke:#1a2c4d,color:#fff,stroke-width:2px
+    classDef artifact fill:#2d6a4f,stroke:#1b4332,color:#fff,stroke-width:2px
+    class C engine
+    class R artifact
+```
+> `lore watch` runs ingest → compile → prune automatically, on an interval.
+- **Ingest + scrub** — reads the coding agent's existing on-disk transcripts and redacts a curated set of secret patterns (Anthropic / OpenAI / generic `sk-*` API keys, AWS keys, GitHub classic + fine-grained PATs, Google API keys, Slack tokens, HuggingFace tokens, JWTs, connection-string passwords, private-key blocks, and `password=…` assignment shapes) *before* anything is stored or sent to a model. The pattern set is documented in [`docs/scrub.md`](docs/scrub.md).
+- **Compile** — extracts atomic claims, deduplicates them, records disagreements instead of silently overwriting, scores authority by how often a claim recurs, and drops any claim whose citation doesn't resolve verbatim.
+- **Serve** — writes a human- and agent-readable knowledge book to `.lore/knowledge/`, and exposes a query tool (including an optional MCP server) so any agent can pull the relevant slice on demand.
+- **Actuation loop** — every retrieval is recorded, and that usage drives a lifecycle: unused claims decay and archive, contradicted claims are retired, useful claims are reinforced. The store stays small and fresh instead of growing into a pile nobody reads.
+The intelligence is in **compile**; ingest and serve are deliberately thin, so supporting another coding agent is a small adapter, not a rewrite. To be precise about the word "compile": extraction is an LLM step (the only non-deterministic part), wrapped in deterministic stages — verbatim-anchor verification, content-addressed dedup, conflict recording, and authority scoring. "Compile" means the repeatable session → claims transform, not that an LLM is absent.
+## How it differs
+- **vs. hosted memory (Letta, mem0)** — their store lives in someone else's cloud and you can't `git log` it; `crewlore`'s lives in your repo as plaintext.
+- **vs. per-IDE memory (Cursor rules, Claude memory, Continue, Cody)** — tied to one developer, one IDE; `crewlore` is a *team* artifact, committed and reviewed like code.
+- **vs. hand-curated `CLAUDE.md` / `.cursorrules`** — humans write those by hand and they go stale; `crewlore` compiles + reinforces from real sessions and retires what stops being used.
+- **vs. RAG over a vector DB** — RAG retrieves *document chunks*; `crewlore` compiles atomic, citable *claims* with verbatim anchors, so a human or agent can verify the cited source in seconds. (Retrieval today is deterministic lexical overlap, not embeddings — simpler and dependency-free; semantic ranking is on the roadmap.)
+## Why this exists
+Knowledge discovered inside an agent session is private by default and lost by default. It lives in one developer's transcript, so the next engineer — and every future agent run — re-reads the same files, re-learns the same gotcha, and re-makes a decision the team already made. There's no shared layer that both humans and agents read from, so decisions drift and bugs resurface.
+`crewlore` makes that knowledge a first-class, versioned artifact in the place your team already trusts: your git repo.
+**What it is:** a compiler that turns sessions into accurate, deduplicated, conflict-aware, provenance-carrying team knowledge, served back to any agent.
+**What it isn't:** a hosted service, a vector database, or a personal-memory layer for a single IDE. There's no account, no cloud, and no proprietary store — the compiled knowledge is plaintext you own.
+## Your data stays yours
+- **Local-first.** Capture, compile, and serve all run on infrastructure you control. Point the compiler at your own model provider or a local OpenAI-compatible model (Ollama, LM Studio, vLLM) via `provider: local` — nothing routes through any `crewlore`-operated service, because there is none.
+- **Plaintext, in your repo.** The knowledge layer is human-readable Markdown and JSONL under `.lore/`, versioned by git. `git log .lore/` is your audit trail.
+- **Secrets never travel.** Scrubbing — of both message content and tool-call arguments — happens at ingest, before storage or any model call. It's a high-precision pattern set (a floor, not a DLP guarantee; see [`docs/scrub.md`](docs/scrub.md)), and raw session captures are git-ignored by default regardless.
+## CLI
+| Command | What it does |
+|---|---|
+| `lore init` | Create the `.lore/` layout in your repo. |
+| `lore watch` | Automatically ingest → compile → prune on an interval (`--once` for cron/CI). |
+| `lore compile` | Run a single ingest-and-compile pass manually. |
+| `lore query "<task>"` | Retrieve the claims most relevant to a task (records usage). |
+| `lore status` | Show claim/conflict counts and how much of the layer is actually being used. |
+| `lore serve --mcp` | Start an MCP server exposing query-time retrieval to any MCP-speaking agent (Claude Desktop, Cursor, …). Requires `pip install 'crewlore[serve]'`. See [`docs/mcp.md`](docs/mcp.md) for wiring snippets. |
+## Configuration
+`.lore/config.yaml`:
+```yaml
+model:
+  provider: anthropic          # anthropic | openai | local
+  name: claude-sonnet-4-6
+  # For provider: local — point at any OpenAI-compatible endpoint you run:
+  # base_url: http://localhost:11434/v1   # e.g. Ollama, LM Studio, vLLM
+capture:
+  transcripts: ~/.claude/projects
+compile:
+  cadence: auto                # `lore watch` interval below
+  watch_interval_seconds: 300
+```
+Bring your own key (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`); `crewlore` never ships keys anywhere. The default Anthropic provider works out of the box. For OpenAI or a local OpenAI-compatible model, add the SDK: `pipx inject crewlore openai` (or `pip install 'crewlore[openai]'`). With `provider: local` nothing leaves your machine at all — the compile call hits your own endpoint.
+## Roadmap & limitations
+> [!NOTE]
+> **Status: alpha.** The core is stable and tested end to end. The on-disk schema may change before 1.0 — and because everything is plaintext and git-versioned, breaking format changes will ship with migrations.
+- **Stable today:** capture, secret scrubbing, the compile pipeline, retrieval, the actuation loop, and the `.lore/` plaintext format.
+- **In flight:** cross-session conflict alignment — real disagreements are surfaced today, but reliably aligning claims about the same question across independently-compiled sessions is an active area of work.
+- **Planned:** an explicit human approve-before-serve gate (secret scrubbing is already automated), more capture adapters beyond Claude Code, and a real-time capture hook.
+## Contributing
+Issues, discussions, and PRs welcome. New here? Start a [discussion](https://github.com/srijansk/crewlore/discussions) — adding a capture adapter for another coding agent is the most valuable first contribution and is intentionally small. See [CONTRIBUTING.md](CONTRIBUTING.md) for local setup and the dev loop.
+Tests are fully deterministic — no real API calls during `pytest`.
+## License
+MIT — see [LICENSE](LICENSE).

crewlore-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,25 @@
+lore/__init__.py,sha256=hXh2s7JVltYCqqA4V434frRg8_TCCwJXL2cmf0GKiA0,116
+lore/actuation.py,sha256=zISvBTvkDWA90zA-8Or0-XJBtfBSavHW7En93dtXfFg,2092
+lore/cli.py,sha256=JwLwmTzkbGnpPKZCtdMm6GOzfulBqDWuJ6W405ctgkQ,6381
+lore/replay.py,sha256=DbkHqp-qy7ittIZjSQjfwcTvFlt_RKMwy1prl0p1Tbk,2343
+lore/schemas.py,sha256=uUqPIuyb9p2Aeyc_Qr9wv1IigS_t21-3idBb1wf_2WY,4472
+lore/scrub.py,sha256=cEEcC2R3km0QYZrn4qZHf96hAK6M75LAJb7YjfoN8PA,5164
+lore/store.py,sha256=vqnk4HlEAajrL1rRM0Nhru0qq1x-mz8PXVglv4FoxD0,6426
+lore/capture/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lore/capture/ingest.py,sha256=rYOqOeDMFI6t6t6_gPHW3iBhGMUE2EpWjeGfci4oj48,1580
+lore/capture/signals.py,sha256=aH4Z1D7tkN25MgeRCaeiKU6yFVDlFG9O-S661C63qtA,2545
+lore/capture/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lore/capture/adapters/claude_code.py,sha256=sIe7QISaFC3VODOnVv5JQcajZCK1ZlHd55bmIHXlp1o,4856
+lore/compile/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lore/compile/extractor.py,sha256=2d4f6eanYqhUCnZpZ0zebFtqEyyoyNfJRT7nstfu664,7644
+lore/compile/llm.py,sha256=lILzulwGi66C1Tp8G2_zflReOARROF1l5X_wwFXEMW4,4539
+lore/compile/pipeline.py,sha256=_MPlJ1aesoDpc8XfEAJHhVvDKmrg5ST9vNZh_kQNE2o,5291
+lore/compile/run.py,sha256=QC5yLDNNE_pmn1EJVvmTZaVmaUnjSYTvwUJrBGXzDIM,3643
+lore/serve/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lore/serve/mcp_server.py,sha256=YL5Tj1RDvZkeY17N-Ke-Vi31sPdg72_grSL2f4LZMHY,1210
+lore/serve/server.py,sha256=A-l0vX0lyzSKgz30YVM1j7X3kzCe5BLN_0lEkpnZL2E,4453
+crewlore-0.1.0.dist-info/METADATA,sha256=tJiPpabgjFe0_W8BBSCBMmE3bR1TqSy-SO-Ctm0ODBg,15788
+crewlore-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+crewlore-0.1.0.dist-info/entry_points.txt,sha256=dr_gXSDsmfXndWIangQItQuuwPbyRRYNGCZGCyNZ5h0,38
+crewlore-0.1.0.dist-info/licenses/LICENSE,sha256=4zTEbmIxkckIfU69ENjzoqoniXkg91RU8KdbpeldYu0,1078
+crewlore-0.1.0.dist-info/RECORD,,

crewlore-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

crewlore-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+lore = lore.cli:app

crewlore-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 crewlore Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

lore/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""crewlore: a local-first compiler for team tribal knowledge from coding-agent sessions."""
+# Package version string; matches the `Version:` field published in the wheel METADATA.
+__version__ = "0.1.0"

lore/actuation.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""The actuation loop's lifecycle (C0 lever 2).
+A knowledge layer's value is an actuation property, not a storage property. Left
+alone, a compiled store grows monotonically and rots into a dumpyard. Usage —
+recorded by the serve layer — drives a homeostatic lifecycle so the *active* set
+plateaus and churns:
+- never-served claims past a staleness window decay to `archived`;
+- claims overridden in real use (wrong/stale) are retired;
+- claims that proved influential are reinforced (authority up).
+Run this periodically (e.g. after each compile, or on a cron) over the store.
+"""
+from __future__ import annotations
+from datetime import datetime, timedelta, timezone
+from lore.schemas import Claim
+# Authority added per recorded influential use when `_step` reinforces a claim
+# (the boosted authority is capped at 1.0 there).
+_REINFORCE_PER_INFLUENCE = 0.1
+def _as_utc(dt: datetime) -> datetime:
+    """Coerce a possibly-naive datetime to UTC so aware/naive subtraction can't crash."""
+    return dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
+def apply_lifecycle(
+    claims: list[Claim],
+    *,
+    now: datetime,
+    max_unused_age: timedelta,
+    override_threshold: int = 2,
+) -> list[Claim]:
+    out: list[Claim] = []
+    for c in claims:
+        out.append(_step(c, now, max_unused_age, override_threshold))
+    return out
+def _step(c: Claim, now: datetime, max_unused_age: timedelta, override_threshold: int) -> Claim:
+    if c.status != "active":
+        return c
+    u = c.usage
+    # Retired by real-use contradiction.
+    if u.times_overridden >= override_threshold and u.times_overridden > u.times_influential:
+        return c.model_copy(update={"status": "archived"})
+    # Never used and stale -> decay out of the active set.
+    if (
+        u.times_served == 0
+        and c.observed_at is not None
+        and (_as_utc(now) - _as_utc(c.observed_at)) > max_unused_age
+    ):
+        return c.model_copy(update={"status": "archived"})
+    # Used and valued -> reinforce.
+    if u.times_influential > 0:
+        boosted = min(1.0, c.authority + _REINFORCE_PER_INFLUENCE * u.times_influential)
+        return c.model_copy(update={"authority": boosted})
+    return c

lore/capture/__init__.py ADDED Viewed

File without changes

lore/capture/adapters/__init__.py ADDED Viewed

File without changes

lore/capture/adapters/claude_code.py ADDED Viewed

@@ -0,0 +1,128 @@
+"""Claude Code capture adapter.
+Maps Claude Code's per-session transcript records into the Normalized Session
+Format (NSF). Capture is deliberately thin: it normalizes faithfully and knows
+nothing about compile/serve. Adding another harness is a sibling module with the
+same shape — that is what makes `lore` harness-neutral by construction.
+"""
+from __future__ import annotations
+from datetime import datetime, timezone
+from pathlib import Path
+from lore.schemas import NSFEvent
+# Static description of this harness; exposed to callers as `ClaudeCodeAdapter.manifest`.
+MANIFEST = {
+    "harness": "claude-code",
+    # Where Claude Code keeps per-project session transcripts.
+    "log_location": "~/.claude/projects/",
+    # Free-text note on how transcripts get written; nothing in this module reads it.
+    "session_hook": "PostToolUse/Stop hooks write transcript JSONL (default)",
+}
+def _parse_ts(raw: str | None) -> datetime:
+    """Always return a timezone-aware UTC datetime.
+    A transcript record may omit `timestamp` (older/edited/third-party files), and
+    a present timestamp may lack a zone. Both must yield an aware datetime, or the
+    actuation lifecycle (which subtracts `now` in UTC) crashes with a naive-vs-aware
+    TypeError downstream.
+    """
+    if not raw:
+        return datetime.fromtimestamp(0, tz=timezone.utc)
+    dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
+    return dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
+class ClaudeCodeAdapter:
+    name = "claude-code"
+    manifest = MANIFEST
+    def parse_records(self, records, session: str | None = None) -> list[NSFEvent]:
+        events: list[NSFEvent] = []
+        for rec in records:
+            events.extend(self._record_to_events(rec, session))
+        return events
+    def parse_transcript(self, path: Path | str, session: str | None = None) -> list[NSFEvent]:
+        import json
+        text = Path(path).read_text()
+        records = [json.loads(ln) for ln in text.splitlines() if ln.strip()]
+        return self.parse_records(records, session=session)
+    # --- internals ---
+    def _record_to_events(self, rec: dict, session: str | None) -> list[NSFEvent]:
+        rtype = rec.get("type")
+        sid = session or rec.get("sessionId") or "unknown"
+        ts = _parse_ts(rec.get("timestamp"))
+        message = rec.get("message") or {}
+        content = message.get("content")
+        if rtype == "user":
+            return self._user_events(content, sid, ts)
+        if rtype == "assistant":
+            return self._assistant_events(content, sid, ts)
+        # summaries, system/meta lines, anything else: not session knowledge.
+        return []
+    def _user_events(self, content, sid: str, ts: datetime) -> list[NSFEvent]:
+        # A user turn is either a plain string (a real message) or a list of
+        # blocks (tool results fed back to the model).
+        if isinstance(content, str):
+            return [
+                NSFEvent(
+                    session=sid, actor="user", kind="user_message", timestamp=ts, content=content
+                )
+            ]
+        events: list[NSFEvent] = []
+        for block in content or []:
+            if block.get("type") == "tool_result":
+                events.append(
+                    NSFEvent(
+                        session=sid,
+                        actor="system",
+                        kind="tool_result",
+                        timestamp=ts,
+                        content=_stringify(block.get("content", "")),
+                        meta={"tool_use_id": block.get("tool_use_id")},
+                    )
+                )
+        return events
+    def _assistant_events(self, content, sid: str, ts: datetime) -> list[NSFEvent]:
+        events: list[NSFEvent] = []
+        if isinstance(content, str):
+            return [
+                NSFEvent(
+                    session=sid, actor="agent", kind="agent_message", timestamp=ts, content=content
+                )
+            ]
+        for block in content or []:
+            btype = block.get("type")
+            if btype == "text":
+                events.append(
+                    NSFEvent(
+                        session=sid, actor="agent", kind="agent_message", timestamp=ts,
+                        content=block.get("text", ""),
+                    )
+                )
+            elif btype == "tool_use":
+                events.append(
+                    NSFEvent(
+                        session=sid, actor="agent", kind="tool_call", timestamp=ts,
+                        content=block.get("name", ""),
+                        meta={"input": block.get("input", {}), "tool_use_id": block.get("id")},
+                    )
+                )
+        return events
+def _stringify(content) -> str:
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts = [b.get("text", "") if isinstance(b, dict) else str(b) for b in content]
+        return "\n".join(p for p in parts if p)
+    return str(content)

lore/capture/ingest.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Capture-from-transcripts.
+Coding agents already write session transcripts to disk, so for MVP `lore` does
+not need a live hook: it reads the existing transcripts, scrubs secrets, and
+stores them as NSF. Ingestion is incremental (skips already-captured sessions),
+so it is safe to run on a schedule (`lore watch` / cron) — which is what makes
+compilation automatic rather than a chore.
+"""
+from __future__ import annotations
+from pathlib import Path
+from lore.scrub import scrub_events
+from lore.store import LoreStore
+def discover_transcripts(root: Path | str) -> list[Path]:
+    root = Path(root)
+    if not root.exists():
+        return []
+    return sorted(root.rglob("*.jsonl"))
+def ingest_transcripts(
+    store: LoreStore, adapter, transcript_dir: Path | str, scrub: bool = True
+) -> dict:
+    discovered = discover_transcripts(transcript_dir)
+    existing = set(store.list_sessions())
+    ingested = 0
+    redactions = 0
+    for path in discovered:
+        session_id = path.stem
+        if session_id in existing:
+            continue  # incremental: never re-ingest a captured session
+        try:
+            events = adapter.parse_transcript(path, session=session_id)
+        except Exception:
+            continue  # skip unparseable/corrupt transcript files
+        if not events:
+            continue
+        if scrub:
+            events, n = scrub_events(events)
+            redactions += n
+        store.write_session(session_id, events)
+        ingested += 1
+    return {"discovered": len(discovered), "ingested": ingested, "redactions": redactions}

lore/capture/signals.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""Selective ingestion (C0 lever 3).
+The dumpyard starts at the input: compiling every session yields mostly noise.
+We gate compilation on *friction and resolution* signals — corrections,
+tool errors, deliberated decisions, gotchas — rather than compiling all sessions.
+This is a deliberately simple, recall-favoring keyword heuristic for v0.1: when in
+doubt it includes the session, because the actuation loop (usage-driven decay)
+prunes anything that turns out to be unused. The lab's own research warns that
+keyword guidance is a weak lever, so this gate is intentionally only a coarse
+trivial-session filter, not a quality judge.
+"""
+from __future__ import annotations
+from lore.schemas import NSFEvent
+# Marker phrases per signal category. `signal_reasons` lower-cases each event's
+# text and tests these as plain substrings, so every entry here is lower-case.
+# Pushback / course-correction phrasing.
+_CORRECTION = ("no,", "no.", "not right", "that's wrong", "that is wrong", "actually",
+               "instead", "don't", "do not", "revert", "undo")
+# Failure vocabulary; `signal_reasons` only counts these on `tool_result` events.
+_ERROR = ("error", "failed", "failure", "exception", "traceback", "no such",
+          "denied", "not found", "cannot", "can't")
+# Phrasing that marks a choice being made or weighed.
+_DECISION = ("let's go with", "lets go with", "we decided", "we chose", "decided to",
+             "go with", "should we use", "we'll use", "we will use")
+# Warnings about traps and surprising behaviour.
+_GOTCHA = ("careful", "watch out", "gotcha", "fires twice", "double-fire", "race condition",
+           "edge case", "footgun", "subtle", "beware")
+# Procedures and team norms carry no friction words but are core tribal knowledge;
+# without these the gate silently drops "how we do X" / "the rule is Y" sessions.
+_CONVENTION = ("always", "never", "must", "convention", "policy", "the rule is", "team rule",
+               "going forward", "make sure to", "we write", "we always", "we never",
+               "reviewers reject", "by convention", "standard practice")
+# Category name -> marker phrases. The key order here is the order in which
+# `signal_reasons` reports categories.
+_MARKERS = {
+    "correction": _CORRECTION,
+    "error": _ERROR,
+    "decision": _DECISION,
+    "gotcha": _GOTCHA,
+    "convention": _CONVENTION,
+}
+def signal_reasons(events: list[NSFEvent]) -> list[str]:
+    """Return the distinct signal categories present in a session, in fixed order."""
+    found: set[str] = set()
+    for ev in events:
+        text = ev.content.lower()
+        for reason, markers in _MARKERS.items():
+            if any(m in text for m in markers):
+                # Tool errors only count from tool output, not casual mentions.
+                if reason == "error" and ev.kind != "tool_result":
+                    continue
+                found.add(reason)
+    return [r for r in _MARKERS if r in found]
+def session_has_signal(events: list[NSFEvent]) -> bool:
+    return bool(signal_reasons(events))