agentcairn 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentcairn-0.1.0/.github/workflows/bench.yml +23 -0
- agentcairn-0.1.0/.github/workflows/ci.yml +34 -0
- agentcairn-0.1.0/.github/workflows/release.yml +35 -0
- agentcairn-0.1.0/.github/workflows/site.yml +22 -0
- agentcairn-0.1.0/.gitignore +49 -0
- agentcairn-0.1.0/.pre-commit-config.yaml +28 -0
- agentcairn-0.1.0/.python-version +1 -0
- agentcairn-0.1.0/CLAUDE.md +46 -0
- agentcairn-0.1.0/LICENSE +202 -0
- agentcairn-0.1.0/NOTICE +4 -0
- agentcairn-0.1.0/PKG-INFO +158 -0
- agentcairn-0.1.0/README.md +129 -0
- agentcairn-0.1.0/benchmarks/README.md +126 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/__init__.py +1 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/ablation.py +72 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/adapters/__init__.py +1 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/adapters/locomo.py +99 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/adapters/longmemeval.py +50 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/build.py +25 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/config.py +67 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/download.py +97 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/models.py +18 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/qa/__init__.py +1 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/qa/generate.py +37 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/qa/judge.py +87 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/qa/provider.py +51 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/report.py +163 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/retrieval_metrics.py +57 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/run.py +257 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/token_savings.py +83 -0
- agentcairn-0.1.0/benchmarks/cairn_bench/vaultize.py +16 -0
- agentcairn-0.1.0/benchmarks/fixtures/synthetic/locomo_synth.json +27 -0
- agentcairn-0.1.0/benchmarks/fixtures/synthetic/longmemeval_synth.json +67 -0
- agentcairn-0.1.0/benchmarks/manifest.toml +17 -0
- agentcairn-0.1.0/benchmarks/tests/__init__.py +1 -0
- agentcairn-0.1.0/benchmarks/tests/conftest.py +21 -0
- agentcairn-0.1.0/benchmarks/tests/test_adapters.py +69 -0
- agentcairn-0.1.0/benchmarks/tests/test_download.py +33 -0
- agentcairn-0.1.0/benchmarks/tests/test_locomo_denominator.py +65 -0
- agentcairn-0.1.0/benchmarks/tests/test_metrics.py +206 -0
- agentcairn-0.1.0/benchmarks/tests/test_qa.py +107 -0
- agentcairn-0.1.0/benchmarks/tests/test_synthetic.py +52 -0
- agentcairn-0.1.0/benchmarks/tests/test_token_savings.py +65 -0
- agentcairn-0.1.0/docs/plans/2026-06-08-01-scaffold-and-vault.md +983 -0
- agentcairn-0.1.0/docs/plans/2026-06-08-02-index-and-embed.md +933 -0
- agentcairn-0.1.0/docs/plans/2026-06-08-03-search.md +582 -0
- agentcairn-0.1.0/docs/plans/2026-06-08-04-ingest.md +1214 -0
- agentcairn-0.1.0/docs/plans/2026-06-08-05-mcp.md +740 -0
- agentcairn-0.1.0/docs/plans/2026-06-09-06-benchmark-harness.md +1589 -0
- agentcairn-0.1.0/docs/plans/2026-06-09-07-reranker-on.md +378 -0
- agentcairn-0.1.0/docs/plans/2026-06-09-08-ollama-embedder.md +390 -0
- agentcairn-0.1.0/docs/plans/2026-06-09-09-bitemporal-validity.md +501 -0
- agentcairn-0.1.0/docs/plans/2026-06-10-agentcairn-website.md +1103 -0
- agentcairn-0.1.0/docs/specs/2026-06-08-agentcairn-design.md +176 -0
- agentcairn-0.1.0/docs/specs/2026-06-09-benchmark-harness-design.md +154 -0
- agentcairn-0.1.0/docs/specs/2026-06-09-bitemporal-validity-design.md +129 -0
- agentcairn-0.1.0/docs/specs/2026-06-09-ollama-embedder-design.md +70 -0
- agentcairn-0.1.0/docs/specs/2026-06-09-reranker-on-design.md +55 -0
- agentcairn-0.1.0/docs/specs/2026-06-10-agentcairn-website-design.md +249 -0
- agentcairn-0.1.0/pyproject.toml +64 -0
- agentcairn-0.1.0/src/cairn/__init__.py +4 -0
- agentcairn-0.1.0/src/cairn/cli.py +304 -0
- agentcairn-0.1.0/src/cairn/config.py +62 -0
- agentcairn-0.1.0/src/cairn/embed/__init__.py +37 -0
- agentcairn-0.1.0/src/cairn/embed/base.py +18 -0
- agentcairn-0.1.0/src/cairn/embed/fake.py +33 -0
- agentcairn-0.1.0/src/cairn/embed/fastembed_embedder.py +33 -0
- agentcairn-0.1.0/src/cairn/embed/ollama_embedder.py +80 -0
- agentcairn-0.1.0/src/cairn/index/__init__.py +27 -0
- agentcairn-0.1.0/src/cairn/index/build.py +231 -0
- agentcairn-0.1.0/src/cairn/index/chunk.py +86 -0
- agentcairn-0.1.0/src/cairn/index/schema.py +72 -0
- agentcairn-0.1.0/src/cairn/ingest/__init__.py +32 -0
- agentcairn-0.1.0/src/cairn/ingest/dedup.py +33 -0
- agentcairn-0.1.0/src/cairn/ingest/distill.py +63 -0
- agentcairn-0.1.0/src/cairn/ingest/importance.py +78 -0
- agentcairn-0.1.0/src/cairn/ingest/locate.py +101 -0
- agentcairn-0.1.0/src/cairn/ingest/models.py +63 -0
- agentcairn-0.1.0/src/cairn/ingest/pipeline.py +75 -0
- agentcairn-0.1.0/src/cairn/ingest/redact.py +130 -0
- agentcairn-0.1.0/src/cairn/mcp/__init__.py +17 -0
- agentcairn-0.1.0/src/cairn/mcp/server.py +88 -0
- agentcairn-0.1.0/src/cairn/mcp/tools.py +265 -0
- agentcairn-0.1.0/src/cairn/search/__init__.py +24 -0
- agentcairn-0.1.0/src/cairn/search/engine.py +396 -0
- agentcairn-0.1.0/src/cairn/search/rerank.py +35 -0
- agentcairn-0.1.0/src/cairn/temporal.py +76 -0
- agentcairn-0.1.0/src/cairn/vault/__init__.py +6 -0
- agentcairn-0.1.0/src/cairn/vault/models.py +35 -0
- agentcairn-0.1.0/src/cairn/vault/parse.py +139 -0
- agentcairn-0.1.0/src/cairn/vault/patterns.py +17 -0
- agentcairn-0.1.0/src/cairn/vault/write.py +29 -0
- agentcairn-0.1.0/tests/__init__.py +1 -0
- agentcairn-0.1.0/tests/embed/__init__.py +1 -0
- agentcairn-0.1.0/tests/embed/test_fake.py +17 -0
- agentcairn-0.1.0/tests/embed/test_fastembed_integration.py +24 -0
- agentcairn-0.1.0/tests/embed/test_get_embedder.py +50 -0
- agentcairn-0.1.0/tests/embed/test_ollama.py +110 -0
- agentcairn-0.1.0/tests/index/__init__.py +1 -0
- agentcairn-0.1.0/tests/index/test_build.py +78 -0
- agentcairn-0.1.0/tests/index/test_chunk.py +39 -0
- agentcairn-0.1.0/tests/index/test_reconcile.py +171 -0
- agentcairn-0.1.0/tests/index/test_schema.py +106 -0
- agentcairn-0.1.0/tests/ingest/__init__.py +1 -0
- agentcairn-0.1.0/tests/ingest/test_dedup.py +33 -0
- agentcairn-0.1.0/tests/ingest/test_distill.py +58 -0
- agentcairn-0.1.0/tests/ingest/test_importance.py +26 -0
- agentcairn-0.1.0/tests/ingest/test_locate.py +150 -0
- agentcairn-0.1.0/tests/ingest/test_pipeline.py +116 -0
- agentcairn-0.1.0/tests/ingest/test_redact.py +264 -0
- agentcairn-0.1.0/tests/mcp/__init__.py +1 -0
- agentcairn-0.1.0/tests/mcp/test_server.py +98 -0
- agentcairn-0.1.0/tests/mcp/test_tools.py +556 -0
- agentcairn-0.1.0/tests/search/__init__.py +1 -0
- agentcairn-0.1.0/tests/search/test_engine.py +68 -0
- agentcairn-0.1.0/tests/search/test_rerank_integration.py +21 -0
- agentcairn-0.1.0/tests/search/test_search.py +421 -0
- agentcairn-0.1.0/tests/test_cli.py +298 -0
- agentcairn-0.1.0/tests/test_config.py +59 -0
- agentcairn-0.1.0/tests/test_temporal.py +98 -0
- agentcairn-0.1.0/tests/vault/__init__.py +0 -0
- agentcairn-0.1.0/tests/vault/test_forward_refs.py +10 -0
- agentcairn-0.1.0/tests/vault/test_models.py +32 -0
- agentcairn-0.1.0/tests/vault/test_parse_inline_fields.py +19 -0
- agentcairn-0.1.0/tests/vault/test_parse_note.py +72 -0
- agentcairn-0.1.0/tests/vault/test_parse_observations.py +41 -0
- agentcairn-0.1.0/tests/vault/test_parse_relations.py +24 -0
- agentcairn-0.1.0/tests/vault/test_patterns.py +19 -0
- agentcairn-0.1.0/tests/vault/test_roundtrip.py +59 -0
- agentcairn-0.1.0/uv.lock +1670 -0
- agentcairn-0.1.0/website/.gitignore +12 -0
- agentcairn-0.1.0/website/.nvmrc +1 -0
- agentcairn-0.1.0/website/README.md +24 -0
- agentcairn-0.1.0/website/astro.config.mjs +14 -0
- agentcairn-0.1.0/website/package-lock.json +9419 -0
- agentcairn-0.1.0/website/package.json +37 -0
- agentcairn-0.1.0/website/playwright.config.ts +10 -0
- agentcairn-0.1.0/website/public/.assetsignore +2 -0
- agentcairn-0.1.0/website/public/favicon.svg +1 -0
- agentcairn-0.1.0/website/public/og.png +0 -0
- agentcairn-0.1.0/website/scripts/gen-og.mjs +14 -0
- agentcairn-0.1.0/website/src/components/CopyButton.astro +13 -0
- agentcairn-0.1.0/website/src/components/Differentiators.astro +14 -0
- agentcairn-0.1.0/website/src/components/Footer.astro +14 -0
- agentcairn-0.1.0/website/src/components/Hero.astro +17 -0
- agentcairn-0.1.0/website/src/components/HeroDiagram.astro +38 -0
- agentcairn-0.1.0/website/src/components/HowItWorks.astro +20 -0
- agentcairn-0.1.0/website/src/components/Inversion.astro +9 -0
- agentcairn-0.1.0/website/src/components/Measured.astro +28 -0
- agentcairn-0.1.0/website/src/components/Nav.astro +9 -0
- agentcairn-0.1.0/website/src/components/Prose.astro +1 -0
- agentcairn-0.1.0/website/src/components/Quickstart.astro +7 -0
- agentcairn-0.1.0/website/src/components/Section.astro +9 -0
- agentcairn-0.1.0/website/src/components/SurvivesUninstall.astro +11 -0
- agentcairn-0.1.0/website/src/components/TrustSecurity.astro +14 -0
- agentcairn-0.1.0/website/src/islands/SurvivesUninstallDemo.tsx +42 -0
- agentcairn-0.1.0/website/src/layouts/Base.astro +23 -0
- agentcairn-0.1.0/website/src/lib/content.ts +88 -0
- agentcairn-0.1.0/website/src/pages/index.astro +27 -0
- agentcairn-0.1.0/website/src/styles/global.css +80 -0
- agentcairn-0.1.0/website/tests/a11y.spec.ts +16 -0
- agentcairn-0.1.0/website/tests/reduced-motion.spec.ts +16 -0
- agentcairn-0.1.0/website/tests/smoke.spec.ts +39 -0
- agentcairn-0.1.0/website/tsconfig.json +15 -0
- agentcairn-0.1.0/website/wrangler.jsonc +17 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: bench
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
branches: [main]
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
bench-offline:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Install uv (+ Python 3.12)
|
|
15
|
+
uses: astral-sh/setup-uv@v6
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.12"
|
|
18
|
+
enable-cache: true
|
|
19
|
+
|
|
20
|
+
- name: Install dependencies
|
|
21
|
+
run: uv sync
|
|
22
|
+
|
|
23
|
+
- run: uv run pytest benchmarks/tests/ -q
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
# Mirrors the local pre-commit hooks: ruff-format check, ruff lint, pytest.
|
|
9
|
+
jobs:
|
|
10
|
+
check:
|
|
11
|
+
name: format · lint · test
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Install uv (+ Python 3.12)
|
|
17
|
+
uses: astral-sh/setup-uv@v6
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.12"
|
|
20
|
+
enable-cache: true
|
|
21
|
+
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: uv sync
|
|
24
|
+
|
|
25
|
+
- name: Format check (ruff)
|
|
26
|
+
run: uv run ruff format --check .
|
|
27
|
+
|
|
28
|
+
- name: Lint (ruff)
|
|
29
|
+
run: uv run ruff check .
|
|
30
|
+
|
|
31
|
+
- name: Tests (pytest)
|
|
32
|
+
# The FastEmbed integration test is skipped unless CAIRN_RUN_INTEGRATION=1,
|
|
33
|
+
# so CI runs the fast, offline suite (no model download).
|
|
34
|
+
run: uv run pytest -q
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: release
|
|
2
|
+
|
|
3
|
+
# Publishes the `agentcairn` distribution to PyPI via Trusted Publishing (OIDC) —
|
|
4
|
+
# no API tokens or secrets. Requires a Trusted Publisher to be registered on PyPI
|
|
5
|
+
# (and TestPyPI for the dry-run) for this repo + this workflow file.
|
|
6
|
+
#
|
|
7
|
+
# - Push a tag `vX.Y.Z` -> build + publish to PyPI.
|
|
8
|
+
# - Manually run (Actions -> release -> Run workflow) -> build + publish to TestPyPI (dry-run).
|
|
9
|
+
|
|
10
|
+
on:
|
|
11
|
+
push:
|
|
12
|
+
tags: ["v*"]
|
|
13
|
+
workflow_dispatch: {}
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
publish:
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
permissions:
|
|
19
|
+
id-token: write # required for Trusted Publishing (OIDC)
|
|
20
|
+
contents: read # required for actions/checkout (explicit block defaults others to none)
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v4
|
|
23
|
+
- uses: astral-sh/setup-uv@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: "3.12"
|
|
26
|
+
- name: Build sdist + wheel
|
|
27
|
+
run: uv build
|
|
28
|
+
- name: Publish to TestPyPI (manual dry-run)
|
|
29
|
+
if: github.event_name == 'workflow_dispatch'
|
|
30
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
31
|
+
with:
|
|
32
|
+
repository-url: https://test.pypi.org/legacy/
|
|
33
|
+
- name: Publish to PyPI (on v* tag)
|
|
34
|
+
if: github.event_name == 'push'
|
|
35
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: site
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches: [main]
|
|
5
|
+
paths: ["website/**", ".github/workflows/site.yml"]
|
|
6
|
+
pull_request:
|
|
7
|
+
paths: ["website/**", ".github/workflows/site.yml"]
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
defaults: { run: { working-directory: website } }
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: actions/setup-node@v4
|
|
15
|
+
with: { node-version: "22", cache: "npm", cache-dependency-path: website/package-lock.json }
|
|
16
|
+
- run: npm ci
|
|
17
|
+
- run: npm run check
|
|
18
|
+
- run: npm run build
|
|
19
|
+
- run: npx playwright install --with-deps chromium
|
|
20
|
+
- run: npm test
|
|
21
|
+
# Deploy is handled by Cloudflare Pages' Git integration (build: npm run build,
|
|
22
|
+
# output: dist, root dir: website). This workflow is the test gate only.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Python bytecode
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
|
|
6
|
+
# Distribution / packaging
|
|
7
|
+
*.egg-info/
|
|
8
|
+
build/
|
|
9
|
+
dist/
|
|
10
|
+
.eggs/
|
|
11
|
+
MANIFEST
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
ENV/
|
|
18
|
+
|
|
19
|
+
# uv — keep uv.lock committed for a reproducible tool (do NOT ignore it)
|
|
20
|
+
|
|
21
|
+
# Test & coverage artefacts
|
|
22
|
+
.pytest_cache/
|
|
23
|
+
.coverage
|
|
24
|
+
.coverage.*
|
|
25
|
+
coverage.xml
|
|
26
|
+
htmlcov/
|
|
27
|
+
nosetests.xml
|
|
28
|
+
|
|
29
|
+
# Linter / type-checker caches
|
|
30
|
+
.ruff_cache/
|
|
31
|
+
.mypy_cache/
|
|
32
|
+
|
|
33
|
+
# IDE / editor artefacts
|
|
34
|
+
.idea/
|
|
35
|
+
.vscode/
|
|
36
|
+
*.swp
|
|
37
|
+
*.swo
|
|
38
|
+
*~
|
|
39
|
+
|
|
40
|
+
# macOS / Windows
|
|
41
|
+
.DS_Store
|
|
42
|
+
Thumbs.db
|
|
43
|
+
|
|
44
|
+
# agentcairn: the rebuildable DuckDB index is disposable — ignore it
|
|
45
|
+
# Note: the vault Markdown files are NEVER ignored (they are the source of truth)
|
|
46
|
+
*.duckdb
|
|
47
|
+
*.duckdb.wal
|
|
48
|
+
|
|
49
|
+
.superpowers/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.15.16
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
|
|
9
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
10
|
+
rev: v6.0.0
|
|
11
|
+
hooks:
|
|
12
|
+
- id: end-of-file-fixer
|
|
13
|
+
- id: trailing-whitespace
|
|
14
|
+
- id: check-yaml
|
|
15
|
+
- id: check-toml
|
|
16
|
+
- id: check-added-large-files
|
|
17
|
+
|
|
18
|
+
# Run the test suite on every commit.
|
|
19
|
+
# Move to the pre-push stage if the suite grows slow:
|
|
20
|
+
# stages: [pre-push]
|
|
21
|
+
- repo: local
|
|
22
|
+
hooks:
|
|
23
|
+
- id: pytest
|
|
24
|
+
name: pytest
|
|
25
|
+
entry: uv run pytest -q
|
|
26
|
+
language: system
|
|
27
|
+
pass_filenames: false
|
|
28
|
+
always_run: true
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# agentcairn — project guide for Claude Code
|
|
2
|
+
|
|
3
|
+
**What this is:** a local-first agent-memory system. An Obsidian **Markdown vault is the source of truth**; a **rebuildable embedded DuckDB index** provides hybrid retrieval. Daemonless: a `cairn` CLI + an on-demand READ_ONLY MCP server. Capture is via **out-of-band transcript ingestion** + an agent `remember` tool — **not** live hooks.
|
|
4
|
+
|
|
5
|
+
**Status (2026-06-08):** Design phase. The full spec is committed at
|
|
6
|
+
`docs/specs/2026-06-08-agentcairn-design.md` — **read it first.** No implementation code yet.
|
|
7
|
+
Next step: turn the spec into an implementation plan (superpowers `writing-plans`).
|
|
8
|
+
Build order: `cairn.vault → cairn.index → cairn.embed → cairn.search → cairn.ingest → cairn.mcp`/CLI.
|
|
9
|
+
|
|
10
|
+
## Locked decisions
|
|
11
|
+
- **Name:** package/org/repo `agentcairn`; **CLI command `cairn`**.
|
|
12
|
+
- **Language:** Python 3.12+. Distribute via `uv`/`uvx`/`pipx`; MCP launched via `uvx agentcairn`.
|
|
13
|
+
- **Index is a disposable cache** — always rebuildable from Markdown (`cairn reindex`); never the source of truth.
|
|
14
|
+
- **Default embedder:** FastEmbed `bge-small-en-v1.5` (384-d), behind a pluggable interface (Ollama/cloud opt-in).
|
|
15
|
+
- **Retrieval:** BM25 + vector + wikilink-graph-boost + recency/importance, fused with RRF (k=60); degradation ladder down to BM25-only ("never silently dead").
|
|
16
|
+
- **Concurrency:** MCP opens DuckDB **READ_ONLY**; one short-lived CLI process is the sole writer. The `.duckdb` lives on **local disk, NOT inside the synced vault folder**.
|
|
17
|
+
- **Markdown contract:** basic-memory conventions (frontmatter `title/type/permalink/tags`; observations `- [category] text #tag (ctx)`; relations `- rel_type [[Target]]`; bare `[[link]]` → implicit `links_to`) + Dataview-compatible inline fields.
|
|
18
|
+
|
|
19
|
+
## Hard parts / constraints (don't relearn the hard way)
|
|
20
|
+
- `cairn.vault` (parse/write Markdown preserving frontmatter order, unresolved forward-refs, and link-rewrite-on-move) is the **hardest** component — build and test it first.
|
|
21
|
+
- **Secret redaction before any write is mandatory** (we write plaintext to disk).
|
|
22
|
+
- DuckDB VSS HNSW persistence is *experimental* — prefer **in-memory HNSW rebuilt at MCP spawn**. DuckDB-WASM has **no VSS** — a future Obsidian plugin gets BM25+graph only (no in-browser semantic search).
|
|
23
|
+
- **No single headline benchmark number** — vendor LoCoMo/LongMemEval figures are self-reported and disputed; validate on LongMemEval-S + LoCoMo with committed scripts before any comparative claim.
|
|
24
|
+
|
|
25
|
+
## The wedge (keep it crisp)
|
|
26
|
+
Closest competitor is `rohitg00/agentmemory` (it already has hybrid+graph, local embeddings, decay, Obsidian *export*). Our narrow-but-real edge: **vault-as-truth (not export) · disposable/rebuildable index · non-lossy by construction · free wikilink graph · daemonless, zero external DB.** If a change blurs this, reconsider it.
|
|
27
|
+
|
|
28
|
+
## Tooling (uv — exclusive)
|
|
29
|
+
|
|
30
|
+
**Always use `uv`; never use pip, poetry, or global venvs.**
|
|
31
|
+
|
|
32
|
+
- Install / sync deps: `uv sync`
|
|
33
|
+
- Run any command: `uv run <cmd>` (e.g. `uv run pytest`, `uv run cairn --help`)
|
|
34
|
+
- Ephemeral tool runs: `uvx <tool>` — the MCP server will be launched as `uvx agentcairn`
|
|
35
|
+
- Contributor setup: `uv sync` then `uv run pre-commit install`
|
|
36
|
+
|
|
37
|
+
`uv.lock` is **committed** — it keeps the tool reproducible across machines.
|
|
38
|
+
|
|
39
|
+
## Conventions
|
|
40
|
+
- Specs/designs → `docs/specs/` (date-prefixed `YYYY-MM-DD-<topic>-design.md`). No `superpowers/` segment.
|
|
41
|
+
- `README.md` is the public positioning doc — advantages framed as design goals until validated.
|
|
42
|
+
- **License: Apache-2.0.** Start each source file with `# SPDX-License-Identifier: Apache-2.0`; keep `NOTICE` intact and propagate it in distributions; set `license = "Apache-2.0"` in `pyproject.toml` when scaffolding.
|
|
43
|
+
|
|
44
|
+
## Open items
|
|
45
|
+
- Validate large-vault HNSW rebuild / MCP cold-start latency (sets in-memory-vs-persisted threshold).
|
|
46
|
+
- Validate the local embedding baseline on a QA-style metric, not just R@5.
|
agentcairn-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
|
|
2
|
+
Apache License
|
|
3
|
+
Version 2.0, January 2004
|
|
4
|
+
http://www.apache.org/licenses/
|
|
5
|
+
|
|
6
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
7
|
+
|
|
8
|
+
1. Definitions.
|
|
9
|
+
|
|
10
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
11
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
12
|
+
|
|
13
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
14
|
+
the copyright owner that is granting the License.
|
|
15
|
+
|
|
16
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
17
|
+
other entities that control, are controlled by, or are under common
|
|
18
|
+
control with that entity. For the purposes of this definition,
|
|
19
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
20
|
+
direction or management of such entity, whether by contract or
|
|
21
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
22
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
23
|
+
|
|
24
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
25
|
+
exercising permissions granted by this License.
|
|
26
|
+
|
|
27
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
28
|
+
including but not limited to software source code, documentation
|
|
29
|
+
source, and configuration files.
|
|
30
|
+
|
|
31
|
+
"Object" form shall mean any form resulting from mechanical
|
|
32
|
+
transformation or translation of a Source form, including but
|
|
33
|
+
not limited to compiled object code, generated documentation,
|
|
34
|
+
and conversions to other media types.
|
|
35
|
+
|
|
36
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
37
|
+
Object form, made available under the License, as indicated by a
|
|
38
|
+
copyright notice that is included in or attached to the work
|
|
39
|
+
(an example is provided in the Appendix below).
|
|
40
|
+
|
|
41
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
42
|
+
form, that is based on (or derived from) the Work and for which the
|
|
43
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
44
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
45
|
+
of this License, Derivative Works shall not include works that remain
|
|
46
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
47
|
+
the Work and Derivative Works thereof.
|
|
48
|
+
|
|
49
|
+
"Contribution" shall mean any work of authorship, including
|
|
50
|
+
the original version of the Work and any modifications or additions
|
|
51
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
52
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
53
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
54
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
55
|
+
means any form of electronic, verbal, or written communication sent
|
|
56
|
+
to the Licensor or its representatives, including but not limited to
|
|
57
|
+
communication on electronic mailing lists, source code control systems,
|
|
58
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
59
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
60
|
+
excluding communication that is conspicuously marked or otherwise
|
|
61
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
62
|
+
|
|
63
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
64
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
65
|
+
subsequently incorporated within the Work.
|
|
66
|
+
|
|
67
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
68
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
69
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
70
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
71
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
72
|
+
Work and such Derivative Works in Source or Object form.
|
|
73
|
+
|
|
74
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
75
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
76
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
77
|
+
(except as stated in this section) patent license to make, have made,
|
|
78
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
79
|
+
where such license applies only to those patent claims licensable
|
|
80
|
+
by such Contributor that are necessarily infringed by their
|
|
81
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
82
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
83
|
+
institute patent litigation against any entity (including a
|
|
84
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
85
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
86
|
+
or contributory patent infringement, then any patent licenses
|
|
87
|
+
granted to You under this License for that Work shall terminate
|
|
88
|
+
as of the date such litigation is filed.
|
|
89
|
+
|
|
90
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
91
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
92
|
+
modifications, and in Source or Object form, provided that You
|
|
93
|
+
meet the following conditions:
|
|
94
|
+
|
|
95
|
+
(a) You must give any other recipients of the Work or
|
|
96
|
+
Derivative Works a copy of this License; and
|
|
97
|
+
|
|
98
|
+
(b) You must cause any modified files to carry prominent notices
|
|
99
|
+
stating that You changed the files; and
|
|
100
|
+
|
|
101
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
102
|
+
that You distribute, all copyright, patent, trademark, and
|
|
103
|
+
attribution notices from the Source form of the Work,
|
|
104
|
+
excluding those notices that do not pertain to any part of
|
|
105
|
+
the Derivative Works; and
|
|
106
|
+
|
|
107
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
108
|
+
distribution, then any Derivative Works that You distribute must
|
|
109
|
+
include a readable copy of the attribution notices contained
|
|
110
|
+
within such NOTICE file, excluding those notices that do not
|
|
111
|
+
pertain to any part of the Derivative Works, in at least one
|
|
112
|
+
of the following places: within a NOTICE text file distributed
|
|
113
|
+
as part of the Derivative Works; within the Source form or
|
|
114
|
+
documentation, if provided along with the Derivative Works; or,
|
|
115
|
+
within a display generated by the Derivative Works, if and
|
|
116
|
+
wherever such third-party notices normally appear. The contents
|
|
117
|
+
of the NOTICE file are for informational purposes only and
|
|
118
|
+
do not modify the License. You may add Your own attribution
|
|
119
|
+
notices within Derivative Works that You distribute, alongside
|
|
120
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
121
|
+
that such additional attribution notices cannot be construed
|
|
122
|
+
as modifying the License.
|
|
123
|
+
|
|
124
|
+
You may add Your own copyright statement to Your modifications and
|
|
125
|
+
may provide additional or different license terms and conditions
|
|
126
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
127
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
128
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
129
|
+
the conditions stated in this License.
|
|
130
|
+
|
|
131
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
132
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
133
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
134
|
+
this License, without any additional terms or conditions.
|
|
135
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
136
|
+
the terms of any separate license agreement you may have executed
|
|
137
|
+
with Licensor regarding such Contributions.
|
|
138
|
+
|
|
139
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
140
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
141
|
+
except as required for reasonable and customary use in describing the
|
|
142
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
143
|
+
|
|
144
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
145
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
146
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
147
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
148
|
+
implied, including, without limitation, any warranties or conditions
|
|
149
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
150
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
151
|
+
appropriateness of using or redistributing the Work and assume any
|
|
152
|
+
risks associated with Your exercise of permissions under this License.
|
|
153
|
+
|
|
154
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
155
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
156
|
+
unless required by applicable law (such as deliberate and grossly
|
|
157
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
158
|
+
liable to You for damages, including any direct, indirect, special,
|
|
159
|
+
incidental, or consequential damages of any character arising as a
|
|
160
|
+
result of this License or out of the use or inability to use the
|
|
161
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
162
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
163
|
+
other commercial damages or losses), even if such Contributor
|
|
164
|
+
has been advised of the possibility of such damages.
|
|
165
|
+
|
|
166
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
167
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
168
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
169
|
+
or other liability obligations and/or rights consistent with this
|
|
170
|
+
License. However, in accepting such obligations, You may act only
|
|
171
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
172
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
173
|
+
defend, and hold each Contributor harmless for any liability
|
|
174
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
175
|
+
of your accepting any such warranty or additional liability.
|
|
176
|
+
|
|
177
|
+
END OF TERMS AND CONDITIONS
|
|
178
|
+
|
|
179
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
180
|
+
|
|
181
|
+
To apply the Apache License to your work, attach the following
|
|
182
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
183
|
+
replaced with your own identifying information. (Don't include
|
|
184
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
185
|
+
comment syntax for the file format. We also recommend that a
|
|
186
|
+
file or class name and description of purpose be included on the
|
|
187
|
+
same "printed page" as the copyright notice for easier
|
|
188
|
+
identification within third-party archives.
|
|
189
|
+
|
|
190
|
+
Copyright [yyyy] [name of copyright owner]
|
|
191
|
+
|
|
192
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
193
|
+
you may not use this file except in compliance with the License.
|
|
194
|
+
You may obtain a copy of the License at
|
|
195
|
+
|
|
196
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
197
|
+
|
|
198
|
+
Unless required by applicable law or agreed to in writing, software
|
|
199
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
200
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
201
|
+
See the License for the specific language governing permissions and
|
|
202
|
+
limitations under the License.
|
agentcairn-0.1.0/NOTICE
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentcairn
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local-first agent memory: an Obsidian markdown vault as source of truth, with a rebuildable DuckDB index.
|
|
5
|
+
Project-URL: Homepage, https://agentcairn.dev
|
|
6
|
+
Project-URL: Repository, https://github.com/ccf/agentcairn
|
|
7
|
+
Project-URL: Documentation, https://github.com/ccf/agentcairn#readme
|
|
8
|
+
Author-email: "Charles C. Figueiredo" <ccf@ccf.io>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
License-File: NOTICE
|
|
12
|
+
Keywords: agent-memory,claude,duckdb,llm,local-first,mcp,obsidian,rag
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Requires-Python: >=3.12
|
|
22
|
+
Requires-Dist: duckdb>=1.1
|
|
23
|
+
Requires-Dist: fastembed>=0.4
|
|
24
|
+
Requires-Dist: markdown-it-py>=3.0
|
|
25
|
+
Requires-Dist: mcp>=1.27.2
|
|
26
|
+
Requires-Dist: python-frontmatter>=1.1
|
|
27
|
+
Requires-Dist: typer>=0.12
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# 🪨 agentcairn
|
|
31
|
+
|
|
32
|
+
**Local-first memory for AI agents — that you can actually read, edit, and own.**
|
|
33
|
+
|
|
34
|
+
> **cairn** /kɛən/ · *noun* — a stack of stones raised to mark a trail or a place worth remembering, left for whoever comes next.
|
|
35
|
+
|
|
36
|
+
agentcairn gives your coding agent durable, high-quality memory — but instead of locking it in an opaque database or a cloud service, **your memories live as plain Markdown in an [Obsidian](https://obsidian.md) vault you own.** A fast, rebuildable [DuckDB](https://duckdb.org) index sits on top for retrieval. Open your vault, read what the agent remembered, fix a wrong fact by hand, or drop in your own notes — and the agent picks it all up.
|
|
37
|
+
|
|
38
|
+
## Why agentcairn is different
|
|
39
|
+
|
|
40
|
+
Most agent-memory systems make a database or cloud store the source of truth and treat files (if any) as a one-way export. agentcairn inverts that:
|
|
41
|
+
|
|
42
|
+
- **📓 Your vault is the source of truth — not an export.** Memory is human-readable Markdown with frontmatter and `[[wikilinks]]`. Edit it in Obsidian; the index honors your edits.
|
|
43
|
+
- **♻️ The index is disposable.** DuckDB is a rebuildable cache (`cairn reindex`). Your memory survives a model upgrade, a corrupted index, a schema change, or uninstalling the tool — **zero data loss**, because the truth is just files on disk.
|
|
44
|
+
- **🧠 Non-lossy by construction.** The full note is always retained. Distillation only *adds* derived notes that link back to the source — it never silently drops facts it didn't think to extract at write time.
|
|
45
|
+
- **🔒 Redaction before every write.** Secrets are scrubbed (regex + entropy + URL-credential detection) before anything — body, title, or tags — reaches the plaintext vault. We write files you can read, so we treat a leaked credential as the worst failure mode.
|
|
46
|
+
- **🕸️ A free, deterministic knowledge graph.** Your `[[wikilinks]]` and frontmatter *are* the graph — no LLM extraction, no hallucinated entities.
|
|
47
|
+
- **🪶 Daemonless, zero external DB.** One embedded DuckDB file does semantic vector search, BM25 full-text, and graph traversal. No always-on server, no Neo4j/Postgres/Qdrant, no required cloud key — just a `cairn` CLI and an on-demand MCP server.
|
|
48
|
+
- **📊 Honestly measured.** A reproducible LongMemEval-S + LoCoMo harness ships in [`benchmarks/`](benchmarks/) — with real numbers, ablations, and explicit caveats instead of one cherry-picked headline (see below).
|
|
49
|
+
|
|
50
|
+
## How it works
|
|
51
|
+
|
|
52
|
+
```mermaid
|
|
53
|
+
flowchart LR
|
|
54
|
+
T["Session transcripts<br/>(out-of-band)"]
|
|
55
|
+
H["You · Obsidian<br/>(hand edits)"]
|
|
56
|
+
V["📓 Obsidian vault<br/>Markdown + frontmatter + wikilinks<br/><b>source of truth</b>"]
|
|
57
|
+
I["♻️ DuckDB index<br/>vector + BM25 + graph<br/><b>rebuildable cache</b>"]
|
|
58
|
+
M["MCP tools<br/>remember · recall · search · build_context · recent"]
|
|
59
|
+
|
|
60
|
+
T -- "redact → dedup → distill" --> V
|
|
61
|
+
H -- "edit" --> V
|
|
62
|
+
V -- "parse / reconcile-on-spawn" --> I
|
|
63
|
+
I -- "READ_ONLY hybrid recall" --> M
|
|
64
|
+
M -. "remember (redacted write)" .-> V
|
|
65
|
+
|
|
66
|
+
classDef truth fill:#eaf1ff,stroke:#317cff,color:#191919;
|
|
67
|
+
classDef cache fill:#f5f5f3,stroke:#999999,color:#191919;
|
|
68
|
+
class V truth
|
|
69
|
+
class I cache
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
- **Capture** reads your agent harness's session transcripts (append-only, already on disk) *out-of-band* — robust by design, with no fragile live hooks — then redacts → dedups → importance-gates → distills into the vault, non-lossily. Plus an agent-driven `remember` tool for curated, high-value memories.
|
|
73
|
+
- **Retrieval** fuses BM25 + semantic vectors with Reciprocal Rank Fusion, applies an optional graph-boost, and **degrades gracefully** down to keyword-only when no embedding model is available — so recall is *never* silently dead. An optional cross-encoder reranker adds precision.
|
|
74
|
+
- **Hybrid intelligence:** offline local embeddings (FastEmbed / `nomic-embed-text-v1.5` by default) out of the box — strong on its own *and* in the hybrid fusion (with `nomic`, vector-only edges out BM25 even on short turns; see the benchmark). Set `CAIRN_EMBED_MODEL` to pick another FastEmbed model, or run `CAIRN_EMBEDDER=ollama` / a cloud tier to go further.
|
|
75
|
+
- **Temporal memory:** notes may carry `valid_from`/`valid_until`/`superseded_by` frontmatter. Recall is validity-aware — it soft-demotes superseded and expired facts (the *current* fact wins) without ever hiding them (non-lossy), and annotates each result's status (`current`/`superseded`/`expired`/`not_yet_valid`) plus an `as_of` anchor so the agent can reason over time. Inert for notes with no validity fields.
|
|
76
|
+
|
|
77
|
+
### CLI
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
uvx agentcairn # on-demand MCP server for your agent harness
|
|
81
|
+
cairn ingest --vault ~/vault # distill recent agent sessions into the vault
|
|
82
|
+
cairn sweep --vault ~/vault # ingest + reindex in one pass (cron-friendly)
|
|
83
|
+
cairn recall "how did we fix the auth bug?" # hybrid recall from the CLI
|
|
84
|
+
cairn reindex ~/vault # rebuild the index from Markdown (always safe)
|
|
85
|
+
cairn doctor # health-check the index
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Honestly measured
|
|
89
|
+
|
|
90
|
+
We benchmark agentcairn the way we'd want a memory system measured — **reproducibly, with ablations, and without a single cherry-picked headline number.** The harness ([`benchmarks/`](benchmarks/)) runs **LongMemEval-S** and **LoCoMo** through a version-pinned downloader (datasets are never vendored), scores retrieval deterministically (recall/nDCG@k, MRR — no API key needed, runs in CI on a synthetic fixture), and offers an opt-in LLM-judged QA layer.
|
|
91
|
+
|
|
92
|
+
Retrieval ablation on the full LoCoMo set (turn-level, macro-avg, FastEmbed `nomic-embed-text-v1.5` — the default):
|
|
93
|
+
|
|
94
|
+
| arm | recall@5 | recall@10 | MRR |
|
|
95
|
+
|---|---|---|---|
|
|
96
|
+
| BM25 only | 0.527 | 0.604 | 0.459 |
|
|
97
|
+
| vector only | 0.536 | 0.637 | 0.433 |
|
|
98
|
+
| hybrid (RRF) | 0.562 | 0.648 | 0.477 |
|
|
99
|
+
| hybrid + graph-boost | 0.562 | 0.648 | 0.477 |
|
|
100
|
+
| **hybrid + reranker** | **0.662** | **0.735** | **0.608** |
|
|
101
|
+
|
|
102
|
+
What we read from this — and say out loud:
|
|
103
|
+
- **Hybrid beats either arm alone** — RRF fusion is worth it.
|
|
104
|
+
- **The cross-encoder reranker is the biggest lever** (+0.10 recall@5 over hybrid); the "ms-marco domain-shift might hurt" worry didn't materialize on conversational data.
|
|
105
|
+
- **The embedder default now pulls its weight** — with `nomic`, vector-only *edges out* BM25 (0.536 vs 0.527); switching from the old `bge-small` default (which trailed at 0.483) closed the gap. A 5-model FastEmbed sweep settled the pick — `nomic` (768-d) wins on quality-per-dim; bigger 1024-d models don't beat it. Full table: [`benchmarks/README.md`](benchmarks/README.md).
|
|
106
|
+
- **graph-boost is inert on these corpora** — LoCoMo/LongMemEval have no native `[[wikilink]]` graph, so the boost has nothing to fire on. It's for *real interlinked vaults*, not chat logs, and we don't pretend otherwise.
|
|
107
|
+
|
|
108
|
+
**LongMemEval-S** (50-instance sample) is an easier retrieval task with well-separated evidence sessions. At **session level** (the granularity prior work reports) retrieval is essentially perfect — **recall@5 = 1.00** for *every* arm (hybrid+reranker nDCG@10 0.993 / MRR 0.990); at the finer **turn level**, hybrid+reranker reaches **0.96 recall@5**. Two caveats we say out loud: session-recall@5 *saturates* at 1.0 here (even BM25 hits it), so it isn't a discriminating metric on this corpus; and it's a 10% sample — comparable for relative signal, not a leaderboard claim.
|
|
109
|
+
|
|
110
|
+
**Context efficiency.** On LongMemEval-S's ~136k-token sessions, agentcairn answers from the ~2,500 tokens it *recalls* (top-10) rather than the full history — a **~55× reduction** in what the model has to read (estimate, ~4 chars/token; 20-query sample). It measures context *size*, independent of retrieval quality.
|
|
111
|
+
|
|
112
|
+
QA-accuracy numbers (LLM-judged) are available too, but use an Anthropic judge rather than the papers' GPT-4o, so they are **not comparable to published leaderboards** — valid for relative ablation signal only. See [`benchmarks/README.md`](benchmarks/README.md) for how to run it and how to read the numbers.
|
|
113
|
+
|
|
114
|
+
## Roadmap
|
|
115
|
+
|
|
116
|
+
- **v1 — done.** The core loop: transcript ingestion → redaction → Markdown → rebuildable DuckDB index → hybrid recall; MCP server + CLI; secret redaction; local embeddings; reproducible benchmark harness.
|
|
117
|
+
- **v1.1 — next, prioritized by the benchmark above:**
|
|
118
|
+
- ✅ **Reranker on by default** — the largest measured retrieval lever; `CAIRN_RERANK=0` to disable. *(shipped)*
|
|
119
|
+
- **Ollama embedding tier** — ✅ local models via `CAIRN_EMBEDDER=ollama` (`CAIRN_EMBED_MODEL`/`OLLAMA_HOST`); cloud (OpenAI/Voyage) still pending.
|
|
120
|
+
- ✅ **Bi-temporal validity** — frontmatter `valid_from`/`valid_until`/`superseded_by`; recall soft-demotes superseded/expired facts (non-lossy — never hidden) and annotates each result's currency + an `as_of` anchor, so the *current* fact wins and the agent can reason over time. *(shipped)*
|
|
121
|
+
- In-memory HNSW for large-vault retrieval latency.
|
|
122
|
+
- **v2** — Obsidian plugin surface, MotherDuck cloud sync, optional LLM entity extraction.
|
|
123
|
+
|
|
124
|
+
## Prior art & thanks
|
|
125
|
+
|
|
126
|
+
Learning from [basic-memory](https://github.com/basicmachines-co/basic-memory) (Markdown-as-memory + rebuildable index), Simon Späti's [Obsidian RAG on DuckDB](https://www.ssp.sh/blog/obsidian-rag-duckdb-sql/), and [DuckDB](https://duckdb.org)'s VSS + FTS extensions. Benchmarks use [LongMemEval](https://github.com/xiaowu0162/LongMemEval) (MIT) and [LoCoMo](https://github.com/snap-research/locomo) (CC BY-NC 4.0).
|
|
127
|
+
|
|
128
|
+
## Development
|
|
129
|
+
|
|
130
|
+
agentcairn uses [uv](https://docs.astral.sh/uv/) exclusively for dependency management and tooling.
|
|
131
|
+
|
|
132
|
+
**Do not use pip, poetry, or global virtual environments.**
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
# First-time setup
|
|
136
|
+
uv sync # create .venv and install all deps (including dev)
|
|
137
|
+
uv run pre-commit install # install git hooks (ruff + pytest run on every commit)
|
|
138
|
+
|
|
139
|
+
# Daily use
|
|
140
|
+
uv run pytest # run the test suite
|
|
141
|
+
uv run cairn --help # run the CLI
|
|
142
|
+
uvx agentcairn # run the installed tool ephemerally (as the MCP server does)
|
|
143
|
+
|
|
144
|
+
# Formatting and linting
|
|
145
|
+
uv run ruff format . # format all Python files
|
|
146
|
+
uv run ruff check --fix . # lint with auto-fix
|
|
147
|
+
uv run pre-commit run --all-files
|
|
148
|
+
|
|
149
|
+
# Benchmarks (offline retrieval metrics need no API key)
|
|
150
|
+
uv run pytest benchmarks/tests/ # offline synthetic-fixture suite
|
|
151
|
+
PYTHONPATH=benchmarks uv run --group bench python -m cairn_bench.run --dataset locomo
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
The MCP server is launched via `uvx agentcairn` — no global install required.
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
[Apache License 2.0](LICENSE) — permissive, with an explicit patent grant. Copyright © 2026 Charles C. Figueiredo.
|