PyPI - phileas-memory - Versions diffs - 0.1.0__tar.gz - Mend

phileas-memory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (163) hide show

phileas_memory-0.1.0/.github/dependabot.yml +11 -0
phileas_memory-0.1.0/.github/workflows/ci.yml +78 -0
phileas_memory-0.1.0/.github/workflows/release.yml +51 -0
phileas_memory-0.1.0/.gitignore +19 -0
phileas_memory-0.1.0/.pre-commit-config.yaml +14 -0
phileas_memory-0.1.0/.python-version +1 -0
phileas_memory-0.1.0/PKG-INFO +81 -0
phileas_memory-0.1.0/README.md +50 -0
phileas_memory-0.1.0/RELEASING.md +46 -0
phileas_memory-0.1.0/evals/locomo/README.md +192 -0
phileas_memory-0.1.0/evals/locomo/faithful_conv0.json +78 -0
phileas_memory-0.1.0/evals/locomo/locomo_smoke.py +275 -0
phileas_memory-0.1.0/evals/locomo/rerank_probe.py +146 -0
phileas_memory-0.1.0/evals/locomo/score_run.py +77 -0
phileas_memory-0.1.0/evals/locomo/sweep_fusion.py +79 -0
phileas_memory-0.1.0/evals/locomo/sweep_rerank.py +88 -0
phileas_memory-0.1.0/evals/locomo/sweep_standout.py +72 -0
phileas_memory-0.1.0/pyproject.toml +69 -0
phileas_memory-0.1.0/src/phileas/__init__.py +3 -0
phileas_memory-0.1.0/src/phileas/api.py +209 -0
phileas_memory-0.1.0/src/phileas/assets/skills/phileas/SKILL.md +165 -0
phileas_memory-0.1.0/src/phileas/cli/__init__.py +87 -0
phileas_memory-0.1.0/src/phileas/cli/commands.py +938 -0
phileas_memory-0.1.0/src/phileas/cli/formatter.py +93 -0
phileas_memory-0.1.0/src/phileas/cli/wizard.py +515 -0
phileas_memory-0.1.0/src/phileas/config.py +224 -0
phileas_memory-0.1.0/src/phileas/daemon.py +845 -0
phileas_memory-0.1.0/src/phileas/db.py +939 -0
phileas_memory-0.1.0/src/phileas/engine.py +2052 -0
phileas_memory-0.1.0/src/phileas/fusion.py +155 -0
phileas_memory-0.1.0/src/phileas/graph.py +2326 -0
phileas_memory-0.1.0/src/phileas/graph_proxy.py +278 -0
phileas_memory-0.1.0/src/phileas/health.py +248 -0
phileas_memory-0.1.0/src/phileas/ingest.py +69 -0
phileas_memory-0.1.0/src/phileas/logging.py +123 -0
phileas_memory-0.1.0/src/phileas/mcp_auth.py +346 -0
phileas_memory-0.1.0/src/phileas/models.py +72 -0
phileas_memory-0.1.0/src/phileas/recall_format.py +167 -0
phileas_memory-0.1.0/src/phileas/reranker.py +40 -0
phileas_memory-0.1.0/src/phileas/scoring.py +181 -0
phileas_memory-0.1.0/src/phileas/server.py +991 -0
phileas_memory-0.1.0/src/phileas/standout.py +231 -0
phileas_memory-0.1.0/src/phileas/stats/__init__.py +1 -0
phileas_memory-0.1.0/src/phileas/stats/cli.py +400 -0
phileas_memory-0.1.0/src/phileas/stats/graph_probe.py +79 -0
phileas_memory-0.1.0/src/phileas/stats/queries.py +536 -0
phileas_memory-0.1.0/src/phileas/stats/render.py +43 -0
phileas_memory-0.1.0/src/phileas/stats/time.py +77 -0
phileas_memory-0.1.0/src/phileas/stats/usage.py +141 -0
phileas_memory-0.1.0/src/phileas/stats/writer.py +250 -0
phileas_memory-0.1.0/src/phileas/stopwords.py +155 -0
phileas_memory-0.1.0/src/phileas/sync.py +222 -0
phileas_memory-0.1.0/src/phileas/sync_stream.py +102 -0
phileas_memory-0.1.0/src/phileas/systemd.py +169 -0
phileas_memory-0.1.0/src/phileas/tool_runner.py +367 -0
phileas_memory-0.1.0/src/phileas/vector.py +175 -0
phileas_memory-0.1.0/tests/conftest.py +30 -0
phileas_memory-0.1.0/tests/test_config.py +231 -0
phileas_memory-0.1.0/tests/test_contradiction.py +276 -0
phileas_memory-0.1.0/tests/test_daemon_cron.py +103 -0
phileas_memory-0.1.0/tests/test_fusion.py +128 -0
phileas_memory-0.1.0/tests/test_health.py +132 -0
phileas_memory-0.1.0/tests/test_ingest.py +69 -0
phileas_memory-0.1.0/tests/test_models.py +23 -0
phileas_memory-0.1.0/tests/test_monitoring.py +103 -0
phileas_memory-0.1.0/tests/test_provenance.py +107 -0
phileas_memory-0.1.0/tests/test_recall_context.py +306 -0
phileas_memory-0.1.0/tests/test_recall_keyword_blend.py +152 -0
phileas_memory-0.1.0/tests/test_recall_split.py +117 -0
phileas_memory-0.1.0/tests/test_recall_standout.py +112 -0
phileas_memory-0.1.0/tests/test_recall_thread.py +103 -0
phileas_memory-0.1.0/tests/test_scoring.py +141 -0
phileas_memory-0.1.0/tests/test_standout.py +174 -0
phileas_memory-0.1.0/tests/test_stats_queries.py +198 -0
phileas_memory-0.1.0/tests/test_stats_render.py +25 -0
phileas_memory-0.1.0/tests/test_stats_time.py +53 -0
phileas_memory-0.1.0/tests/test_strength.py +149 -0
phileas_memory-0.1.0/tests/test_sync_trigger.py +162 -0
phileas_memory-0.1.0/tests/test_tool_metrics.py +64 -0
phileas_memory-0.1.0/tests/test_wizard.py +128 -0
phileas_memory-0.1.0/uv.lock +3886 -0
phileas_memory-0.1.0/web/.gitignore +41 -0
phileas_memory-0.1.0/web/AGENTS.md +5 -0
phileas_memory-0.1.0/web/CLAUDE.md +1 -0
phileas_memory-0.1.0/web/README.md +50 -0
phileas_memory-0.1.0/web/components.json +25 -0
phileas_memory-0.1.0/web/eslint.config.mjs +18 -0
phileas_memory-0.1.0/web/next.config.ts +7 -0
phileas_memory-0.1.0/web/package.json +44 -0
phileas_memory-0.1.0/web/pnpm-lock.yaml +6525 -0
phileas_memory-0.1.0/web/pnpm-workspace.yaml +3 -0
phileas_memory-0.1.0/web/postcss.config.mjs +7 -0
phileas_memory-0.1.0/web/public/file.svg +1 -0
phileas_memory-0.1.0/web/public/globe.svg +1 -0
phileas_memory-0.1.0/web/public/next.svg +1 -0
phileas_memory-0.1.0/web/public/vercel.svg +1 -0
phileas_memory-0.1.0/web/public/window.svg +1 -0
phileas_memory-0.1.0/web/src/app/api/daemon/status/route.ts +20 -0
phileas_memory-0.1.0/web/src/app/api/days/route.ts +18 -0
phileas_memory-0.1.0/web/src/app/api/entities/[type]/[name]/route.ts +67 -0
phileas_memory-0.1.0/web/src/app/api/entities/route.ts +54 -0
phileas_memory-0.1.0/web/src/app/api/export/route.ts +101 -0
phileas_memory-0.1.0/web/src/app/api/memories/[id]/route.ts +36 -0
phileas_memory-0.1.0/web/src/app/api/memories/route.ts +23 -0
phileas_memory-0.1.0/web/src/app/api/monitoring/aggregate/route.ts +23 -0
phileas_memory-0.1.0/web/src/app/api/monitoring/compare/route.ts +37 -0
phileas_memory-0.1.0/web/src/app/api/monitoring/ingestion/event/[id]/route.ts +28 -0
phileas_memory-0.1.0/web/src/app/api/monitoring/ingestion/route.ts +35 -0
phileas_memory-0.1.0/web/src/app/api/monitoring/traces/[id]/route.ts +67 -0
phileas_memory-0.1.0/web/src/app/api/monitoring/traces/route.ts +32 -0
phileas_memory-0.1.0/web/src/app/api/recall/route.ts +99 -0
phileas_memory-0.1.0/web/src/app/api/search/route.ts +40 -0
phileas_memory-0.1.0/web/src/app/api/tool/route.ts +105 -0
phileas_memory-0.1.0/web/src/app/entities/[type]/[name]/page.tsx +93 -0
phileas_memory-0.1.0/web/src/app/entities/page.tsx +66 -0
phileas_memory-0.1.0/web/src/app/favicon.ico +0 -0
phileas_memory-0.1.0/web/src/app/globals.css +130 -0
phileas_memory-0.1.0/web/src/app/layout.tsx +39 -0
phileas_memory-0.1.0/web/src/app/monitoring/page.tsx +166 -0
phileas_memory-0.1.0/web/src/app/page.tsx +65 -0
phileas_memory-0.1.0/web/src/app/recall/page.tsx +34 -0
phileas_memory-0.1.0/web/src/app/search/page.tsx +45 -0
phileas_memory-0.1.0/web/src/app/tools/page.tsx +13 -0
phileas_memory-0.1.0/web/src/components/daemon-status.tsx +48 -0
phileas_memory-0.1.0/web/src/components/day-nav.tsx +106 -0
phileas_memory-0.1.0/web/src/components/empty-state.tsx +31 -0
phileas_memory-0.1.0/web/src/components/entity-detail-view.tsx +145 -0
phileas_memory-0.1.0/web/src/components/entity-list-view.tsx +262 -0
phileas_memory-0.1.0/web/src/components/export-menu.tsx +77 -0
phileas_memory-0.1.0/web/src/components/forget-memory-dialog.tsx +108 -0
phileas_memory-0.1.0/web/src/components/ingestion-view.tsx +329 -0
phileas_memory-0.1.0/web/src/components/memory-card.tsx +120 -0
phileas_memory-0.1.0/web/src/components/memory-list.tsx +264 -0
phileas_memory-0.1.0/web/src/components/monitoring-shared.tsx +128 -0
phileas_memory-0.1.0/web/src/components/monitoring-view.tsx +681 -0
phileas_memory-0.1.0/web/src/components/recall-view.tsx +276 -0
phileas_memory-0.1.0/web/src/components/search-view.tsx +197 -0
phileas_memory-0.1.0/web/src/components/site-header.tsx +72 -0
phileas_memory-0.1.0/web/src/components/stats-strip.tsx +69 -0
phileas_memory-0.1.0/web/src/components/theme-provider.tsx +10 -0
phileas_memory-0.1.0/web/src/components/theme-toggle.tsx +42 -0
phileas_memory-0.1.0/web/src/components/tool-playground-view.tsx +349 -0
phileas_memory-0.1.0/web/src/components/trace-detail-dialog.tsx +497 -0
phileas_memory-0.1.0/web/src/components/ui/badge.tsx +52 -0
phileas_memory-0.1.0/web/src/components/ui/button.tsx +58 -0
phileas_memory-0.1.0/web/src/components/ui/calendar.tsx +221 -0
phileas_memory-0.1.0/web/src/components/ui/card.tsx +103 -0
phileas_memory-0.1.0/web/src/components/ui/dialog.tsx +160 -0
phileas_memory-0.1.0/web/src/components/ui/popover.tsx +90 -0
phileas_memory-0.1.0/web/src/components/ui/select.tsx +201 -0
phileas_memory-0.1.0/web/src/components/ui/separator.tsx +25 -0
phileas_memory-0.1.0/web/src/components/ui/tooltip.tsx +66 -0
phileas_memory-0.1.0/web/src/lib/daemon.ts +66 -0
phileas_memory-0.1.0/web/src/lib/day.ts +29 -0
phileas_memory-0.1.0/web/src/lib/format.ts +37 -0
phileas_memory-0.1.0/web/src/lib/graph.ts +114 -0
phileas_memory-0.1.0/web/src/lib/highlight.tsx +70 -0
phileas_memory-0.1.0/web/src/lib/metrics-db.ts +372 -0
phileas_memory-0.1.0/web/src/lib/phileas-db.ts +139 -0
phileas_memory-0.1.0/web/src/lib/queries.ts +146 -0
phileas_memory-0.1.0/web/src/lib/types.ts +59 -0
phileas_memory-0.1.0/web/src/lib/utils.ts +6 -0
phileas_memory-0.1.0/web/tsconfig.json +34 -0

phileas_memory-0.1.0/.github/dependabot.yml ADDED Viewed

@@ -0,0 +1,11 @@
+version: 2
+updates:
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+  - package-ecosystem: pip
+    directory: /
+    schedule:
+      interval: weekly

phileas_memory-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,78 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.14"
+          allow-prereleases: true
+      - name: Install ruff
+        run: pip install ruff
+      - name: Ruff check
+        run: ruff check src/ tests/
+      - name: Ruff format check
+        run: ruff format --check src/ tests/
+  test:
+    runs-on: ubuntu-latest
+    needs: lint
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.11", "3.12", "3.13", "3.14"]
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+          allow-prereleases: true
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[embeddings]" --group dev
+      - name: Run tests
+        run: pytest tests/ -q --tb=short
+        env:
+          PHILEAS_TEST_MODE: "1"
+  build:
+    runs-on: ubuntu-latest
+    needs: test
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.14"
+          allow-prereleases: true
+      - name: Install build tools
+        run: pip install build
+      - name: Build package
+        run: python -m build
+      - uses: actions/upload-artifact@v7
+        with:
+          name: dist
+          path: dist/

phileas_memory-0.1.0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,51 @@
+name: Release
+# Build and publish phileas-memory to PyPI when a GitHub Release is published.
+# Publishing uses PyPI Trusted Publishing (OIDC) — no API token stored in the
+# repo. See RELEASING.md for the one-time PyPI setup and how to cut a release.
+on:
+  release:
+    types: [published]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.14"
+          allow-prereleases: true
+      - name: Install build tools
+        run: pip install build twine
+      - name: Build sdist and wheel
+        run: python -m build
+      - name: Check artifacts
+        run: twine check dist/*
+      - uses: actions/upload-artifact@v7
+        with:
+          name: dist
+          path: dist/
+  publish:
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/phileas-memory
+    permissions:
+      id-token: write  # required for Trusted Publishing
+    steps:
+      - uses: actions/download-artifact@v8
+        with:
+          name: dist
+          path: dist/
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

phileas_memory-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,19 @@
+__pycache__/
+*.pyc
+.venv/
+*.egg-info/
+# local data + transient artifacts
+simulation_results.json
+research/
+# tests/: the unit-test code IS tracked. What stays LOCAL-only is anything that
+# carries fixtures sampled from real Claude Code sessions — private personal
+# conversation context (issue #39). Never track these:
+#   - real-session run outputs (e2e_runs/, path3_runs/)
+#   - the eval/probe sampling harness, which reads real session snapshots
+# Any test file added here must contain ONLY synthetic data: no real names,
+# places, events, or handles.
+tests/e2e_runs/
+tests/path3_runs/
+tests/eval/

phileas_memory-0.1.0/.pre-commit-config.yaml ADDED Viewed

@@ -0,0 +1,14 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.15.7
+    hooks:
+      - id: ruff
+      - id: ruff-format
+  - repo: local
+    hooks:
+      - id: pytest
+        name: pytest
+        entry: uv run pytest tests/ -x -q
+        language: system
+        pass_filenames: false
+        stages: [pre-push]

phileas_memory-0.1.0/.python-version ADDED Viewed

@@ -0,0 +1 @@
+3.14

phileas_memory-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,81 @@
+Metadata-Version: 2.4
+Name: phileas-memory
+Version: 0.1.0
+Summary: Local-first long-term memory for AI companions
+Project-URL: Homepage, https://github.com/alexajuno/phileas
+Project-URL: Repository, https://github.com/alexajuno/phileas
+Project-URL: Issues, https://github.com/alexajuno/phileas/issues
+License-Expression: MIT
+Keywords: ai,embeddings,llm,local-first,mcp,memory,rag
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.11
+Requires-Dist: chromadb>=1.0.0
+Requires-Dist: click>=8.3.1
+Requires-Dist: fastapi>=0.136.3
+Requires-Dist: kuzu>=0.8.0
+Requires-Dist: litellm>=1.83.0
+Requires-Dist: mcp[cli]
+Requires-Dist: rich>=14.3.3
+Requires-Dist: sentence-transformers>=5.3.0
+Requires-Dist: uvicorn>=0.42.0
+Provides-Extra: embeddings
+Requires-Dist: sentence-transformers; extra == 'embeddings'
+Description-Content-Type: text/markdown
+# Phileas — persistent memory for AI
+AI conversations reset every session. Phileas is a local memory layer that any LLM can read and write through [MCP](https://modelcontextprotocol.io/), so context survives across sessions and tools.
+## Get started
+```bash
+pip install phileas-memory
+phileas init
+```
+The setup wizard connects Phileas to your MCP client (Claude, GPT, Ollama, or any other) and chooses where to store memories.
+### First run
+On first run, `phileas init` downloads two small models that run locally — an
+embedding model (`all-MiniLM-L6-v2`) and a reranker
+(`ms-marco-MiniLM-L-6-v2`), about 150 MB together — from
+[Hugging Face](https://huggingface.co/). Expect a one-time wait on a slow
+connection; they're cached afterward, so later runs work offline.
+No external LLM API key is needed to try Phileas: your MCP client's model does
+the reasoning, and the embedding and reranking run on your machine.
+## Connect to your AI
+If you use Claude Code, `phileas init` handles this automatically.
+For other MCP clients, start the server and point your client at it:
+```bash
+phileas serve
+```
+## Principles
+- **Local-first** — memories stay on your machine
+- **Model-agnostic** — works with any LLM via MCP
+- **Natural forgetting** — irrelevant detail decays; recall favors what stays useful
+- **Open** — run it yourself, read the code
+For the command reference, run `phileas --help` or `phileas COMMAND --help`.
+## Requirements
+Python 3.11+
+## License
+MIT

phileas_memory-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,50 @@
+# Phileas — persistent memory for AI
+AI conversations reset every session. Phileas is a local memory layer that any LLM can read and write through [MCP](https://modelcontextprotocol.io/), so context survives across sessions and tools.
+## Get started
+```bash
+pip install phileas-memory
+phileas init
+```
+The setup wizard connects Phileas to your MCP client (Claude, GPT, Ollama, or any other) and chooses where to store memories.
+### First run
+On first run, `phileas init` downloads two small models that run locally — an
+embedding model (`all-MiniLM-L6-v2`) and a reranker
+(`ms-marco-MiniLM-L-6-v2`), about 150 MB together — from
+[Hugging Face](https://huggingface.co/). Expect a one-time wait on a slow
+connection; they're cached afterward, so later runs work offline.
+No external LLM API key is needed to try Phileas: your MCP client's model does
+the reasoning, and the embedding and reranking run on your machine.
+## Connect to your AI
+If you use Claude Code, `phileas init` handles this automatically.
+For other MCP clients, start the server and point your client at it:
+```bash
+phileas serve
+```
+## Principles
+- **Local-first** — memories stay on your machine
+- **Model-agnostic** — works with any LLM via MCP
+- **Natural forgetting** — irrelevant detail decays; recall favors what stays useful
+- **Open** — run it yourself, read the code
+For the command reference, run `phileas --help` or `phileas COMMAND --help`.
+## Requirements
+Python 3.11+
+## License
+MIT

phileas_memory-0.1.0/RELEASING.md ADDED Viewed

@@ -0,0 +1,46 @@
+# Releasing `phileas-memory`
+Phileas publishes to [PyPI](https://pypi.org/project/phileas-memory/) so that
+`pip install phileas-memory` works for anyone. Publishing runs automatically from
+GitHub Actions (`.github/workflows/release.yml`) when a GitHub Release is
+published, using [PyPI Trusted Publishing](https://docs.pypi.org/trusted-publishers/)
+(OIDC) — there is no API token stored in the repository.
+## One-time setup (maintainer)
+Do this once, before the first release.
+1. Reserve the name on PyPI by creating the project's first release (the steps
+   below) — or, if the name is unclaimed, configure a **pending publisher** so
+   the project is created by the first Trusted-Publishing upload.
+2. On PyPI, go to the project (or **Your projects → Publishing** for a pending
+   publisher) and add a **GitHub** trusted publisher with:
+   - **Owner:** `alexajuno`
+   - **Repository:** `phileas`
+   - **Workflow name:** `release.yml`
+   - **Environment:** `pypi`
+3. In the GitHub repo settings, create an **Environment** named `pypi`
+   (Settings → Environments → New environment). No secrets are needed; the
+   environment just scopes the OIDC trust and lets you add reviewers later.
+## Cutting a release
+1. Make sure `main` is green and pick the new version (semver).
+2. Bump `version` in `pyproject.toml` and commit it on `main`.
+3. Tag and push:
+   ```bash
+   git tag v0.1.0
+   git push origin v0.1.0
+   ```
+4. Create a GitHub Release for that tag (`gh release create v0.1.0 --generate-notes`).
+   Publishing the release triggers `release.yml`, which builds the sdist + wheel,
+   runs `twine check`, and publishes to PyPI.
+5. Confirm it landed: `pip install phileas-memory==0.1.0` in a clean environment.
+## Building locally (optional sanity check)
+```bash
+pip install build twine
+python -m build          # writes dist/*.tar.gz and dist/*.whl
+twine check dist/*
+```

phileas_memory-0.1.0/evals/locomo/README.md ADDED Viewed

@@ -0,0 +1,192 @@
+# LoCoMo smoke harness (Tier-2, manual / me-as-model)
+A directional smoke for Phileas recall against the [LoCoMo](https://snap-research.github.io/locomo/)
+long-term-conversation benchmark. **Not a pytest test** — it loads one LoCoMo
+conversation into an isolated Phileas store and scores whether each question's gold
+`evidence` turn surfaces in recall's top-k.
+Why this exists, the full landscape, and the run findings:
+[`docs/research/eval-benchmarks.md`](../../docs/research/eval-benchmarks.md).
+> This smoke is **directional, not a quotable number**: 1 conversation, 9
+> hand-picked cases, and *mechanical* extraction (one memory per turn, not faithful
+> summarization). It exists to catch regressions/improvements in recall behavior
+> while iterating on AA-136 / AA-137, against the recorded baseline below.
+## Prereqs
+- The repo venv (`.venv`) with the engine stack (chromadb, kuzu,
+  sentence-transformers). First run downloads the embedding + cross-encoder models.
+- Network egress (model downloads + the one-time data fetch).
+## Run it
+```bash
+# 0. Fetch the LoCoMo corpus (once). Default path is /tmp/locomo10.json;
+#    override with LOCOMO_JSON. 10 conversations, ~2.8 MB.
+curl -s -o /tmp/locomo10.json \
+  https://raw.githubusercontent.com/snap-research/locomo/main/data/locomo10.json
+# 1. See the sampled gold questions for a conversation (no store needed)
+.venv/bin/python evals/locomo/locomo_smoke.py gold 0 --n 16
+# 2. Extract one conversation into an ISOLATED store (~2 min: model load + ~419 memorizes)
+mkdir -p /tmp/locomo-eval/conv0
+PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/locomo_smoke.py extract 0
+# 3. Score the 9 baseline cases + about() probe (graph ON by default)
+PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/score_run.py
+# 3b. Score the no-graph floor (keyword + semantic only)
+PHILEAS_EVAL_GRAPH=off PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/score_run.py
+# Ad-hoc probing while playing the agent-in-loop:
+PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/locomo_smoke.py ask "Sweden" --top-k 10
+PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/locomo_smoke.py about Caroline
+```
+Notes:
+- The store under `PHILEAS_HOME` is **throwaway** — re-extract after any engine
+  change (extraction is deterministic, ~2 min). `dia_map.json` (dia_id → memory id)
+  lives in the home and is what makes scoring objective.
+- `_engine()` uses `GraphStore` **in-process** (no daemon needed — the daemon only
+  arbitrates the Kuzu lock across multiple processes). `PHILEAS_EVAL_GRAPH=off`
+  falls back to the degraded no-graph path.
+## Baseline — conv0 (Caroline/Melanie), 2026-06-12, mechanical extraction
+Pre-change reference. After improving AA-136/137, re-run and diff against this.
+| Case | query | gold | graph OFF | graph ON |
+|------|-------|------|-----------|----------|
+| Q1 research FOCUSED  | `adoption agencies` | D2:8 | D2:8 @3 | D2:8 @3 |
+| Q1 research SENTENCE | `what did Caroline research about adoption` | D2:8 | D2:8 @1 | D2:8 @1 |
+| Q2 LGBTQ group       | `LGBTQ support group` | D1:3 | D1:3 @5 | D1:3 @4 |
+| Q4 charity FOCUSED   | `charity race awareness` | D2:2 | D2:2 @1 | D2:2 @1 |
+| Q4 charity SENTENCE  | `what did the charity race raise awareness for` | D2:2 | D2:2 @1 | D2:2 @1 |
+| Q6 identity          | `Caroline transgender identity` | D1:5 | D1:5 @7 | **MISS** |
+| Q7 sunrise           | `Melanie sunrise painting` | D1:12 | MISS¹ | MISS¹ |
+| Q14 self-care        | `Melanie self-care` | D2:5 | MISS² | MISS² |
+| Q16 moved            | `Caroline moved Sweden` | D3:13, D4:3 | **empty**³ | D4:3 @4 |
+| **any-gold-surfaced** | | | **6/9** | **6/9** |
+`about('Caroline')` → 211/419 memories · `about('Melanie')` → 208/419 (firehose —
+extraction tags the speaker on every turn).
+¹ **False miss** — gold `D1:12` is the image-share turn; the answer-bearing turn
+`D1:14` ("painted that lake sunrise last year") surfaces at rank 1. Score the
+*answer*, not evidence rank.
+² **Reranker/vocabulary gap** — `D2:5` phrases self-care as "me-time… running,
+reading, violin"; semantic ranks lexical-vibe matches ("take care of yourself") above it.
+³ **`SIMILARITY_FLOOR=0.5` soft-failure** — all semantic candidates scored 0.378–0.458,
+so every one was cut → empty result.
+### Win conditions when revisiting (after AA-136 / AA-137)
+- Q16 stays a hit **and** Q6 returns (≤ top-10) → broadening + distributional cut
+  rescue without the flood regression.
+- Q14 surfaces `D2:5` → wider candidate pool + reranking closed the vocabulary gap.
+- `about()` stops returning ~half the corpus → needs faithful extraction (tag
+  mentioned entities, not the speaker) before this is meaningful; do that re-extract
+  first or the firehose masks the signal.
+- Replace evidence-rank with an **answer-level judge** (LoCoMo's own protocol)
+  before trusting any aggregate — evidence-rank both under- and over-counts (¹, Q6).
+## Status — after the recall rework (2026-06-12)
+The AA-136/137 work plus the follow-on recall rework landed on
+`feat/recall-threshold`: the gather pool is decoupled from `top_k`, the relevance
+cut governs result size (no count cap), the graph hop is relevance-gated (an
+entity match pulls only the memories that stand out for the query, not all of
+them), the keyword floor scales by term **rarity (IDF)** rather than coverage,
+and the default cut is `ratio` (a head-selector) rather than `gap` (a
+tail-trimmer). Re-scored conv0:
+| measure | result |
+|---------|--------|
+| conv0 smoke (top-10), graph ON / OFF | **6/9** |
+| threshold mode (no `top_k`), focused queries | 4–36 memories, self-bound |
+| broad-query breadth | bounded by the cut (`painting` 1, `Caroline` 82, was 389) |
+| Q16 "Caroline moved Sweden" | rescued — `D4:3` @2 (cosine 0.23 / sem-rank 415, via rare-term IDF) |
+The win conditions above are met except where they depend on faithful extraction
+(the `about()` firehose) or an answer-level judge.
+## Faithful extraction — demonstrated (2026-06-13)
+`LOCOMO_FAITHFUL=<path>` swaps the verbatim per-turn copy for a self-contained
+fact per turn: pronouns resolved, the concept named in the text, speakers
+attributed, and every named person tagged (not just the speaker). The facts for
+conv0 sessions 1–4 (all 9 gold cases live there) are hand-written in
+`faithful_conv0.json` — me-as-model, the Tier-2 reader. Sessions 5–19 stay
+mechanical, so the run is faithful needles in a mechanical haystack and the turn
+count is identical (419) — a clean A/B on text quality alone.
+```bash
+mkdir -p /tmp/locomo-eval/conv0faith
+LOCOMO_FAITHFUL=evals/locomo/faithful_conv0.json \
+  PHILEAS_HOME=/tmp/locomo-eval/conv0faith .venv/bin/python evals/locomo/locomo_smoke.py extract 0
+PHILEAS_HOME=/tmp/locomo-eval/conv0faith .venv/bin/python evals/locomo/score_run.py
+```
+| case | mechanical | faithful |
+|------|-----------|----------|
+| Q6 identity (`D1:5`)  | @7   | **@2** |
+| Q7 sunrise (`D1:12`)  | miss | **@2** |
+| Q14 self-care (`D2:5`)| miss | **@3** |
+| Q1 research (`D2:8`)  | @1   | @3 (longer fact, still top-10) |
+| **any-gold-surfaced** | **7/9** | **9/9** |
+The mechanism, read off `ask "Melanie self-care"`: mechanically, the answer turn
+`D2:5` is verbatim *"carving out some me-time… running, reading, violin"* — no
+"self-care" token, so it never surfaces; the query instead matches `D2:3`/`D2:4`,
+which carry the word but not the answer. The faithful fact for `D2:5` reads
+*"Melanie practices self-care by carving out daily me-time — running, reading, or
+playing her violin"*, co-locating the concept with the answer so the cross-encoder
+scores it `@3 (0.574)`. Per-turn copy splits the concept from its answer across
+adjacent turns; a reader writes them into one fact. This closes Q14's vocabulary gap
+and lifts Q6 without a bigger embedder, query expansion, or a reranker change.
+### Open problems — pick up here next session
+- [x] **Q14 vocabulary gap** — closed by faithful extraction (above); the concept
+  word lands in the answer-bearing fact, so the cross-encoder matches it.
+- [x] **Faithful extraction** — `faithful_conv0.json` + `LOCOMO_FAITHFUL` exists
+  and tags named entities rather than the speaker. `about()` still returns ~half
+  the corpus, because in a two-person conversation nearly every fact names one of
+  the two speakers — the firehose is inherent to the corpus, not the tagging.
+- [ ] **Evidence-rank still both under- and over-counts.** Q7 (`D1:12` is the
+  image-share turn; the answer `D1:14` "painted that lake sunrise" is the real
+  evidence) and Q6 (`D1:5` is one of several valid transgender turns) are mislabels.
+  An **answer-level judge** (LoCoMo's own protocol) is the honest metric — pending
+  because it needs a paid LLM call per question.
+- [ ] **Faithful extraction at corpus scale.** Sessions 1–4 are hand-written.
+  A quotable number needs faithful facts for all 419 turns × 10 conversations,
+  which is the real ingest path (`ingest_session` → agent → `memorize_batch`),
+  not hand authoring.
+- [ ] **Tier-2 real number.** This 9-case smoke is directional only. A quotable
+  LoCoMo figure needs the answer-level LLM judge + faithful extraction across all
+  10 conversations (the agent-in-loop Mode B in
+  [`docs/research/eval-benchmarks.md`](../../docs/research/eval-benchmarks.md)).
+## Method sweep (distributional cut)
+`sweep_standout.py` re-runs the 9 baseline cases under each `PHILEAS_STANDOUT`
+strategy (`gap` / `zscore` / `ratio` / `knee`, plus `absolute:X` flat-floor
+references) against an already-extracted store, so you can read off which cut
+recovers cases like Q6/Q14 without re-extracting:
+```bash
+PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/sweep_standout.py
+```
+It prints, per method, how many cases surfaced any gold and the mean rank of
+surfaced golds (lower = better). The `absolute:X` rows apply one floor uniformly
+to both cut sites — a baseline to beat, not the exact historical split.
+## Files
+- `locomo_smoke.py` — loader, extractor (mechanical / windowed / faithful), `ask` / `about` / `gold` probes.
+- `faithful_conv0.json` — hand-written self-contained facts for conv0 sessions 1–4, loaded via `LOCOMO_FAITHFUL`.
+- `score_run.py` — the 9 baseline cases + `about()` probe, objective dia-id scoring.
+- `sweep_standout.py` — re-scores those cases under each distributional-cut strategy.

phileas_memory-0.1.0/evals/locomo/faithful_conv0.json ADDED Viewed

@@ -0,0 +1,78 @@
+{
+  "D1:1": "Caroline greeted Melanie warmly when the two friends reconnected.",
+  "D1:2": "Melanie is swamped with her kids and her work.",
+  "D1:3": "Caroline attended an LGBTQ support group and found it powerful and moving.",
+  "D1:4": "Melanie asked Caroline about the inspiring stories she heard at the LGBTQ support group.",
+  "D1:5": "Caroline was inspired by the transgender stories shared at the LGBTQ support group and felt grateful for the acceptance and support there.",
+  "D1:6": "Melanie admired a painting Caroline shared and asked what the support group had done for her.",
+  "D1:7": "The LGBTQ support group made Caroline feel accepted and gave her the courage to embrace herself.",
+  "D1:8": "Melanie praised Caroline's courage in embracing who she is.",
+  "D1:9": "Caroline plans to continue her education and explore career options.",
+  "D1:10": "Melanie asked what kinds of careers Caroline was considering.",
+  "D1:11": "Caroline is keen on a career in counseling or mental health, so she can support people facing struggles similar to her own.",
+  "D1:12": "Melanie told Caroline she'd make a great counselor and shared one of her own paintings — a sunset over a lake.",
+  "D1:13": "Caroline asked whether the lake painting was Melanie's own work.",
+  "D1:14": "Melanie painted the lake sunrise herself last year, and the painting is special to her.",
+  "D1:15": "Caroline praised the blended colors in Melanie's painting and called painting a great outlet for self-expression.",
+  "D1:16": "Melanie paints to express her feelings, get creative, and relax after a long day.",
+  "D1:17": "Caroline agreed that relaxing and self-expression matter, then headed off to do some research.",
+  "D1:18": "Melanie said taking care of ourselves is vital and went swimming with her kids.",
+  "D2:1": "Melanie ran a charity race for mental health and found it rewarding; it made her reflect on taking care of one's mind.",
+  "D2:2": "Caroline praised Melanie's charity race for making a difference and raising awareness for mental health.",
+  "D2:3": "Melanie is realizing that self-care is important: when she looks after herself, she can better look after her family.",
+  "D2:4": "Caroline agreed that self-care matters and praised Melanie for prioritizing it.",
+  "D2:5": "Melanie practices self-care by carving out daily me-time — running, reading, or playing her violin — which refreshes her and helps her stay present for her family.",
+  "D2:6": "Caroline encouraged Melanie that taking time for yourself is important and that she's looking after herself and her family well.",
+  "D2:7": "Melanie said her self-care is still a work in progress; her kids are excited for summer break and the family is planning a camping trip next month.",
+  "D2:8": "Caroline is researching adoption agencies — it has long been her dream to have a family and give a loving home to children who need one.",
+  "D2:9": "Melanie praised Caroline's plan to adopt and give children in need a loving home.",
+  "D2:10": "Caroline's goal is to give children a loving home; she shared one of the adoption agencies she is considering and feels hopeful and optimistic.",
+  "D2:11": "Melanie asked what made Caroline choose that particular adoption agency.",
+  "D2:12": "Caroline chose that adoption agency because it helps LGBTQ+ people adopt, and its inclusivity and support spoke to her.",
+  "D2:13": "Melanie praised the agency's inclusivity and asked what Caroline was most excited about in the adoption process.",
+  "D2:14": "Caroline is thrilled to build a family for children who need one, though she knows it will be hard as a single parent.",
+  "D2:15": "Melanie told Caroline she will be a wonderful mother to the children she adopts.",
+  "D2:16": "Caroline vowed to give the children she adopts a safe and loving home.",
+  "D2:17": "Melanie said Caroline's caring heart means the adopted children will have all the love and stability they need.",
+  "D3:1": "Caroline spoke at a school event about her transgender journey and encouraged students to get involved in the LGBTQ community; she reflected on how far she has come since she began transitioning three years ago.",
+  "D3:2": "Melanie said she is proud of Caroline for spreading LGBTQ awareness and coming so far since her transition.",
+  "D3:3": "Caroline felt powerful giving her talk, sharing her struggles and growth since coming out, and was grateful to give a voice to the trans community.",
+  "D3:4": "Melanie praised Caroline as brave for speaking up for the trans community and inspiring others toward inclusivity and acceptance.",
+  "D3:5": "Caroline believes sharing personal experiences promotes understanding and acceptance, and she wants to pass on the love and support she has received.",
+  "D3:6": "Melanie reflected that sharing our vulnerable stories bonds people and shows others facing the same challenges that they are not alone.",
+  "D3:7": "Caroline is grateful for the chance to share her story and intends to keep using her voice to build a more inclusive, understanding world.",
+  "D3:8": "Melanie said she is proud to be part of the difference Caroline is making.",
+  "D3:9": "Caroline said she and Melanie can spread love and tackle life's challenges together.",
+  "D3:10": "Melanie said her family motivates her to be courageous, and asked what motivates Caroline.",
+  "D3:11": "Caroline said her friends, family, and mentors are her rocks who give her strength, and she shared a photo from a recent meetup.",
+  "D3:12": "Melanie asked how long Caroline has had such a strong support system.",
+  "D3:13": "Caroline has known her close friends for four years, ever since she moved away from her home country; their support mattered especially after a tough breakup.",
+  "D3:14": "Melanie said her husband and kids keep her motivated, and shared a family photo.",
+  "D3:15": "Caroline asked Melanie how long she has been married.",
+  "D3:16": "Melanie has been married for five years and shared a photo from her wedding day.",
+  "D3:17": "Caroline congratulated Melanie on her marriage and wished the couple many happy years.",
+  "D3:18": "Melanie said her family and the moments they share make everything worthwhile.",
+  "D3:19": "Caroline remarked that Melanie's family looked happy and asked about their day.",
+  "D3:20": "Melanie described a fun family day of playing games, eating good food, and spending time together.",
+  "D3:21": "Caroline encouraged Melanie to cherish those family moments.",
+  "D3:22": "Melanie cherishes time with her family — it is when she feels most alive and happy.",
+  "D3:23": "Caroline agreed that time with loved ones brings happiness and that family is everything.",
+  "D4:1": "Caroline shared a photo of a necklace bearing a cross and a heart, saying a lot had been going on in her life.",
+  "D4:2": "Melanie admired Caroline's necklace and asked whether it held a special meaning.",
+  "D4:3": "Caroline's treasured necklace was a gift from her grandmother in her home country, Sweden; it symbolizes love, faith, and strength, and reminds her of her roots and her family's support.",
+  "D4:4": "Melanie reflected on how objects can hold deep meaning and asked Caroline about other treasured items.",
+  "D4:5": "Caroline treasures a hand-painted bowl a friend made for her eighteenth birthday ten years ago; its pattern and colors remind her of art and self-expression.",
+  "D4:6": "Melanie took her family camping in the mountains and had a wonderful time together.",
+  "D4:7": "Caroline asked Melanie about the family camping trip.",
+  "D4:8": "Melanie's mountain camping trip included exploring nature, roasting marshmallows by the campfire, and a hike with an amazing view; her two younger kids love nature.",
+  "D4:9": "Caroline said family moments like the camping trip are special.",
+  "D4:10": "Melanie said family time matters to her and asked what Caroline had been up to lately.",
+  "D4:11": "Caroline has been pursuing counseling and mental health as a career, to help people who have gone through what she has.",
+  "D4:12": "Melanie asked what kind of counseling and mental health work Caroline wants to pursue.",
+  "D4:13": "Caroline wants to work as a counselor with trans people, and she recently attended an enlightening LGBTQ+ counseling workshop on therapeutic methods for supporting trans clients.",
+  "D4:14": "Melanie praised Caroline's dedication and asked what motivated her to pursue counseling.",
+  "D4:15": "Caroline said her own journey and the support she received motivated her to pursue counseling, after she saw how counseling and support groups improved her life.",
+  "D4:16": "Melanie admired how Caroline turned her own experience into a passion for helping others.",
+  "D4:17": "Caroline thanked Melanie for her kind words.",
+  "D4:18": "Melanie congratulated Caroline for pursuing the work she truly cares about."
+}