phileas-memory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. phileas_memory-0.1.0/.github/dependabot.yml +11 -0
  2. phileas_memory-0.1.0/.github/workflows/ci.yml +78 -0
  3. phileas_memory-0.1.0/.github/workflows/release.yml +51 -0
  4. phileas_memory-0.1.0/.gitignore +19 -0
  5. phileas_memory-0.1.0/.pre-commit-config.yaml +14 -0
  6. phileas_memory-0.1.0/.python-version +1 -0
  7. phileas_memory-0.1.0/PKG-INFO +81 -0
  8. phileas_memory-0.1.0/README.md +50 -0
  9. phileas_memory-0.1.0/RELEASING.md +46 -0
  10. phileas_memory-0.1.0/evals/locomo/README.md +192 -0
  11. phileas_memory-0.1.0/evals/locomo/faithful_conv0.json +78 -0
  12. phileas_memory-0.1.0/evals/locomo/locomo_smoke.py +275 -0
  13. phileas_memory-0.1.0/evals/locomo/rerank_probe.py +146 -0
  14. phileas_memory-0.1.0/evals/locomo/score_run.py +77 -0
  15. phileas_memory-0.1.0/evals/locomo/sweep_fusion.py +79 -0
  16. phileas_memory-0.1.0/evals/locomo/sweep_rerank.py +88 -0
  17. phileas_memory-0.1.0/evals/locomo/sweep_standout.py +72 -0
  18. phileas_memory-0.1.0/pyproject.toml +69 -0
  19. phileas_memory-0.1.0/src/phileas/__init__.py +3 -0
  20. phileas_memory-0.1.0/src/phileas/api.py +209 -0
  21. phileas_memory-0.1.0/src/phileas/assets/skills/phileas/SKILL.md +165 -0
  22. phileas_memory-0.1.0/src/phileas/cli/__init__.py +87 -0
  23. phileas_memory-0.1.0/src/phileas/cli/commands.py +938 -0
  24. phileas_memory-0.1.0/src/phileas/cli/formatter.py +93 -0
  25. phileas_memory-0.1.0/src/phileas/cli/wizard.py +515 -0
  26. phileas_memory-0.1.0/src/phileas/config.py +224 -0
  27. phileas_memory-0.1.0/src/phileas/daemon.py +845 -0
  28. phileas_memory-0.1.0/src/phileas/db.py +939 -0
  29. phileas_memory-0.1.0/src/phileas/engine.py +2052 -0
  30. phileas_memory-0.1.0/src/phileas/fusion.py +155 -0
  31. phileas_memory-0.1.0/src/phileas/graph.py +2326 -0
  32. phileas_memory-0.1.0/src/phileas/graph_proxy.py +278 -0
  33. phileas_memory-0.1.0/src/phileas/health.py +248 -0
  34. phileas_memory-0.1.0/src/phileas/ingest.py +69 -0
  35. phileas_memory-0.1.0/src/phileas/logging.py +123 -0
  36. phileas_memory-0.1.0/src/phileas/mcp_auth.py +346 -0
  37. phileas_memory-0.1.0/src/phileas/models.py +72 -0
  38. phileas_memory-0.1.0/src/phileas/recall_format.py +167 -0
  39. phileas_memory-0.1.0/src/phileas/reranker.py +40 -0
  40. phileas_memory-0.1.0/src/phileas/scoring.py +181 -0
  41. phileas_memory-0.1.0/src/phileas/server.py +991 -0
  42. phileas_memory-0.1.0/src/phileas/standout.py +231 -0
  43. phileas_memory-0.1.0/src/phileas/stats/__init__.py +1 -0
  44. phileas_memory-0.1.0/src/phileas/stats/cli.py +400 -0
  45. phileas_memory-0.1.0/src/phileas/stats/graph_probe.py +79 -0
  46. phileas_memory-0.1.0/src/phileas/stats/queries.py +536 -0
  47. phileas_memory-0.1.0/src/phileas/stats/render.py +43 -0
  48. phileas_memory-0.1.0/src/phileas/stats/time.py +77 -0
  49. phileas_memory-0.1.0/src/phileas/stats/usage.py +141 -0
  50. phileas_memory-0.1.0/src/phileas/stats/writer.py +250 -0
  51. phileas_memory-0.1.0/src/phileas/stopwords.py +155 -0
  52. phileas_memory-0.1.0/src/phileas/sync.py +222 -0
  53. phileas_memory-0.1.0/src/phileas/sync_stream.py +102 -0
  54. phileas_memory-0.1.0/src/phileas/systemd.py +169 -0
  55. phileas_memory-0.1.0/src/phileas/tool_runner.py +367 -0
  56. phileas_memory-0.1.0/src/phileas/vector.py +175 -0
  57. phileas_memory-0.1.0/tests/conftest.py +30 -0
  58. phileas_memory-0.1.0/tests/test_config.py +231 -0
  59. phileas_memory-0.1.0/tests/test_contradiction.py +276 -0
  60. phileas_memory-0.1.0/tests/test_daemon_cron.py +103 -0
  61. phileas_memory-0.1.0/tests/test_fusion.py +128 -0
  62. phileas_memory-0.1.0/tests/test_health.py +132 -0
  63. phileas_memory-0.1.0/tests/test_ingest.py +69 -0
  64. phileas_memory-0.1.0/tests/test_models.py +23 -0
  65. phileas_memory-0.1.0/tests/test_monitoring.py +103 -0
  66. phileas_memory-0.1.0/tests/test_provenance.py +107 -0
  67. phileas_memory-0.1.0/tests/test_recall_context.py +306 -0
  68. phileas_memory-0.1.0/tests/test_recall_keyword_blend.py +152 -0
  69. phileas_memory-0.1.0/tests/test_recall_split.py +117 -0
  70. phileas_memory-0.1.0/tests/test_recall_standout.py +112 -0
  71. phileas_memory-0.1.0/tests/test_recall_thread.py +103 -0
  72. phileas_memory-0.1.0/tests/test_scoring.py +141 -0
  73. phileas_memory-0.1.0/tests/test_standout.py +174 -0
  74. phileas_memory-0.1.0/tests/test_stats_queries.py +198 -0
  75. phileas_memory-0.1.0/tests/test_stats_render.py +25 -0
  76. phileas_memory-0.1.0/tests/test_stats_time.py +53 -0
  77. phileas_memory-0.1.0/tests/test_strength.py +149 -0
  78. phileas_memory-0.1.0/tests/test_sync_trigger.py +162 -0
  79. phileas_memory-0.1.0/tests/test_tool_metrics.py +64 -0
  80. phileas_memory-0.1.0/tests/test_wizard.py +128 -0
  81. phileas_memory-0.1.0/uv.lock +3886 -0
  82. phileas_memory-0.1.0/web/.gitignore +41 -0
  83. phileas_memory-0.1.0/web/AGENTS.md +5 -0
  84. phileas_memory-0.1.0/web/CLAUDE.md +1 -0
  85. phileas_memory-0.1.0/web/README.md +50 -0
  86. phileas_memory-0.1.0/web/components.json +25 -0
  87. phileas_memory-0.1.0/web/eslint.config.mjs +18 -0
  88. phileas_memory-0.1.0/web/next.config.ts +7 -0
  89. phileas_memory-0.1.0/web/package.json +44 -0
  90. phileas_memory-0.1.0/web/pnpm-lock.yaml +6525 -0
  91. phileas_memory-0.1.0/web/pnpm-workspace.yaml +3 -0
  92. phileas_memory-0.1.0/web/postcss.config.mjs +7 -0
  93. phileas_memory-0.1.0/web/public/file.svg +1 -0
  94. phileas_memory-0.1.0/web/public/globe.svg +1 -0
  95. phileas_memory-0.1.0/web/public/next.svg +1 -0
  96. phileas_memory-0.1.0/web/public/vercel.svg +1 -0
  97. phileas_memory-0.1.0/web/public/window.svg +1 -0
  98. phileas_memory-0.1.0/web/src/app/api/daemon/status/route.ts +20 -0
  99. phileas_memory-0.1.0/web/src/app/api/days/route.ts +18 -0
  100. phileas_memory-0.1.0/web/src/app/api/entities/[type]/[name]/route.ts +67 -0
  101. phileas_memory-0.1.0/web/src/app/api/entities/route.ts +54 -0
  102. phileas_memory-0.1.0/web/src/app/api/export/route.ts +101 -0
  103. phileas_memory-0.1.0/web/src/app/api/memories/[id]/route.ts +36 -0
  104. phileas_memory-0.1.0/web/src/app/api/memories/route.ts +23 -0
  105. phileas_memory-0.1.0/web/src/app/api/monitoring/aggregate/route.ts +23 -0
  106. phileas_memory-0.1.0/web/src/app/api/monitoring/compare/route.ts +37 -0
  107. phileas_memory-0.1.0/web/src/app/api/monitoring/ingestion/event/[id]/route.ts +28 -0
  108. phileas_memory-0.1.0/web/src/app/api/monitoring/ingestion/route.ts +35 -0
  109. phileas_memory-0.1.0/web/src/app/api/monitoring/traces/[id]/route.ts +67 -0
  110. phileas_memory-0.1.0/web/src/app/api/monitoring/traces/route.ts +32 -0
  111. phileas_memory-0.1.0/web/src/app/api/recall/route.ts +99 -0
  112. phileas_memory-0.1.0/web/src/app/api/search/route.ts +40 -0
  113. phileas_memory-0.1.0/web/src/app/api/tool/route.ts +105 -0
  114. phileas_memory-0.1.0/web/src/app/entities/[type]/[name]/page.tsx +93 -0
  115. phileas_memory-0.1.0/web/src/app/entities/page.tsx +66 -0
  116. phileas_memory-0.1.0/web/src/app/favicon.ico +0 -0
  117. phileas_memory-0.1.0/web/src/app/globals.css +130 -0
  118. phileas_memory-0.1.0/web/src/app/layout.tsx +39 -0
  119. phileas_memory-0.1.0/web/src/app/monitoring/page.tsx +166 -0
  120. phileas_memory-0.1.0/web/src/app/page.tsx +65 -0
  121. phileas_memory-0.1.0/web/src/app/recall/page.tsx +34 -0
  122. phileas_memory-0.1.0/web/src/app/search/page.tsx +45 -0
  123. phileas_memory-0.1.0/web/src/app/tools/page.tsx +13 -0
  124. phileas_memory-0.1.0/web/src/components/daemon-status.tsx +48 -0
  125. phileas_memory-0.1.0/web/src/components/day-nav.tsx +106 -0
  126. phileas_memory-0.1.0/web/src/components/empty-state.tsx +31 -0
  127. phileas_memory-0.1.0/web/src/components/entity-detail-view.tsx +145 -0
  128. phileas_memory-0.1.0/web/src/components/entity-list-view.tsx +262 -0
  129. phileas_memory-0.1.0/web/src/components/export-menu.tsx +77 -0
  130. phileas_memory-0.1.0/web/src/components/forget-memory-dialog.tsx +108 -0
  131. phileas_memory-0.1.0/web/src/components/ingestion-view.tsx +329 -0
  132. phileas_memory-0.1.0/web/src/components/memory-card.tsx +120 -0
  133. phileas_memory-0.1.0/web/src/components/memory-list.tsx +264 -0
  134. phileas_memory-0.1.0/web/src/components/monitoring-shared.tsx +128 -0
  135. phileas_memory-0.1.0/web/src/components/monitoring-view.tsx +681 -0
  136. phileas_memory-0.1.0/web/src/components/recall-view.tsx +276 -0
  137. phileas_memory-0.1.0/web/src/components/search-view.tsx +197 -0
  138. phileas_memory-0.1.0/web/src/components/site-header.tsx +72 -0
  139. phileas_memory-0.1.0/web/src/components/stats-strip.tsx +69 -0
  140. phileas_memory-0.1.0/web/src/components/theme-provider.tsx +10 -0
  141. phileas_memory-0.1.0/web/src/components/theme-toggle.tsx +42 -0
  142. phileas_memory-0.1.0/web/src/components/tool-playground-view.tsx +349 -0
  143. phileas_memory-0.1.0/web/src/components/trace-detail-dialog.tsx +497 -0
  144. phileas_memory-0.1.0/web/src/components/ui/badge.tsx +52 -0
  145. phileas_memory-0.1.0/web/src/components/ui/button.tsx +58 -0
  146. phileas_memory-0.1.0/web/src/components/ui/calendar.tsx +221 -0
  147. phileas_memory-0.1.0/web/src/components/ui/card.tsx +103 -0
  148. phileas_memory-0.1.0/web/src/components/ui/dialog.tsx +160 -0
  149. phileas_memory-0.1.0/web/src/components/ui/popover.tsx +90 -0
  150. phileas_memory-0.1.0/web/src/components/ui/select.tsx +201 -0
  151. phileas_memory-0.1.0/web/src/components/ui/separator.tsx +25 -0
  152. phileas_memory-0.1.0/web/src/components/ui/tooltip.tsx +66 -0
  153. phileas_memory-0.1.0/web/src/lib/daemon.ts +66 -0
  154. phileas_memory-0.1.0/web/src/lib/day.ts +29 -0
  155. phileas_memory-0.1.0/web/src/lib/format.ts +37 -0
  156. phileas_memory-0.1.0/web/src/lib/graph.ts +114 -0
  157. phileas_memory-0.1.0/web/src/lib/highlight.tsx +70 -0
  158. phileas_memory-0.1.0/web/src/lib/metrics-db.ts +372 -0
  159. phileas_memory-0.1.0/web/src/lib/phileas-db.ts +139 -0
  160. phileas_memory-0.1.0/web/src/lib/queries.ts +146 -0
  161. phileas_memory-0.1.0/web/src/lib/types.ts +59 -0
  162. phileas_memory-0.1.0/web/src/lib/utils.ts +6 -0
  163. phileas_memory-0.1.0/web/tsconfig.json +34 -0
@@ -0,0 +1,11 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: github-actions
4
+ directory: /
5
+ schedule:
6
+ interval: weekly
7
+
8
+ - package-ecosystem: pip
9
+ directory: /
10
+ schedule:
11
+ interval: weekly
@@ -0,0 +1,78 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ lint:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v6
18
+
19
+ - uses: actions/setup-python@v6
20
+ with:
21
+ python-version: "3.14"
22
+ allow-prereleases: true
23
+
24
+ - name: Install ruff
25
+ run: pip install ruff
26
+
27
+ - name: Ruff check
28
+ run: ruff check src/ tests/
29
+
30
+ - name: Ruff format check
31
+ run: ruff format --check src/ tests/
32
+
33
+ test:
34
+ runs-on: ubuntu-latest
35
+ needs: lint
36
+ strategy:
37
+ fail-fast: false
38
+ matrix:
39
+ python-version: ["3.11", "3.12", "3.13", "3.14"]
40
+ steps:
41
+ - uses: actions/checkout@v6
42
+
43
+ - uses: actions/setup-python@v6
44
+ with:
45
+ python-version: ${{ matrix.python-version }}
46
+ allow-prereleases: true
47
+
48
+ - name: Install dependencies
49
+ run: |
50
+ python -m pip install --upgrade pip
51
+ pip install -e ".[embeddings]" --group dev
52
+
53
+ - name: Run tests
54
+ run: pytest tests/ -q --tb=short
55
+ env:
56
+ PHILEAS_TEST_MODE: "1"
57
+
58
+ build:
59
+ runs-on: ubuntu-latest
60
+ needs: test
61
+ steps:
62
+ - uses: actions/checkout@v6
63
+
64
+ - uses: actions/setup-python@v6
65
+ with:
66
+ python-version: "3.14"
67
+ allow-prereleases: true
68
+
69
+ - name: Install build tools
70
+ run: pip install build
71
+
72
+ - name: Build package
73
+ run: python -m build
74
+
75
+ - uses: actions/upload-artifact@v7
76
+ with:
77
+ name: dist
78
+ path: dist/
@@ -0,0 +1,51 @@
1
+ name: Release
2
+
3
+ # Build and publish phileas-memory to PyPI when a GitHub Release is published.
4
+ # Publishing uses PyPI Trusted Publishing (OIDC) — no API token stored in the
5
+ # repo. See RELEASING.md for the one-time PyPI setup and how to cut a release.
6
+
7
+ on:
8
+ release:
9
+ types: [published]
10
+
11
+ jobs:
12
+ build:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v6
16
+
17
+ - uses: actions/setup-python@v6
18
+ with:
19
+ python-version: "3.14"
20
+ allow-prereleases: true
21
+
22
+ - name: Install build tools
23
+ run: pip install build twine
24
+
25
+ - name: Build sdist and wheel
26
+ run: python -m build
27
+
28
+ - name: Check artifacts
29
+ run: twine check dist/*
30
+
31
+ - uses: actions/upload-artifact@v7
32
+ with:
33
+ name: dist
34
+ path: dist/
35
+
36
+ publish:
37
+ needs: build
38
+ runs-on: ubuntu-latest
39
+ environment:
40
+ name: pypi
41
+ url: https://pypi.org/p/phileas-memory
42
+ permissions:
43
+ id-token: write # required for Trusted Publishing
44
+ steps:
45
+ - uses: actions/download-artifact@v8
46
+ with:
47
+ name: dist
48
+ path: dist/
49
+
50
+ - name: Publish to PyPI
51
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,19 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ *.egg-info/
5
+
6
+ # local data + transient artifacts
7
+ simulation_results.json
8
+ research/
9
+
10
+ # tests/: the unit-test code IS tracked. What stays LOCAL-only is anything that
11
+ # carries fixtures sampled from real Claude Code sessions — private personal
12
+ # conversation context (issue #39). Never track these:
13
+ # - real-session run outputs (e2e_runs/, path3_runs/)
14
+ # - the eval/probe sampling harness, which reads real session snapshots
15
+ # Any test file tracked under tests/ must contain ONLY synthetic data: no real names,
16
+ # places, events, or handles.
17
+ tests/e2e_runs/
18
+ tests/path3_runs/
19
+ tests/eval/
@@ -0,0 +1,14 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.15.7
4
+ hooks:
5
+ - id: ruff
6
+ - id: ruff-format
7
+ - repo: local
8
+ hooks:
9
+ - id: pytest
10
+ name: pytest
11
+ entry: uv run pytest tests/ -x -q
12
+ language: system
13
+ pass_filenames: false
14
+ stages: [pre-push]
@@ -0,0 +1 @@
1
+ 3.14
@@ -0,0 +1,81 @@
1
+ Metadata-Version: 2.4
2
+ Name: phileas-memory
3
+ Version: 0.1.0
4
+ Summary: Local-first long-term memory for AI companions
5
+ Project-URL: Homepage, https://github.com/alexajuno/phileas
6
+ Project-URL: Repository, https://github.com/alexajuno/phileas
7
+ Project-URL: Issues, https://github.com/alexajuno/phileas/issues
8
+ License-Expression: MIT
9
+ Keywords: ai,embeddings,llm,local-first,mcp,memory,rag
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: chromadb>=1.0.0
20
+ Requires-Dist: click>=8.3.1
21
+ Requires-Dist: fastapi>=0.136.3
22
+ Requires-Dist: kuzu>=0.8.0
23
+ Requires-Dist: litellm>=1.83.0
24
+ Requires-Dist: mcp[cli]
25
+ Requires-Dist: rich>=14.3.3
26
+ Requires-Dist: sentence-transformers>=5.3.0
27
+ Requires-Dist: uvicorn>=0.42.0
28
+ Provides-Extra: embeddings
29
+ Requires-Dist: sentence-transformers; extra == 'embeddings'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # Phileas — persistent memory for AI
33
+
34
+ AI conversations reset every session. Phileas is a local memory layer that any LLM can read and write through [MCP](https://modelcontextprotocol.io/), so context survives across sessions and tools.
35
+
36
+ ## Get started
37
+
38
+ ```bash
39
+ pip install phileas-memory
40
+ phileas init
41
+ ```
42
+
43
+ The setup wizard connects Phileas to your MCP client (Claude, GPT, Ollama, or any other) and chooses where to store memories.
44
+
45
+ ### First run
46
+
47
+ On first run, `phileas init` downloads two small models that run locally — an
48
+ embedding model (`all-MiniLM-L6-v2`) and a reranker
49
+ (`ms-marco-MiniLM-L-6-v2`), about 150 MB together — from
50
+ [Hugging Face](https://huggingface.co/). Expect a one-time wait on a slow
51
+ connection; they're cached afterward, so later runs work offline.
52
+
53
+ No external LLM API key is needed to try Phileas: your MCP client's model does
54
+ the reasoning, and the embedding and reranking run on your machine.
55
+
56
+ ## Connect to your AI
57
+
58
+ If you use Claude Code, `phileas init` handles this automatically.
59
+
60
+ For other MCP clients, start the server and point your client at it:
61
+
62
+ ```bash
63
+ phileas serve
64
+ ```
65
+
66
+ ## Principles
67
+
68
+ - **Local-first** — memories stay on your machine
69
+ - **Model-agnostic** — works with any LLM via MCP
70
+ - **Natural forgetting** — irrelevant detail decays; recall favors what stays useful
71
+ - **Open** — run it yourself, read the code
72
+
73
+ For the command reference, run `phileas --help` or `phileas COMMAND --help`.
74
+
75
+ ## Requirements
76
+
77
+ Python 3.11+
78
+
79
+ ## License
80
+
81
+ MIT
@@ -0,0 +1,50 @@
1
+ # Phileas — persistent memory for AI
2
+
3
+ AI conversations reset every session. Phileas is a local memory layer that any LLM can read and write through [MCP](https://modelcontextprotocol.io/), so context survives across sessions and tools.
4
+
5
+ ## Get started
6
+
7
+ ```bash
8
+ pip install phileas-memory
9
+ phileas init
10
+ ```
11
+
12
+ The setup wizard connects Phileas to your MCP client (Claude, GPT, Ollama, or any other) and chooses where to store memories.
13
+
14
+ ### First run
15
+
16
+ On first run, `phileas init` downloads two small models that run locally — an
17
+ embedding model (`all-MiniLM-L6-v2`) and a reranker
18
+ (`ms-marco-MiniLM-L-6-v2`), about 150 MB together — from
19
+ [Hugging Face](https://huggingface.co/). Expect a one-time wait on a slow
20
+ connection; they're cached afterward, so later runs work offline.
21
+
22
+ No external LLM API key is needed to try Phileas: your MCP client's model does
23
+ the reasoning, and the embedding and reranking run on your machine.
24
+
25
+ ## Connect to your AI
26
+
27
+ If you use Claude Code, `phileas init` handles this automatically.
28
+
29
+ For other MCP clients, start the server and point your client at it:
30
+
31
+ ```bash
32
+ phileas serve
33
+ ```
34
+
35
+ ## Principles
36
+
37
+ - **Local-first** — memories stay on your machine
38
+ - **Model-agnostic** — works with any LLM via MCP
39
+ - **Natural forgetting** — irrelevant detail decays; recall favors what stays useful
40
+ - **Open** — run it yourself, read the code
41
+
42
+ For the command reference, run `phileas --help` or `phileas COMMAND --help`.
43
+
44
+ ## Requirements
45
+
46
+ Python 3.11+
47
+
48
+ ## License
49
+
50
+ MIT
@@ -0,0 +1,46 @@
1
+ # Releasing `phileas-memory`
2
+
3
+ Phileas publishes to [PyPI](https://pypi.org/project/phileas-memory/) so that
4
+ `pip install phileas-memory` works for anyone. Publishing runs automatically from
5
+ GitHub Actions (`.github/workflows/release.yml`) when a GitHub Release is
6
+ published, using [PyPI Trusted Publishing](https://docs.pypi.org/trusted-publishers/)
7
+ (OIDC) — there is no API token stored in the repository.
8
+
9
+ ## One-time setup (maintainer)
10
+
11
+ Do this once, before the first release.
12
+
13
+ 1. Reserve the name on PyPI by creating the project's first release (the steps
14
+ below) — or, if the name is unclaimed, configure a **pending publisher** so
15
+ the project is created by the first Trusted-Publishing upload.
16
+ 2. On PyPI, go to the project (or **Your projects → Publishing** for a pending
17
+ publisher) and add a **GitHub** trusted publisher with:
18
+ - **Owner:** `alexajuno`
19
+ - **Repository:** `phileas`
20
+ - **Workflow name:** `release.yml`
21
+ - **Environment:** `pypi`
22
+ 3. In the GitHub repo settings, create an **Environment** named `pypi`
23
+ (Settings → Environments → New environment). No secrets are needed; the
24
+ environment just scopes the OIDC trust and lets you add reviewers later.
25
+
26
+ ## Cutting a release
27
+
28
+ 1. Make sure `main` is green and pick the new version (semver).
29
+ 2. Bump `version` in `pyproject.toml` and commit it on `main`.
30
+ 3. Tag and push:
31
+ ```bash
32
+ git tag v0.1.0
33
+ git push origin v0.1.0
34
+ ```
35
+ 4. Create a GitHub Release for that tag (`gh release create v0.1.0 --generate-notes`).
36
+ Publishing the release triggers `release.yml`, which builds the sdist + wheel,
37
+ runs `twine check`, and publishes to PyPI.
38
+ 5. Confirm it landed: `pip install phileas-memory==0.1.0` in a clean environment.
39
+
40
+ ## Building locally (optional sanity check)
41
+
42
+ ```bash
43
+ pip install build twine
44
+ python -m build # writes dist/*.tar.gz and dist/*.whl
45
+ twine check dist/*
46
+ ```
@@ -0,0 +1,192 @@
1
+ # LoCoMo smoke harness (Tier-2, manual / me-as-model)
2
+
3
+ A directional smoke for Phileas recall against the [LoCoMo](https://snap-research.github.io/locomo/)
4
+ long-term-conversation benchmark. **Not a pytest test** — it loads one LoCoMo
5
+ conversation into an isolated Phileas store and scores whether each question's gold
6
+ `evidence` turn surfaces in recall's top-k.
7
+
8
+ Why this exists, the full landscape, and the run findings:
9
+ [`docs/research/eval-benchmarks.md`](../../docs/research/eval-benchmarks.md).
10
+
11
+ > This smoke is **directional, not a quotable number**: 1 conversation, 9
12
+ > hand-picked cases, and *mechanical* extraction (one memory per turn, not faithful
13
+ > summarization). It exists to catch regressions/improvements in recall behavior
14
+ > while iterating on AA-136 / AA-137, against the recorded baseline below.
15
+
16
+ ## Prereqs
17
+
18
+ - The repo venv (`.venv`) with the engine stack (chromadb, kuzu,
19
+ sentence-transformers). First run downloads the embedding + cross-encoder models.
20
+ - Network egress (model downloads + the one-time data fetch).
21
+
22
+ ## Run it
23
+
24
+ ```bash
25
+ # 0. Fetch the LoCoMo corpus (once). Default path is /tmp/locomo10.json;
26
+ # override with LOCOMO_JSON. 10 conversations, ~2.8 MB.
27
+ curl -s -o /tmp/locomo10.json \
28
+ https://raw.githubusercontent.com/snap-research/locomo/main/data/locomo10.json
29
+
30
+ # 1. See the sampled gold questions for a conversation (no store needed)
31
+ .venv/bin/python evals/locomo/locomo_smoke.py gold 0 --n 16
32
+
33
+ # 2. Extract one conversation into an ISOLATED store (~2 min: model load + ~419 memorize calls)
34
+ mkdir -p /tmp/locomo-eval/conv0
35
+ PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/locomo_smoke.py extract 0
36
+
37
+ # 3. Score the 9 baseline cases + about() probe (graph ON by default)
38
+ PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/score_run.py
39
+
40
+ # 3b. Score the no-graph floor (keyword + semantic only)
41
+ PHILEAS_EVAL_GRAPH=off PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/score_run.py
42
+
43
+ # Ad-hoc probing while playing the agent-in-loop:
44
+ PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/locomo_smoke.py ask "Sweden" --top-k 10
45
+ PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/locomo_smoke.py about Caroline
46
+ ```
47
+
48
+ Notes:
49
+ - The store under `PHILEAS_HOME` is **throwaway** — re-extract after any engine
50
+ change (extraction is deterministic, ~2 min). `dia_map.json` (dia_id → memory id)
51
+ lives in the home and is what makes scoring objective.
52
+ - `_engine()` uses `GraphStore` **in-process** (no daemon needed — the daemon only
53
+ arbitrates the Kuzu lock across multiple processes). `PHILEAS_EVAL_GRAPH=off`
54
+ falls back to the degraded no-graph path.
55
+
56
+ ## Baseline — conv0 (Caroline/Melanie), 2026-06-12, mechanical extraction
57
+
58
+ Pre-change reference. After improving AA-136/137, re-run and diff against this.
59
+
60
+ | Case | query | gold | graph OFF | graph ON |
61
+ |------|-------|------|-----------|----------|
62
+ | Q1 research FOCUSED | `adoption agencies` | D2:8 | D2:8 @3 | D2:8 @3 |
63
+ | Q1 research SENTENCE | `what did Caroline research about adoption` | D2:8 | D2:8 @1 | D2:8 @1 |
64
+ | Q2 LGBTQ group | `LGBTQ support group` | D1:3 | D1:3 @5 | D1:3 @4 |
65
+ | Q4 charity FOCUSED | `charity race awareness` | D2:2 | D2:2 @1 | D2:2 @1 |
66
+ | Q4 charity SENTENCE | `what did the charity race raise awareness for` | D2:2 | D2:2 @1 | D2:2 @1 |
67
+ | Q6 identity | `Caroline transgender identity` | D1:5 | D1:5 @7 | **MISS** |
68
+ | Q7 sunrise | `Melanie sunrise painting` | D1:12 | MISS¹ | MISS¹ |
69
+ | Q14 self-care | `Melanie self-care` | D2:5 | MISS² | MISS² |
70
+ | Q16 moved | `Caroline moved Sweden` | D3:13, D4:3 | **empty**³ | D4:3 @4 |
71
+ | **any-gold-surfaced** | | | **6/9** | **6/9** |
72
+
73
+ `about('Caroline')` → 211/419 memories · `about('Melanie')` → 208/419 (firehose —
74
+ extraction tags the speaker on every turn).
75
+
76
+ ¹ **False miss** — gold `D1:12` is the image-share turn; the answer-bearing turn
77
+ `D1:14` ("painted that lake sunrise last year") surfaces at rank 1. Score the
78
+ *answer*, not evidence rank.
79
+ ² **Reranker/vocabulary gap** — `D2:5` phrases self-care as "me-time… running,
80
+ reading, violin"; semantic search ranks loosely related phrasings ("take care of yourself") above it.
81
+ ³ **`SIMILARITY_FLOOR=0.5` soft-failure** — all semantic candidates 0.378–0.458,
82
+ every one cut → empty result.
83
+
84
+ ### Win conditions when revisiting (after AA-136 / AA-137)
85
+
86
+ - Q16 stays a hit **and** Q6 returns (≤ top-10) → broadening + distributional cut
87
+ rescue without the flood regression.
88
+ - Q14 surfaces `D2:5` → wider candidate pool + reranking closed the vocabulary gap.
89
+ - `about()` stops returning ~half the corpus → needs faithful extraction (tag
90
+ mentioned entities, not the speaker) before this is meaningful; do that re-extract
91
+ first or the firehose masks the signal.
92
+ - Replace evidence-rank with an **answer-level judge** (LoCoMo's own protocol)
93
+ before trusting any aggregate — evidence-rank both under- and over-counts (¹, Q6).
94
+
95
+ ## Status — after the recall rework (2026-06-12)
96
+
97
+ The AA-136/137 work plus the follow-on recall rework landed on
98
+ `feat/recall-threshold`: the gather pool is decoupled from `top_k`, the relevance
99
+ cut governs result size (no count cap), the graph hop is relevance-gated (an
100
+ entity match pulls only the memories that stand out for the query, not all of
101
+ them), the keyword floor scales by term **rarity (IDF)** rather than coverage,
102
+ and the default cut is `ratio` (a head-selector) rather than `gap` (a
103
+ tail-trimmer). Re-scored conv0:
104
+
105
+ | measure | result |
106
+ |---------|--------|
107
+ | conv0 smoke (top-10), graph ON / OFF | **6/9** |
108
+ | threshold mode (no `top_k`), focused queries | 4–36 memories, self-bound |
109
+ | broad-query breadth | bounded by the cut (`painting` 1, `Caroline` 82, was 389) |
110
+ | Q16 "Caroline moved Sweden" | rescued — `D4:3` @2 (cosine 0.23 / sem-rank 415, via rare-term IDF) |
111
+
112
+ The win conditions above are met except where they depend on faithful extraction
113
+ (the `about()` firehose) or an answer-level judge.
114
+
115
+ ## Faithful extraction — demonstrated (2026-06-13)
116
+
117
+ `LOCOMO_FAITHFUL=<path>` swaps the verbatim per-turn copy for a self-contained
118
+ fact per turn: pronouns resolved, the concept named in the text, speakers
119
+ attributed, and every named person tagged (not just the speaker). The facts for
120
+ conv0 sessions 1–4 (all 9 gold cases live there) are hand-written in
121
+ `faithful_conv0.json` — me-as-model, the Tier-2 reader. Sessions 5–19 stay
122
+ mechanical, so the run is faithful needles in a mechanical haystack and the turn
123
+ count is identical (419) — a clean A/B on text quality alone.
124
+
125
+ ```bash
126
+ mkdir -p /tmp/locomo-eval/conv0faith
127
+ LOCOMO_FAITHFUL=evals/locomo/faithful_conv0.json \
128
+ PHILEAS_HOME=/tmp/locomo-eval/conv0faith .venv/bin/python evals/locomo/locomo_smoke.py extract 0
129
+ PHILEAS_HOME=/tmp/locomo-eval/conv0faith .venv/bin/python evals/locomo/score_run.py
130
+ ```
131
+
132
+ | case | mechanical | faithful |
133
+ |------|-----------|----------|
134
+ | Q6 identity (`D1:5`) | @7 | **@2** |
135
+ | Q7 sunrise (`D1:12`) | miss | **@2** |
136
+ | Q14 self-care (`D2:5`)| miss | **@3** |
137
+ | Q1 research (`D2:8`) | @1 | @3 (longer fact, still top-10) |
138
+ | **any-gold-surfaced** | **7/9** | **9/9** |
139
+
140
+ The mechanism, read off `ask "Melanie self-care"`: mechanically, the answer turn
141
+ `D2:5` is verbatim *"carving out some me-time… running, reading, violin"* — no
142
+ "self-care" token, so it never surfaces; the query instead matches `D2:3`/`D2:4`,
143
+ which carry the word but not the answer. The faithful fact for `D2:5` reads
144
+ *"Melanie practices self-care by carving out daily me-time — running, reading, or
145
+ playing her violin"*, co-locating the concept with the answer so the cross-encoder
146
+ scores it `@3 (0.574)`. Per-turn copy splits the concept from its answer across
147
+ adjacent turns; a reader writes them into one fact. Closes Q14's vocabulary gap
148
+ and lifts Q6 with no bigger embedder, query expansion, or reranker change.
149
+
150
+ ### Open problems — pick up here next session
151
+
152
+ - [x] **Q14 vocabulary gap** — closed by faithful extraction (above); the concept
153
+ word lands in the answer-bearing fact, so the cross-encoder matches it.
154
+ - [x] **Faithful extraction** — `faithful_conv0.json` + `LOCOMO_FAITHFUL` exist
155
+ and tag named entities rather than the speaker. `about()` still returns ~half
156
+ the corpus, because in a two-person conversation nearly every fact names one of
157
+ the two speakers — the firehose is inherent to the corpus, not the tagging.
158
+ - [ ] **Evidence-rank still both under- and over-counts.** Q7 (`D1:12` is the
159
+ image-share turn; the answer `D1:14` "painted that lake sunrise" is the real
160
+ evidence) and Q6 (`D1:5` is one of several valid transgender turns) are mislabels.
161
+ An **answer-level judge** (LoCoMo's own protocol) is the honest metric — pending
162
+ because it needs a paid LLM call per question.
163
+ - [ ] **Faithful extraction at corpus scale.** Sessions 1–4 are hand-written.
164
+ A quotable number needs faithful facts for every turn (419 in conv0 alone) of all 10 conversations,
165
+ which is the real ingest path (`ingest_session` → agent → `memorize_batch`),
166
+ not hand authoring.
167
+ - [ ] **Tier-2 real number.** This 9-case smoke is directional only. A quotable
168
+ LoCoMo figure needs the answer-level LLM judge + faithful extraction across all
169
+ 10 conversations (the agent-in-loop Mode B in
170
+ [`docs/research/eval-benchmarks.md`](../../docs/research/eval-benchmarks.md)).
171
+
172
+ ## Method sweep (distributional cut)
173
+
174
+ `sweep_standout.py` re-runs the 9 baseline cases under each `PHILEAS_STANDOUT`
175
+ strategy (`gap` / `zscore` / `ratio` / `knee`, plus `absolute:X` flat-floor
176
+ references) against an already-extracted store, so you can read off which cut
177
+ recovers cases like Q6/Q14 without re-extracting:
178
+
179
+ ```bash
180
+ PHILEAS_HOME=/tmp/locomo-eval/conv0 .venv/bin/python evals/locomo/sweep_standout.py
181
+ ```
182
+
183
+ It prints, per method, how many cases surfaced any gold and the mean rank of
184
+ surfaced golds (lower = better). The `absolute:X` rows apply one floor uniformly
185
+ to both cut sites — a baseline to beat, not the exact historical split.
186
+
187
+ ## Files
188
+
189
+ - `locomo_smoke.py` — loader, extractor (mechanical / windowed / faithful), `ask` / `about` / `gold` probes.
190
+ - `faithful_conv0.json` — hand-written self-contained facts for conv0 sessions 1–4, loaded via `LOCOMO_FAITHFUL`.
191
+ - `score_run.py` — the 9 baseline cases + `about()` probe, objective dia-id scoring.
192
+ - `sweep_standout.py` — re-scores those cases under each distributional-cut strategy.
@@ -0,0 +1,78 @@
1
+ {
2
+ "D1:1": "Caroline greeted Melanie warmly when the two friends reconnected.",
3
+ "D1:2": "Melanie is swamped with her kids and her work.",
4
+ "D1:3": "Caroline attended an LGBTQ support group and found it powerful and moving.",
5
+ "D1:4": "Melanie asked Caroline about the inspiring stories she heard at the LGBTQ support group.",
6
+ "D1:5": "Caroline was inspired by the transgender stories shared at the LGBTQ support group and felt grateful for the acceptance and support there.",
7
+ "D1:6": "Melanie admired a painting Caroline shared and asked what the support group had done for her.",
8
+ "D1:7": "The LGBTQ support group made Caroline feel accepted and gave her the courage to embrace herself.",
9
+ "D1:8": "Melanie praised Caroline's courage in embracing who she is.",
10
+ "D1:9": "Caroline plans to continue her education and explore career options.",
11
+ "D1:10": "Melanie asked what kinds of careers Caroline was considering.",
12
+ "D1:11": "Caroline is keen on a career in counseling or mental health, so she can support people facing struggles similar to her own.",
13
+ "D1:12": "Melanie told Caroline she'd make a great counselor and shared one of her own paintings — a sunset over a lake.",
14
+ "D1:13": "Caroline asked whether the lake painting was Melanie's own work.",
15
+ "D1:14": "Melanie painted the lake sunrise herself last year, and the painting is special to her.",
16
+ "D1:15": "Caroline praised the blended colors in Melanie's painting and called painting a great outlet for self-expression.",
17
+ "D1:16": "Melanie paints to express her feelings, get creative, and relax after a long day.",
18
+ "D1:17": "Caroline agreed that relaxing and self-expression matter, then headed off to do some research.",
19
+ "D1:18": "Melanie said taking care of ourselves is vital and went swimming with her kids.",
20
+ "D2:1": "Melanie ran a charity race for mental health and found it rewarding; it made her reflect on taking care of one's mind.",
21
+ "D2:2": "Caroline praised Melanie's charity race for making a difference and raising awareness for mental health.",
22
+ "D2:3": "Melanie is realizing that self-care is important: when she looks after herself, she can better look after her family.",
23
+ "D2:4": "Caroline agreed that self-care matters and praised Melanie for prioritizing it.",
24
+ "D2:5": "Melanie practices self-care by carving out daily me-time — running, reading, or playing her violin — which refreshes her and helps her stay present for her family.",
25
+ "D2:6": "Caroline encouraged Melanie that taking time for yourself is important and that she's looking after herself and her family well.",
26
+ "D2:7": "Melanie said her self-care is still a work in progress; her kids are excited for summer break and the family is planning a camping trip next month.",
27
+ "D2:8": "Caroline is researching adoption agencies — it has long been her dream to have a family and give a loving home to children who need one.",
28
+ "D2:9": "Melanie praised Caroline's plan to adopt and give children in need a loving home.",
29
+ "D2:10": "Caroline's goal is to give children a loving home; she shared one of the adoption agencies she is considering and feels hopeful and optimistic.",
30
+ "D2:11": "Melanie asked what made Caroline choose that particular adoption agency.",
31
+ "D2:12": "Caroline chose that adoption agency because it helps LGBTQ+ people adopt, and its inclusivity and support spoke to her.",
32
+ "D2:13": "Melanie praised the agency's inclusivity and asked what Caroline was most excited about in the adoption process.",
33
+ "D2:14": "Caroline is thrilled to build a family for children who need one, though she knows it will be hard as a single parent.",
34
+ "D2:15": "Melanie told Caroline she will be a wonderful mother to the children she adopts.",
35
+ "D2:16": "Caroline vowed to give the children she adopts a safe and loving home.",
36
+ "D2:17": "Melanie said Caroline's caring heart means the adopted children will have all the love and stability they need.",
37
+ "D3:1": "Caroline spoke at a school event about her transgender journey and encouraged students to get involved in the LGBTQ community; she reflected on how far she has come since she began transitioning three years ago.",
38
+ "D3:2": "Melanie said she is proud of Caroline for spreading LGBTQ awareness and coming so far since her transition.",
39
+ "D3:3": "Caroline felt powerful giving her talk, sharing her struggles and growth since coming out, and was grateful to give a voice to the trans community.",
40
+ "D3:4": "Melanie praised Caroline as brave for speaking up for the trans community and inspiring others toward inclusivity and acceptance.",
41
+ "D3:5": "Caroline believes sharing personal experiences promotes understanding and acceptance, and she wants to pass on the love and support she has received.",
42
+ "D3:6": "Melanie reflected that sharing our vulnerable stories bonds people and shows others facing the same challenges that they are not alone.",
43
+ "D3:7": "Caroline is grateful for the chance to share her story and intends to keep using her voice to build a more inclusive, understanding world.",
44
+ "D3:8": "Melanie said she is proud to be part of the difference Caroline is making.",
45
+ "D3:9": "Caroline said she and Melanie can spread love and tackle life's challenges together.",
46
+ "D3:10": "Melanie said her family motivates her to be courageous, and asked what motivates Caroline.",
47
+ "D3:11": "Caroline said her friends, family, and mentors are her rocks who give her strength, and she shared a photo from a recent meetup.",
48
+ "D3:12": "Melanie asked how long Caroline has had such a strong support system.",
49
+ "D3:13": "Caroline has known her close friends for four years, ever since she moved away from her home country; their support mattered especially after a tough breakup.",
50
+ "D3:14": "Melanie said her husband and kids keep her motivated, and shared a family photo.",
51
+ "D3:15": "Caroline asked Melanie how long she has been married.",
52
+ "D3:16": "Melanie has been married for five years and shared a photo from her wedding day.",
53
+ "D3:17": "Caroline congratulated Melanie on her marriage and wished the couple many happy years.",
54
+ "D3:18": "Melanie said her family and the moments they share make everything worthwhile.",
55
+ "D3:19": "Caroline remarked that Melanie's family looked happy and asked about their day.",
56
+ "D3:20": "Melanie described a fun family day of playing games, eating good food, and spending time together.",
57
+ "D3:21": "Caroline encouraged Melanie to cherish those family moments.",
58
+ "D3:22": "Melanie cherishes time with her family — it is when she feels most alive and happy.",
59
+ "D3:23": "Caroline agreed that time with loved ones brings happiness and that family is everything.",
60
+ "D4:1": "Caroline shared a photo of a necklace bearing a cross and a heart, saying a lot had been going on in her life.",
61
+ "D4:2": "Melanie admired Caroline's necklace and asked whether it held a special meaning.",
62
+ "D4:3": "Caroline's treasured necklace was a gift from her grandmother in her home country, Sweden; it symbolizes love, faith, and strength, and reminds her of her roots and her family's support.",
63
+ "D4:4": "Melanie reflected on how objects can hold deep meaning and asked Caroline about other treasured items.",
64
+ "D4:5": "Caroline treasures a hand-painted bowl a friend made for her eighteenth birthday ten years ago; its pattern and colors remind her of art and self-expression.",
65
+ "D4:6": "Melanie took her family camping in the mountains and had a wonderful time together.",
66
+ "D4:7": "Caroline asked Melanie about the family camping trip.",
67
+ "D4:8": "Melanie's mountain camping trip included exploring nature, roasting marshmallows by the campfire, and a hike with an amazing view; her two younger kids love nature.",
68
+ "D4:9": "Caroline said family moments like the camping trip are special.",
69
+ "D4:10": "Melanie said family time matters to her and asked what Caroline had been up to lately.",
70
+ "D4:11": "Caroline has been pursuing counseling and mental health as a career, to help people who have gone through what she has.",
71
+ "D4:12": "Melanie asked what kind of counseling and mental health work Caroline wants to pursue.",
72
+ "D4:13": "Caroline wants to work as a counselor with trans people, and she recently attended an enlightening LGBTQ+ counseling workshop on therapeutic methods for supporting trans clients.",
73
+ "D4:14": "Melanie praised Caroline's dedication and asked what motivated her to pursue counseling.",
74
+ "D4:15": "Caroline said her own journey and the support she received motivated her to pursue counseling, after she saw how counseling and support groups improved her life.",
75
+ "D4:16": "Melanie admired how Caroline turned her own experience into a passion for helping others.",
76
+ "D4:17": "Caroline thanked Melanie for her kind words.",
77
+ "D4:18": "Melanie congratulated Caroline for pursuing the work she truly cares about."
78
+ }