memex-chats 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. memex_chats-0.1.0/.env.example +46 -0
  2. memex_chats-0.1.0/.github/workflows/ci.yml +40 -0
  3. memex_chats-0.1.0/.gitignore +78 -0
  4. memex_chats-0.1.0/.python-version +1 -0
  5. memex_chats-0.1.0/CHANGELOG.md +133 -0
  6. memex_chats-0.1.0/CLAUDE.md +113 -0
  7. memex_chats-0.1.0/CONTRIBUTING.md +83 -0
  8. memex_chats-0.1.0/DEVLOG.md +984 -0
  9. memex_chats-0.1.0/LICENSE +21 -0
  10. memex_chats-0.1.0/PKG-INFO +322 -0
  11. memex_chats-0.1.0/PRIVACY.md +55 -0
  12. memex_chats-0.1.0/README.md +278 -0
  13. memex_chats-0.1.0/ROADMAP.md +137 -0
  14. memex_chats-0.1.0/chrome-extension/README.md +75 -0
  15. memex_chats-0.1.0/chrome-extension/WEB_STORE_CHECKLIST.md +149 -0
  16. memex_chats-0.1.0/chrome-extension/icons/icon-128.png +0 -0
  17. memex_chats-0.1.0/chrome-extension/icons/icon-16.png +0 -0
  18. memex_chats-0.1.0/chrome-extension/icons/icon-32.png +0 -0
  19. memex_chats-0.1.0/chrome-extension/icons/icon-48.png +0 -0
  20. memex_chats-0.1.0/chrome-extension/icons/icon.svg +6 -0
  21. memex_chats-0.1.0/chrome-extension/manifest.json +46 -0
  22. memex_chats-0.1.0/chrome-extension/src/background.js +193 -0
  23. memex_chats-0.1.0/chrome-extension/src/content.js +12 -0
  24. memex_chats-0.1.0/chrome-extension/src/inject.js +112 -0
  25. memex_chats-0.1.0/chrome-extension/src/popup.html +150 -0
  26. memex_chats-0.1.0/chrome-extension/src/popup.js +95 -0
  27. memex_chats-0.1.0/docs/screenshots/session-memory-check.jpeg +0 -0
  28. memex_chats-0.1.0/pyproject.toml +95 -0
  29. memex_chats-0.1.0/scripts/_run-server.ps1 +40 -0
  30. memex_chats-0.1.0/scripts/inspect_export.py +297 -0
  31. memex_chats-0.1.0/scripts/install-autostart.ps1 +179 -0
  32. memex_chats-0.1.0/scripts/install-autostart.sh +145 -0
  33. memex_chats-0.1.0/src/memex/__init__.py +3 -0
  34. memex_chats-0.1.0/src/memex/cli/__init__.py +0 -0
  35. memex_chats-0.1.0/src/memex/cli/main.py +911 -0
  36. memex_chats-0.1.0/src/memex/config.py +72 -0
  37. memex_chats-0.1.0/src/memex/core/__init__.py +0 -0
  38. memex_chats-0.1.0/src/memex/core/embeddings/__init__.py +47 -0
  39. memex_chats-0.1.0/src/memex/core/embeddings/base.py +70 -0
  40. memex_chats-0.1.0/src/memex/core/embeddings/fake.py +66 -0
  41. memex_chats-0.1.0/src/memex/core/embeddings/fastembed_embedder.py +88 -0
  42. memex_chats-0.1.0/src/memex/core/embeddings/ollama.py +98 -0
  43. memex_chats-0.1.0/src/memex/core/ingest/__init__.py +0 -0
  44. memex_chats-0.1.0/src/memex/core/ingest/chunker.py +68 -0
  45. memex_chats-0.1.0/src/memex/core/ingest/claude_export.py +235 -0
  46. memex_chats-0.1.0/src/memex/core/ingest/content_renderer.py +95 -0
  47. memex_chats-0.1.0/src/memex/core/ingest/pipeline.py +397 -0
  48. memex_chats-0.1.0/src/memex/core/models.py +118 -0
  49. memex_chats-0.1.0/src/memex/core/repos/__init__.py +42 -0
  50. memex_chats-0.1.0/src/memex/core/repos/discovery.py +176 -0
  51. memex_chats-0.1.0/src/memex/core/repos/keys.py +96 -0
  52. memex_chats-0.1.0/src/memex/core/repos/matcher.py +137 -0
  53. memex_chats-0.1.0/src/memex/core/repos/resolve.py +40 -0
  54. memex_chats-0.1.0/src/memex/core/storage/__init__.py +0 -0
  55. memex_chats-0.1.0/src/memex/core/storage/db.py +123 -0
  56. memex_chats-0.1.0/src/memex/core/storage/repo.py +859 -0
  57. memex_chats-0.1.0/src/memex/core/storage/schema.sql +147 -0
  58. memex_chats-0.1.0/src/memex/core/summaries/__init__.py +39 -0
  59. memex_chats-0.1.0/src/memex/core/summaries/anthropic_summarizer.py +115 -0
  60. memex_chats-0.1.0/src/memex/core/summaries/base.py +53 -0
  61. memex_chats-0.1.0/src/memex/core/summaries/fake.py +44 -0
  62. memex_chats-0.1.0/src/memex/transports/__init__.py +0 -0
  63. memex_chats-0.1.0/src/memex/transports/http_ingest.py +172 -0
  64. memex_chats-0.1.0/src/memex/transports/stdio.py +320 -0
  65. memex_chats-0.1.0/src/memex/transports/tools.py +551 -0
  66. memex_chats-0.1.0/tests/__init__.py +0 -0
  67. memex_chats-0.1.0/tests/conftest.py +121 -0
  68. memex_chats-0.1.0/tests/integration/__init__.py +0 -0
  69. memex_chats-0.1.0/tests/integration/test_full_flow.py +72 -0
  70. memex_chats-0.1.0/tests/integration/test_ollama_embedder.py +93 -0
  71. memex_chats-0.1.0/tests/unit/__init__.py +0 -0
  72. memex_chats-0.1.0/tests/unit/test_chunker.py +105 -0
  73. memex_chats-0.1.0/tests/unit/test_claude_export.py +310 -0
  74. memex_chats-0.1.0/tests/unit/test_cli.py +316 -0
  75. memex_chats-0.1.0/tests/unit/test_cli_repos.py +404 -0
  76. memex_chats-0.1.0/tests/unit/test_content_renderer.py +139 -0
  77. memex_chats-0.1.0/tests/unit/test_embedder_errors.py +92 -0
  78. memex_chats-0.1.0/tests/unit/test_embedder_factory.py +89 -0
  79. memex_chats-0.1.0/tests/unit/test_embeddings.py +96 -0
  80. memex_chats-0.1.0/tests/unit/test_http_ingest.py +203 -0
  81. memex_chats-0.1.0/tests/unit/test_models.py +157 -0
  82. memex_chats-0.1.0/tests/unit/test_pipeline.py +651 -0
  83. memex_chats-0.1.0/tests/unit/test_repos_discovery.py +214 -0
  84. memex_chats-0.1.0/tests/unit/test_repos_keys.py +109 -0
  85. memex_chats-0.1.0/tests/unit/test_repos_matcher.py +187 -0
  86. memex_chats-0.1.0/tests/unit/test_repos_storage.py +193 -0
  87. memex_chats-0.1.0/tests/unit/test_stdio_server.py +112 -0
  88. memex_chats-0.1.0/tests/unit/test_storage.py +708 -0
  89. memex_chats-0.1.0/tests/unit/test_summaries.py +145 -0
  90. memex_chats-0.1.0/tests/unit/test_tools.py +806 -0
  91. memex_chats-0.1.0/uv.lock +2263 -0
@@ -0,0 +1,46 @@
1
+ # Copy to .env and adjust values. .env is in .gitignore.
2
+
3
+ # Embeddings backend. Default: fastembed (zero-config; the ONNX model is
4
+ # downloaded automatically on first use). Alternative: ollama (requires a
5
+ # running local Ollama instance).
6
+ MEMEX_EMBED_BACKEND=fastembed
7
+
8
+ # Model name. If unset, each backend uses its default:
9
+ # - fastembed: nomic-ai/nomic-embed-text-v1.5-Q (~130 MB quantized)
10
+ # - ollama: nomic-embed-text
11
+ # MEMEX_EMBED_MODEL=nomic-ai/nomic-embed-text-v1.5
12
+
13
+ # Only used if MEMEX_EMBED_BACKEND=ollama
14
+ OLLAMA_HOST=http://localhost:11434
15
+
16
+ # SQLite database path (default: ./data/memex.db)
17
+ MEMEX_DB_PATH=./data/memex.db
18
+
19
+ # Directory where you keep official Claude.ai exports
20
+ MEMEX_EXPORTS_DIR=./data/exports
21
+
22
+ # Approximate chunk size in tokens (default: 500)
23
+ MEMEX_CHUNK_SIZE=500
24
+
25
+ # Overlap between chunks in tokens (default: 50)
26
+ MEMEX_CHUNK_OVERLAP=50
27
+
28
+ # Log level: DEBUG, INFO, WARNING, ERROR
29
+ MEMEX_LOG_LEVEL=INFO
30
+
31
+ # On-demand auto-summaries with Claude Haiku (opt-in). Requires the extra
32
+ # `summaries`: `uv sync --extra summaries`. When ON, summaries are generated
33
+ # lazily by `search_chats` for top-3 results without a cached summary, in
34
+ # parallel. Off by default to avoid surprise API calls.
35
+ MEMEX_SUMMARY_ENABLED=false
36
+
37
+ # Anthropic model for summary generation. Default: Haiku (cheap, fast).
38
+ # MEMEX_SUMMARY_MODEL=claude-haiku-4-5-20251001
39
+
40
+ # Max tokens for the generated summary. Default 200 (~3 sentences).
41
+ # MEMEX_SUMMARY_MAX_TOKENS=200
42
+
43
+ # Anthropic API key. If MEMEX_SUMMARY_ENABLED=true and this is missing or
44
+ # invalid, the search returns the matching chats without a summary and logs
45
+ # a warning per chat. The search itself never aborts.
46
+ # ANTHROPIC_API_KEY=sk-ant-...
@@ -0,0 +1,40 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ubuntu-latest
15
+ timeout-minutes: 10
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v3
21
+ with:
22
+ enable-cache: true
23
+
24
+ - name: Set up Python
25
+ run: uv python install 3.12
26
+
27
+ - name: Install dependencies
28
+ run: uv sync --extra dev
29
+
30
+ - name: Lint (ruff check)
31
+ run: uv run ruff check src tests
32
+
33
+ - name: Format check (ruff format)
34
+ run: uv run ruff format --check src tests
35
+
36
+ - name: Type check (mypy)
37
+ run: uv run mypy src/memex/core src/memex/config.py src/memex/transports
38
+
39
+ - name: Unit tests
40
+ run: uv run pytest tests/unit -q
@@ -0,0 +1,78 @@
1
+ # Personal data and secrets
2
+ # Nothing listed here should EVER go to the public repo
3
+ data/
4
+ *.zip
5
+ *.db
6
+ *.db-journal
7
+ *.db-shm
8
+ *.db-wal
9
+ .env
10
+ .env.*
11
+ !.env.example
12
+
13
+ # Internal context document (handoff doc, not for users)
14
+ MEMEX.md
15
+
16
+ # Python
17
+ __pycache__/
18
+ *.py[cod]
19
+ *$py.class
20
+ *.so
21
+ .Python
22
+ *.egg-info/
23
+ *.egg
24
+ build/
25
+ dist/
26
+ .eggs/
27
+
28
+ # Virtual environments
29
+ .venv/
30
+ venv/
31
+ env/
32
+ ENV/
33
+
34
+ # uv
35
+ .uv/
36
+
37
+ # Testing and coverage
38
+ .pytest_cache/
39
+ .coverage
40
+ .coverage.*
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ coverage.xml
45
+ *.cover
46
+
47
+ # Type checking
48
+ .mypy_cache/
49
+ .pyre/
50
+ .pytype/
51
+ .ruff_cache/
52
+
53
+ # IDEs and editors
54
+ .idea/
55
+ .vscode/
56
+ *.swp
57
+ *.swo
58
+ *~
59
+ .DS_Store
60
+ Thumbs.db
61
+
62
+ # Logs
63
+ *.log
64
+ logs/
65
+
66
+ # Notebooks scratch
67
+ .ipynb_checkpoints/
68
+
69
+ # Claude Code local memory (in case it shows up in the working dir)
70
+ .claude/
71
+
72
+ # Local MCP config (absolute path, specific to your machine). The example
73
+ # snippet lives in the README; each dev creates their own.
74
+ .mcp.json
75
+
76
+ # Handoff doc between work sessions. It is internal context (what we were
77
+ # doing, what is left, open decisions), not for users of the repo.
78
+ handoff.md
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,133 @@
1
+ # Changelog
2
+
3
+ All notable changes to Memex are documented here.
4
+
5
+ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). `0.1.0` is the first alpha release; before it the project lived in `0.0.x`.
6
+
7
+ ## [Unreleased]
8
+
9
+ ### Added (Phase 5 packaging, 2026-05-25)
10
+ - `memex doctor` diagnostic command. Checks Python version, database existence + schema version, embedder instantiability, live-capture server reachability, summarizer configuration (only if enabled), registered repos count, and indexed corpus count. Reports OK / WARN / FAIL per check, exits non-zero only on FAIL. 4 new unit tests.
11
+ - `memex install-service` cross-platform autostart dispatcher. Detects host OS and delegates: Windows runs the existing Scheduled Task installer, Linux writes a new systemd user unit (`~/.config/systemd/user/memex-serve.service`) and starts it via `systemctl --user`. macOS prints manual instructions (launchd integration deferred to 0.2.0). 6 new unit tests covering the dispatch logic with mocked `platform.system` and `subprocess.run`.
12
+ - New `scripts/install-autostart.sh` for Linux. Subcommands `install`, `uninstall`, `status`. Resolves `uv` lazily at install time, falls back to PATH lookup if `uv` is not absolute. Auto-creates `~/.local/state/memex/` for logs.
13
+ - `chrome-extension/WEB_STORE_CHECKLIST.md`: full Web Store submission playbook (developer account, privacy policy URL, asset sizes, listing copy, permissions justification, post-approval checklist).
14
+
15
+ ### Changed
16
+ - Package renamed from `memex` to `memex-chats` for PyPI publication. Both `memex` and `memex-mcp` are already taken on PyPI by unrelated projects (the latter was claimed the same day we attempted to publish). The CLI entry points stay `memex` and `memex-mcp`, so `.mcp.json` configs do not change. `Development Status` classifier bumped from `Pre-Alpha` to `Alpha`. Added `Operating System :: OS Independent` classifier. New `[project.urls]` section with Homepage / Repository / Issues / Changelog links.
17
+ - README quickstart restructured: "install from PyPI" is now the recommended path (option A), source install is option B. Diagnostics section added linking `memex doctor`. Autostart section unified across Windows + Linux + macOS placeholder.
18
+ - Chrome extension manifest description translated to English (was the last Spanish string in the extension).
19
+
20
+ ## [0.1.0] - 2026-05-24
21
+
22
+ Phase 3 closed: quality pass on retrieval. All four feature sub-tasks shipped and audited.
23
+
24
+ ### Added
25
+ - Optional auto-summary generation per chat, powered by Claude Haiku via the Anthropic API. Opt-in by setting `MEMEX_SUMMARY_ENABLED=true` and `ANTHROPIC_API_KEY`. Summaries are generated lazily when `search_chats` returns a chat that does not have one cached: up to 3 in parallel per call (`ThreadPoolExecutor`), silent fail per chat if the API errors. The summary is stored in `conversations.summary` and persists, so subsequent searches hit cache and do not pay the API again.
26
+ - `core/summaries/` module: `Summarizer` ABC, `AnthropicSummarizer` (real backend, lazy import of the SDK), `FakeSummarizer` (deterministic, used in tests), `get_default_summarizer()` factory that returns `None` when the feature flag is off.
27
+ - `conversations.content_hash` column (SHA-256 hex of canonical text). The pipeline computes and persists it on every ingest. Lets the lazy summarizer (and future consumers) detect content changes so a stale summary can be invalidated.
28
+ - `repo.get_conversation_text(uuid)` reconstructs the canonical message stream of a chat (same format the chunker uses); `repo.update_conversation_summary(uuid, text)` patches only the summary field without touching the rest of the row.
29
+ - `anthropic>=0.40` as a new optional dependency: install with `uv sync --extra summaries`.
30
+ - Additive schema migration (`_apply_additive_migrations` in `db.py`) so existing local databases gain `content_hash` without a reset.
31
+ - `stdio.search_chats` resolves the summarizer once per process via `get_default_summarizer()` and passes it through.
32
+ - 24 new unit tests covering `FakeSummarizer`, the factory, `AnthropicSummarizer` error paths, the lazy wire in `tools.search_chats` (no-summarizer path, generation for missing summaries, cache reuse, cap at 3, persistence to DB, per-chat silent fail), pipeline `content_hash` persistence, cached-summary preservation across same-content reingests, and the additive schema migration on a legacy database.
33
+
34
+ ### Changed
35
+ - `_ingest_conversation` computes the canonical text and `content_hash` before inserting; if the chat already exists with the same hash and a cached summary, the summary is preserved across the upsert (the parser's `summary` field would otherwise overwrite a lazy-generated one).
36
+ - `tools.get_chat` defaults lowered to fit comfortably inside the Claude Code MCP token budget: `messages_limit` 20 → 10, per-message text cap 3000 → 1500 chars. Worst-case response ~17k chars (was ~62k, which occasionally exceeded the client limit and triggered the "result saved to file" fallback). Hard max `messages_limit=100` unchanged; callers needing more detail can opt in explicitly. Docstrings updated so Claude paginates with `messages_offset=10` on long chats.
37
+ - `ROADMAP.md` and `DEVLOG.md` translated to English (previously Spanish, kept as internal journal). README note about Spanish internal docs removed.
38
+
39
+ ### Added (chat ↔ repo association, Phase 3 sub-task 2)
40
+ - New `repos` and `chat_repos` tables (many-to-many with `source ∈ {'auto', 'manual'}`, `confidence`, cascade FKs on both ends).
41
+ - New `core/repos/` module:
42
+ - `keys.py`: `normalize_path`, `normalize_remote` (SCP/HTTPS git URLs), `canonical_repo_key` (prefers remote over path).
43
+ - `discovery.py`: `parse_repo(path)` reads `.git/config` and `pyproject.toml`/`package.json`/`Cargo.toml`; produces a `RepoInfo`. `ChatRepoAssociation` dataclass for joined rows.
44
+ - `matcher.py`: `match_text(text, repos, threshold)` returns `Match(repo_key, confidence)` per repo with four signals (remote URL 1.0, path 0.9, manifest name 0.8, display name 0.5; highest wins per repo).
45
+ - Storage helpers in `core/storage/repo.py`: `insert_repo`, `get_repo`, `list_repos`, `delete_repo`, `associate_chat_repo` (refuses to overwrite `manual` with `auto`), `dissociate_chat_repo`, `list_repos_for_conversation` (joined, hydrated), `list_conversations_for_repo`.
46
+ - Pipeline auto-scan at ingest: `_ingest_conversation` runs the matcher against all registered repos after persisting the conv and upserts `source='auto'` associations. No-op when no repos are registered.
47
+ - CLI: new `memex repos` sub-app (`add`, `list`, `remove`, `scan`) and top-level `memex tag` / `memex untag` for manual overrides.
48
+ - `tools.search_chats(query, ..., repo=...)` accepts a path / git remote URL / canonical key. Resolves it via `_resolve_repo_key`, then `_apply_repo_boost` lowers the distance of associated hits by `REPO_BOOST_WEIGHT (0.3) * confidence` and re-sorts. Oversamples candidates (×5) when boosting so chats just outside the top-N can surface. Unregistered repo argument short-circuits with an actionable error pointing at `memex repos add`.
49
+ - `stdio.search_chats` MCP wrapper exposes `repo` to Claude Code; docstring instructs it to pass the cwd when working inside a repo.
50
+ - 65 new unit tests across keys (21), discovery (11), matcher (15), storage helpers (18), pipeline auto-scan (4), CLI (15), and search boost / resolve (7).
51
+
52
+ ### Added (SessionStart hook + find_related, Phase 3 sub-tasks 3 and 4)
53
+ - `memex session-context` CLI command. Auto-detects the active repo from cwd (new `find_repo_root` walks up looking for `.git`, handles both directory and gitlink-file forms used by worktrees). Resolves to a registered repo, prints a short Markdown blob with up to N associated chats (manual first, then auto by confidence). Designed to be wired into Claude Code's `SessionStart` hook in `.claude/settings.json`. Silent no-op when no `.git`, repo not registered, or no associations (diagnostics go to stderr).
54
+ - `_resolve_repo_key` extracted from `transports/tools.py` to new `core/repos/resolve.py`. Single source of truth shared by `search_chats(repo=...)`, `find_related(repo=...)`, and the new session-context command.
55
+ - `find_related(context, limit, repo)` MCP tool: takes free-form text and returns semantically similar chats via pure vector search. Capped at `FIND_RELATED_MAX_INPUT_CHARS=4000` chars to bound embedder latency. Same repo-boost mechanic as `search_chats`. Wired into `stdio.py` as the 4th MCP tool with docstring guiding Claude when to prefer it over `search_chats`.
56
+ - 16 new unit tests: 5 for the session-context CLI (no-git, unregistered, no-associations, prints associated, limit respected), 4 for `find_repo_root`, 7 for `find_related` (empty context, shape, truncation, limit clamp, unknown repo, boost reorders, embedder error).
57
+
58
+ ## [0.0.2] - 2026-05-20
59
+
60
+ Phase 2 closed. Live capture + hybrid search work end-to-end. First public-facing polish (badges, screenshot, CONTRIBUTING, CHANGELOG, CI). Windows autostart as preview of Phase 5. Closing audit applied: 3 important fixes and 4 minor fixes landed in this release.
61
+
62
+ ### Added
63
+ - CI workflow on GitHub Actions (`ruff check`, `ruff format --check`, `mypy`, unit tests). Read-only permissions, 10 min job timeout.
64
+ - `CONTRIBUTING.md` with local setup, code style, and PR workflow.
65
+ - This `CHANGELOG.md`.
66
+ - Badges in the README: CI status, License MIT, Python 3.12+.
67
+ - "Session memory check" screenshot in the README, embedded as end-to-end demo of the live capture + recall flow.
68
+ - Chrome extension icons (16/32/48/128 PNG + SVG source under `chrome-extension/icons/`). Manifest declares them both top-level (`icons`) and in `action.default_icon` so the toolbar and the extension page render the brand instead of the gray placeholder.
69
+ - Tests for `memex serve` CLI (CliRunner mocking `uvicorn.run` and `connect_and_init`) and for ingest rollback when the embedder fails mid-batch (closes two audit follow-ups from 2026-05-19).
70
+ - Windows autostart for the HTTP server (Phase 5 preview): `scripts/install-autostart.ps1` registers a Scheduled Task running `uv run memex serve` at log on, with `LogonType S4U` (no window, independent from the shell that triggered it) and auto-restart on failure. Manage with `-Install` / `-Uninstall` / `-Status`. Logs to `%LOCALAPPDATA%\Memex\serve.log`. The cross-platform formal version (`memex install-service` with systemd / launchd backends) stays on the Phase 5 roadmap.
71
+
72
+ ### Changed
73
+ - README translated fully to English. `ROADMAP.md` and `DEVLOG.md` remain in Spanish (internal journal).
74
+ - `ruff format` applied across `src/` and `tests/` (16 files reformatted, semantics unchanged). The check is now back in CI.
75
+ - Comment in `transports/http_ingest.py::_get_conn` rewritten to accurately describe the threading model and the future invariant for background tasks.
76
+
77
+ ### Fixed
78
+ - `scripts/_run-server.ps1`: log file no longer has mixed encoding. The previous version used `Out-File -Encoding utf8` for the banner line plus `*>> $LogFile` for the server output, but PowerShell 5.1's `*>>` defaults to UTF-16 LE, garbling the file. Now uses `2>&1 | Out-File -Encoding utf8` for consistent UTF-8.
79
+ - `chrome-extension/src/popup.js`: replaced `innerHTML` with DOM API (`createElement` + `textContent`) when rendering recent error entries. Defense-in-depth even though the only data source is the local server.
80
+ - `scripts/install-autostart.ps1`: `New-Item -Force -ItemType Directory` instead of `Test-Path` + `New-Item`. Consistent with the wrapper script and removes a theoretical race between check and create.
81
+ - Removed em dashes used as connectors (project rule) in `popup.js` (`"—"` placeholder and `— ${fmtAgo()}` separator).
82
+
83
+ ## Phase 2 (closed)
84
+
85
+ Goal: live capture from claude.ai + hybrid search good enough that Claude Code actually finds the right chat.
86
+
87
+ ### Added
88
+ - Hybrid search (`hybrid` mode) combining vector search and FTS5 BM25 via Reciprocal Rank Fusion. Default for `search_chats`. Fixes the "Amarok" case where lexical-only beats semantic-only on proper nouns.
89
+ - `search_chats(mode=...)` parameter accepting `hybrid`, `semantic`, `lexical`.
90
+ - `memex reindex-fts` CLI command to populate FTS5 index on databases created before hybrid landed.
91
+ - Local HTTP ingest server (`memex serve`) on `127.0.0.1:5777` for live capture.
92
+ - Chrome extension (MV3) that captures conversations from claude.ai and posts them to the local server.
93
+ - `fastembed` embedder as the new zero-config default (130 MB quantized ONNX model). Ollama moved to opt-in via `MEMEX_EMBED_BACKEND=ollama` and the `[ollama]` extra.
94
+
95
+ ### Changed
96
+ - `ollama` dependency moved to `[project.optional-dependencies]` under the `ollama` extra. Install with `uv pip install -e .[ollama]` if needed.
97
+ - `starlette>=0.40` and `uvicorn>=0.30` promoted to direct dependencies (no longer relying on transitive resolution through `fastmcp`).
98
+ - `cli/main.py` no longer hardcodes "embedder: Ollama"; it reports the active backend (`settings.embed_backend`) and the model name reported by the embedder instance.
99
+ - Chrome extension popup readable in dark mode.
100
+ - Chrome extension `inject.js` uses `window.location.origin` as `postMessage` target instead of `"*"`, preventing other page-world scripts on claude.ai from intercepting captured chat JSON.
101
+ - Chrome extension `manifest.json` adds an explicit CSP (`script-src 'self'; connect-src 'self' http://127.0.0.1:5777 http://localhost:5777`).
102
+
103
+ ### Fixed
104
+ - `transports/stdio.py` no longer leaks raw exception messages to MCP clients; it returns `Error interno ({Type})` and logs the detail server-side.
105
+ - Ollama embedder catches `httpx.ConnectError`, `ConnectTimeout`, `ReadTimeout`, `RemoteProtocolError` explicitly before falling back to substring matching.
106
+ - `_to_iso` in `storage/repo.py` uses `strftime` instead of `replace("+00:00", "Z")`; robust to non-UTC zones.
107
+ - `tools.search_chats(mode="lexical")` raises a clear error when the query sanitizes to empty (previously returned `[]` silently).
108
+ - Chrome extension `background.js` retries 3 times with backoff (2s, 8s) on network errors, covering the case where fastembed downloads the model on first ingest and the server takes 30-60s to respond.
109
+
110
+ ### Removed
111
+ - Empty `src/memex/core/retrieval/` directory.
112
+ - `pytest-cov` from dev dependencies (was not used in CI or docs).
113
+
114
+ ## Phase 1 (closed)
115
+
116
+ Goal: ingestion pipeline + storage + first MCP tools working end-to-end.
117
+
118
+ ### Added
119
+ - `core/ingest/` pipeline parsing the official Claude.ai export (`conversations.json`, `users/*/design_chats/*.json`, `memories.json`) into Project, Conversation, Message, Chunk models.
120
+ - `core/storage/` over SQLite + sqlite-vec with FTS5 enabled.
121
+ - `core/embeddings/` with Embedder ABC, Ollama implementation, and `FakeEmbedder` for tests.
122
+ - MCP server (`memex-mcp`) over stdio with `search_chats`, `get_chat`, `list_recent_chats`.
123
+ - CLI (`memex`) with `ingest`, `search`, `stats` commands.
124
+
125
+ ## Phase 0 (closed)
126
+
127
+ Goal: project scaffold + decisions of record.
128
+
129
+ ### Added
130
+ - Initial pyproject.toml, uv setup, package layout (`src/memex/`).
131
+ - Pydantic settings (`config.py`).
132
+ - Test infrastructure (pytest, asyncio mode, `not integration` marker).
133
+ - `CLAUDE.md`, `ROADMAP.md`, `DEVLOG.md` for project context.
@@ -0,0 +1,113 @@
1
+ # CLAUDE.md
2
+
3
+ Context and rules for any Claude Code instance working in this repo (including parallel worktrees).
4
+
5
+ ## Project idea in one line
6
+
7
+ The context Claude.ai has should also be available to Claude Code. Everything else (storage, embeddings, MCP, capture) is plumbing to get there.
8
+
9
+ Full detail in [README.md](README.md) and [ROADMAP.md](ROADMAP.md).
10
+
11
+ ## Working rules (apply ALWAYS)
12
+
13
+ 1. **Read the code before and after editing.** Before so you do not break anything, after to verify what ended up there.
14
+ 2. **Keep README, ROADMAP, and DEVLOG in sync with every relevant change.** Update them in the same iteration as the code.
15
+ 3. **Review the code you just wrote for bugs** before closing the task.
16
+ 4. **When closing each ROADMAP phase, audit the whole project** for bugs, obsolete code, and vulnerabilities. Deliver a written report.
17
+ 5. **Plan before coding.** No writing code without a clear plan.
18
+ 6. **If there are real doubts, ask.** Do not assume.
19
+ 7. **Code and plans designed to scale.** Clear separation of responsibilities (pure core, swappable transport, embedder and storage behind interfaces).
20
+ 8. **No em dashes as connectors.** Use commas, periods, parentheses. Applies to docs, commits, code, and replies to the user.
21
+ 9. **No Claude shoutouts in commits.** No `Co-Authored-By`, no AI footers. Commits signed only by the human author.
22
+ 10. **Apply these rules in every iteration.**
23
+
24
+ ## Stack
25
+
26
+ - Python 3.12+, package manager [uv](https://docs.astral.sh/uv/).
27
+ - [FastMCP](https://github.com/jlowin/fastmcp) for the MCP server (supports stdio and SSE/HTTP).
28
+ - SQLite + [sqlite-vec](https://github.com/asg017/sqlite-vec) for storage and vector search.
29
+ - [fastembed](https://github.com/qdrant/fastembed) by default (zero-config, embedded ONNX) or optional [Ollama](https://ollama.com) with `nomic-embed-text`. Backend configurable via `MEMEX_EMBED_BACKEND`.
30
+ - `pydantic` + `pydantic-settings` for config and models.
31
+ - `typer` + `rich` for CLI.
32
+ - `pytest`, `ruff`, `mypy` for test/lint/typecheck.
33
+
34
+ ## Architecture
35
+
36
+ ```
37
+ src/memex/
38
+ ├── config.py ← settings with pydantic-settings (DONE)
39
+ ├── core/ ← pure library, no transport
40
+ │ ├── models.py ← Project, Conversation, Message, Chunk, SearchHit
41
+ │ ├── storage/ ← SQLite + sqlite-vec + FTS5 (schema, db, repo)
42
+ │ └── ingest/ ← parsers + chunker + pipeline (content_renderer, chunker, claude_export, pipeline)
43
+ ├── core/embeddings/ ← factory + interfaces
44
+ │ ├── base.py ← Embedder ABC + EmbedderError + l2_normalize
45
+ │ ├── fastembed_embedder.py ← default (ONNX, zero-config)
46
+ │ ├── ollama.py ← optional (extra `ollama`)
47
+ │ ├── fake.py ← deterministic FakeEmbedder for tests
48
+ │ └── __init__.py ← get_default_embedder() factory based on MEMEX_EMBED_BACKEND
49
+ ├── core/summaries/ ← LLM summarizer (Phase 3)
50
+ │ ├── base.py ← Summarizer ABC + SummarizerError
51
+ │ ├── anthropic_summarizer.py ← real backend, lazy SDK import
52
+ │ ├── fake.py ← deterministic FakeSummarizer for tests
53
+ │ └── __init__.py ← get_default_summarizer() factory, returns None if disabled
54
+ ├── transports/ ← MCP bindings + local HTTP
55
+ │ ├── tools.py ← pure logic of the 4 MCP tools
56
+ │ ├── stdio.py ← stdio MCP entrypoint with FastMCP (memex-mcp)
57
+ │ ├── http_ingest.py ← local HTTP server for live capture (Starlette)
58
+ │ └── http.py ← SSE/HTTP remote MCP (TBD, Phase 4) ← does not exist yet
59
+ └── cli/ ← CLI with typer (ingest, search, stats, serve, reindex-fts)
60
+ ```
61
+
62
+ **Dependency rule:** `core/` does not import from `transports/` or `cli/`. Arrows point inward.
63
+
64
+ **State as of 2026-05-23 (Phase 3 in progress):**
65
+ - Phases 0, 1, and 2 closed with audit. Phase 3 first sub-task closed: on-demand auto-summaries via Claude Haiku (opt-in, lazy at first `search_chats`, persisted).
66
+ - `vector_search`, `text_search`, and `hybrid_search` live in `core/storage/repo.py`. The `core/retrieval/` directory was removed (it was empty); if retrieval logic grows (re-ranking, complex filters), it gets recreated with real content.
67
+ - `transports/http.py` does not exist yet; Phase 4 adds it when the remote MCP is built. Live capture uses `transports/http_ingest.py` (a different local server, not the MCP).
68
+
69
+ ## Common commands
70
+
71
+ ```bash
72
+ uv sync # install deps + create .venv
73
+ uv sync --extra summaries # also install anthropic SDK (for the optional summaries feature)
74
+ uv run pytest # tests (-m 'not integration' to skip integration)
75
+ uv run ruff check src tests # lint
76
+ uv run ruff format src tests # format
77
+ uv run mypy src/memex/core # type check (strict in core)
78
+ uv run memex --help # CLI (ingest, search, stats, serve, reindex-fts)
79
+ uv run memex-mcp # stdio MCP server (for Claude Code / Desktop)
80
+ uv run memex serve # local HTTP server for live capture from Chrome ext
81
+ ```
82
+
83
+ ## Multi-Claude with git worktrees
84
+
85
+ To run several Claudes in parallel on independent tasks:
86
+
87
+ ```bash
88
+ git worktree add ../Memex-ingest feature/ingest
89
+ git worktree add ../Memex-embed feature/embeddings
90
+ git worktree add ../Memex-store feature/storage
91
+ ```
92
+
93
+ Each worktree is a separate folder with its own branch and its own `.venv` (uv isolates on its own). They converge to the same `.git`. Each Claude works without stepping on files, and at merge time all changes land in the same repo.
94
+
95
+ **Limits:** worktrees do not see each other until merge. Better to split by independent module, not by cross-cutting feature. The coordinator is the human (or a "lead" Claude on `main`).
96
+
97
+ ## Sensitive data
98
+
99
+ Everything in `data/` is personal and NEVER goes to the repo (already excluded by `.gitignore`):
100
+ - `data/exports/*.zip`: Claude.ai exports with real conversations.
101
+ - `data/memex.db`: SQLite database with indexed chats.
102
+
103
+ The `MEMEX.md` file is also in `.gitignore` because it is an internal context document (SyncChat handoff), not for users.
104
+
105
+ ## Commit conventions
106
+
107
+ - Clear messages, imperative mood, English or Spanish (whichever is consistent within the message).
108
+ - No `Co-Authored-By: Claude...`. No AI footers. No `Generated with Claude Code`.
109
+ - One commit per logical unit of change.
110
+
111
+ ## Persistent memory
112
+
113
+ There is project memory at `C:\Users\dioni\.claude\projects\d--Dionisio-Memex\memory\`. It contains workflow rules, user context, setup decisions. Read it at the start of each session.
@@ -0,0 +1,83 @@
1
+ # Contributing to Memex
2
+
3
+ Thanks for taking a look. Memex is pre-alpha, runs from source, and the roadmap is driven by a single user-facing goal: give Claude Code the same context Claude.ai already has. Phases and close criteria live in [ROADMAP.md](ROADMAP.md); the project journal in [DEVLOG.md](DEVLOG.md).
4
+
5
+ ## Scope of contributions
6
+
7
+ Welcome:
8
+
9
+ - Bug reports with reproducible steps.
10
+ - Discussion on tool API shape (`search_chats`, `get_chat`, `list_recent_chats`) and how Claude actually uses them in practice.
11
+ - Improvements to the live capture path (Chrome extension + HTTP ingest server) for sites or flows that break.
12
+ - Better embedders, retrievers, or chunking, with benchmarks.
13
+
14
+ Out of scope for now:
15
+
16
+ - Packaging for distribution (Phase 5 work, owned).
17
+ - UI other than the Chrome extension.
18
+ - Provider integrations beyond Claude.ai (Claude.ai is the focus first).
19
+
20
+ If something is unclear, open an issue before writing code. It saves time on both sides.
21
+
22
+ ## Local setup
23
+
24
+ Requirements: Python 3.12+ and [uv](https://docs.astral.sh/uv/).
25
+
26
+ ```bash
27
+ git clone https://github.com/dioniipereyraa/memex
28
+ cd memex
29
+ uv sync --extra dev
30
+ ```
31
+
32
+ The `dev` extra brings pytest, ruff, mypy, and `ollama` (Python client, used by the optional Ollama backend).
33
+
34
+ ## Running checks locally
35
+
36
+ The same checks CI runs:
37
+
38
+ ```bash
39
+ uv run ruff check src tests
40
+ uv run ruff format --check src tests
41
+ uv run mypy src/memex/core src/memex/config.py src/memex/transports
42
+ uv run pytest tests/unit -q
43
+ ```
44
+
45
+ Integration tests live under `tests/integration/` and need external services (Ollama). Run with:
46
+
47
+ ```bash
48
+ uv run pytest tests/integration -q
49
+ ```
50
+
51
+ Integration tests are skipped by default in CI.
52
+
53
+ ## Code style
54
+
55
+ - **No em dashes (`—`) as connectors.** Use commas, periods, parentheses. Applies to docs, commits, code, and PR descriptions.
56
+ - **No AI footers in commits.** No `Co-Authored-By: Claude...`, no `Generated with ...`. Commits are signed by the human author.
57
+ - **Imperative mood in commit messages.** Spanish or English, pick one per message and stay consistent.
58
+ - **Read before you edit, read after.** Verify what you wrote landed as intended.
59
+ - **Plan before you code.** No stream-of-consciousness implementations.
60
+ - **Architecture rule:** `core/` does not import from `transports/` or `cli/`. Dependencies point inward.
61
+
62
+ ## Pull request workflow
63
+
64
+ 1. Branch off `main`. Conventional names like `feat/...`, `fix/...`, `docs/...`, `chore/...`.
65
+ 2. Make the change, with tests when it is testable. Match existing test style (see `tests/unit/` for examples).
66
+ 3. Run all local checks (lint, format, mypy, tests). CI runs the same set; if it is green locally, it should be green there.
67
+ 4. Open a PR with a description that explains the *why*, not just the *what*. The diff already shows the what.
68
+ 5. Update `DEVLOG.md` if the change is non-trivial (new feature, behavior change, architecture decision). Skip for pure refactors or test additions.
69
+
70
+ ## Reporting bugs
71
+
72
+ Open an issue with:
73
+
74
+ - What you expected.
75
+ - What happened instead.
76
+ - Steps to reproduce (the smaller the better).
77
+ - Output of `memex stats` and `uv run python -V` if relevant.
78
+
79
+ If it involves the Chrome extension, include the extension version and browser version.
80
+
81
+ ## License
82
+
83
+ By contributing, you agree your contribution is licensed under the [MIT License](LICENSE) of the project.