memex-chats 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memex_chats-0.1.0/.env.example +46 -0
- memex_chats-0.1.0/.github/workflows/ci.yml +40 -0
- memex_chats-0.1.0/.gitignore +78 -0
- memex_chats-0.1.0/.python-version +1 -0
- memex_chats-0.1.0/CHANGELOG.md +133 -0
- memex_chats-0.1.0/CLAUDE.md +113 -0
- memex_chats-0.1.0/CONTRIBUTING.md +83 -0
- memex_chats-0.1.0/DEVLOG.md +984 -0
- memex_chats-0.1.0/LICENSE +21 -0
- memex_chats-0.1.0/PKG-INFO +322 -0
- memex_chats-0.1.0/PRIVACY.md +55 -0
- memex_chats-0.1.0/README.md +278 -0
- memex_chats-0.1.0/ROADMAP.md +137 -0
- memex_chats-0.1.0/chrome-extension/README.md +75 -0
- memex_chats-0.1.0/chrome-extension/WEB_STORE_CHECKLIST.md +149 -0
- memex_chats-0.1.0/chrome-extension/icons/icon-128.png +0 -0
- memex_chats-0.1.0/chrome-extension/icons/icon-16.png +0 -0
- memex_chats-0.1.0/chrome-extension/icons/icon-32.png +0 -0
- memex_chats-0.1.0/chrome-extension/icons/icon-48.png +0 -0
- memex_chats-0.1.0/chrome-extension/icons/icon.svg +6 -0
- memex_chats-0.1.0/chrome-extension/manifest.json +46 -0
- memex_chats-0.1.0/chrome-extension/src/background.js +193 -0
- memex_chats-0.1.0/chrome-extension/src/content.js +12 -0
- memex_chats-0.1.0/chrome-extension/src/inject.js +112 -0
- memex_chats-0.1.0/chrome-extension/src/popup.html +150 -0
- memex_chats-0.1.0/chrome-extension/src/popup.js +95 -0
- memex_chats-0.1.0/docs/screenshots/session-memory-check.jpeg +0 -0
- memex_chats-0.1.0/pyproject.toml +95 -0
- memex_chats-0.1.0/scripts/_run-server.ps1 +40 -0
- memex_chats-0.1.0/scripts/inspect_export.py +297 -0
- memex_chats-0.1.0/scripts/install-autostart.ps1 +179 -0
- memex_chats-0.1.0/scripts/install-autostart.sh +145 -0
- memex_chats-0.1.0/src/memex/__init__.py +3 -0
- memex_chats-0.1.0/src/memex/cli/__init__.py +0 -0
- memex_chats-0.1.0/src/memex/cli/main.py +911 -0
- memex_chats-0.1.0/src/memex/config.py +72 -0
- memex_chats-0.1.0/src/memex/core/__init__.py +0 -0
- memex_chats-0.1.0/src/memex/core/embeddings/__init__.py +47 -0
- memex_chats-0.1.0/src/memex/core/embeddings/base.py +70 -0
- memex_chats-0.1.0/src/memex/core/embeddings/fake.py +66 -0
- memex_chats-0.1.0/src/memex/core/embeddings/fastembed_embedder.py +88 -0
- memex_chats-0.1.0/src/memex/core/embeddings/ollama.py +98 -0
- memex_chats-0.1.0/src/memex/core/ingest/__init__.py +0 -0
- memex_chats-0.1.0/src/memex/core/ingest/chunker.py +68 -0
- memex_chats-0.1.0/src/memex/core/ingest/claude_export.py +235 -0
- memex_chats-0.1.0/src/memex/core/ingest/content_renderer.py +95 -0
- memex_chats-0.1.0/src/memex/core/ingest/pipeline.py +397 -0
- memex_chats-0.1.0/src/memex/core/models.py +118 -0
- memex_chats-0.1.0/src/memex/core/repos/__init__.py +42 -0
- memex_chats-0.1.0/src/memex/core/repos/discovery.py +176 -0
- memex_chats-0.1.0/src/memex/core/repos/keys.py +96 -0
- memex_chats-0.1.0/src/memex/core/repos/matcher.py +137 -0
- memex_chats-0.1.0/src/memex/core/repos/resolve.py +40 -0
- memex_chats-0.1.0/src/memex/core/storage/__init__.py +0 -0
- memex_chats-0.1.0/src/memex/core/storage/db.py +123 -0
- memex_chats-0.1.0/src/memex/core/storage/repo.py +859 -0
- memex_chats-0.1.0/src/memex/core/storage/schema.sql +147 -0
- memex_chats-0.1.0/src/memex/core/summaries/__init__.py +39 -0
- memex_chats-0.1.0/src/memex/core/summaries/anthropic_summarizer.py +115 -0
- memex_chats-0.1.0/src/memex/core/summaries/base.py +53 -0
- memex_chats-0.1.0/src/memex/core/summaries/fake.py +44 -0
- memex_chats-0.1.0/src/memex/transports/__init__.py +0 -0
- memex_chats-0.1.0/src/memex/transports/http_ingest.py +172 -0
- memex_chats-0.1.0/src/memex/transports/stdio.py +320 -0
- memex_chats-0.1.0/src/memex/transports/tools.py +551 -0
- memex_chats-0.1.0/tests/__init__.py +0 -0
- memex_chats-0.1.0/tests/conftest.py +121 -0
- memex_chats-0.1.0/tests/integration/__init__.py +0 -0
- memex_chats-0.1.0/tests/integration/test_full_flow.py +72 -0
- memex_chats-0.1.0/tests/integration/test_ollama_embedder.py +93 -0
- memex_chats-0.1.0/tests/unit/__init__.py +0 -0
- memex_chats-0.1.0/tests/unit/test_chunker.py +105 -0
- memex_chats-0.1.0/tests/unit/test_claude_export.py +310 -0
- memex_chats-0.1.0/tests/unit/test_cli.py +316 -0
- memex_chats-0.1.0/tests/unit/test_cli_repos.py +404 -0
- memex_chats-0.1.0/tests/unit/test_content_renderer.py +139 -0
- memex_chats-0.1.0/tests/unit/test_embedder_errors.py +92 -0
- memex_chats-0.1.0/tests/unit/test_embedder_factory.py +89 -0
- memex_chats-0.1.0/tests/unit/test_embeddings.py +96 -0
- memex_chats-0.1.0/tests/unit/test_http_ingest.py +203 -0
- memex_chats-0.1.0/tests/unit/test_models.py +157 -0
- memex_chats-0.1.0/tests/unit/test_pipeline.py +651 -0
- memex_chats-0.1.0/tests/unit/test_repos_discovery.py +214 -0
- memex_chats-0.1.0/tests/unit/test_repos_keys.py +109 -0
- memex_chats-0.1.0/tests/unit/test_repos_matcher.py +187 -0
- memex_chats-0.1.0/tests/unit/test_repos_storage.py +193 -0
- memex_chats-0.1.0/tests/unit/test_stdio_server.py +112 -0
- memex_chats-0.1.0/tests/unit/test_storage.py +708 -0
- memex_chats-0.1.0/tests/unit/test_summaries.py +145 -0
- memex_chats-0.1.0/tests/unit/test_tools.py +806 -0
- memex_chats-0.1.0/uv.lock +2263 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Copy to .env and adjust values. .env is in .gitignore.
|
|
2
|
+
|
|
3
|
+
# Embeddings backend. Default: fastembed (zero-config, ONNX model embedded,
|
|
4
|
+
# downloads itself the first time). Alternative: ollama (requires a local
|
|
5
|
+
# Ollama instance running).
|
|
6
|
+
MEMEX_EMBED_BACKEND=fastembed
|
|
7
|
+
|
|
8
|
+
# Model name. If unset, each backend uses its default:
|
|
9
|
+
# - fastembed: nomic-ai/nomic-embed-text-v1.5-Q (~130 MB quantized)
|
|
10
|
+
# - ollama: nomic-embed-text
|
|
11
|
+
# MEMEX_EMBED_MODEL=nomic-ai/nomic-embed-text-v1.5
|
|
12
|
+
|
|
13
|
+
# Only used if MEMEX_EMBED_BACKEND=ollama
|
|
14
|
+
OLLAMA_HOST=http://localhost:11434
|
|
15
|
+
|
|
16
|
+
# SQLite database path (default: ./data/memex.db)
|
|
17
|
+
MEMEX_DB_PATH=./data/memex.db
|
|
18
|
+
|
|
19
|
+
# Directory where you keep official Claude.ai exports
|
|
20
|
+
MEMEX_EXPORTS_DIR=./data/exports
|
|
21
|
+
|
|
22
|
+
# Approximate chunk size in tokens (default: 500)
|
|
23
|
+
MEMEX_CHUNK_SIZE=500
|
|
24
|
+
|
|
25
|
+
# Overlap between chunks in tokens (default: 50)
|
|
26
|
+
MEMEX_CHUNK_OVERLAP=50
|
|
27
|
+
|
|
28
|
+
# Log level: DEBUG, INFO, WARNING, ERROR
|
|
29
|
+
MEMEX_LOG_LEVEL=INFO
|
|
30
|
+
|
|
31
|
+
# On-demand auto-summaries with Claude Haiku (opt-in). Requires the extra
|
|
32
|
+
# `summaries`: `uv sync --extra summaries`. When ON, summaries are generated
|
|
33
|
+
# lazily by `search_chats` for top-3 results without a cached summary, in
|
|
34
|
+
# parallel. Off by default to avoid surprise API calls.
|
|
35
|
+
MEMEX_SUMMARY_ENABLED=false
|
|
36
|
+
|
|
37
|
+
# Anthropic model for summary generation. Default: Haiku (cheap, fast).
|
|
38
|
+
# MEMEX_SUMMARY_MODEL=claude-haiku-4-5-20251001
|
|
39
|
+
|
|
40
|
+
# Max tokens for the generated summary. Default 200 (~3 sentences).
|
|
41
|
+
# MEMEX_SUMMARY_MAX_TOKENS=200
|
|
42
|
+
|
|
43
|
+
# Anthropic API key. If MEMEX_SUMMARY_ENABLED=true and this is missing or
|
|
44
|
+
# invalid, the search returns the matching chats without a summary and logs
|
|
45
|
+
# a warning per chat. The search itself never aborts.
|
|
46
|
+
# ANTHROPIC_API_KEY=sk-ant-...
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
timeout-minutes: 10
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v3
|
|
21
|
+
with:
|
|
22
|
+
enable-cache: true
|
|
23
|
+
|
|
24
|
+
- name: Set up Python
|
|
25
|
+
run: uv python install 3.12
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --extra dev
|
|
29
|
+
|
|
30
|
+
- name: Lint (ruff check)
|
|
31
|
+
run: uv run ruff check src tests
|
|
32
|
+
|
|
33
|
+
- name: Format check (ruff format)
|
|
34
|
+
run: uv run ruff format --check src tests
|
|
35
|
+
|
|
36
|
+
- name: Type check (mypy)
|
|
37
|
+
run: uv run mypy src/memex/core src/memex/config.py src/memex/transports
|
|
38
|
+
|
|
39
|
+
- name: Unit tests
|
|
40
|
+
run: uv run pytest tests/unit -q
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Personal data and secrets
|
|
2
|
+
# Nothing listed here should EVER go into the public repo
|
|
3
|
+
data/
|
|
4
|
+
*.zip
|
|
5
|
+
*.db
|
|
6
|
+
*.db-journal
|
|
7
|
+
*.db-shm
|
|
8
|
+
*.db-wal
|
|
9
|
+
.env
|
|
10
|
+
.env.*
|
|
11
|
+
!.env.example
|
|
12
|
+
|
|
13
|
+
# Internal context document (handoff doc, not for users)
|
|
14
|
+
MEMEX.md
|
|
15
|
+
|
|
16
|
+
# Python
|
|
17
|
+
__pycache__/
|
|
18
|
+
*.py[cod]
|
|
19
|
+
*$py.class
|
|
20
|
+
*.so
|
|
21
|
+
.Python
|
|
22
|
+
*.egg-info/
|
|
23
|
+
*.egg
|
|
24
|
+
build/
|
|
25
|
+
dist/
|
|
26
|
+
.eggs/
|
|
27
|
+
|
|
28
|
+
# Virtual environments
|
|
29
|
+
.venv/
|
|
30
|
+
venv/
|
|
31
|
+
env/
|
|
32
|
+
ENV/
|
|
33
|
+
|
|
34
|
+
# uv
|
|
35
|
+
.uv/
|
|
36
|
+
|
|
37
|
+
# Testing and coverage
|
|
38
|
+
.pytest_cache/
|
|
39
|
+
.coverage
|
|
40
|
+
.coverage.*
|
|
41
|
+
htmlcov/
|
|
42
|
+
.tox/
|
|
43
|
+
.nox/
|
|
44
|
+
coverage.xml
|
|
45
|
+
*.cover
|
|
46
|
+
|
|
47
|
+
# Type checking
|
|
48
|
+
.mypy_cache/
|
|
49
|
+
.pyre/
|
|
50
|
+
.pytype/
|
|
51
|
+
.ruff_cache/
|
|
52
|
+
|
|
53
|
+
# IDEs and editors
|
|
54
|
+
.idea/
|
|
55
|
+
.vscode/
|
|
56
|
+
*.swp
|
|
57
|
+
*.swo
|
|
58
|
+
*~
|
|
59
|
+
.DS_Store
|
|
60
|
+
Thumbs.db
|
|
61
|
+
|
|
62
|
+
# Logs
|
|
63
|
+
*.log
|
|
64
|
+
logs/
|
|
65
|
+
|
|
66
|
+
# Notebooks scratch
|
|
67
|
+
.ipynb_checkpoints/
|
|
68
|
+
|
|
69
|
+
# Claude Code local memory (in case it shows up in the working dir)
|
|
70
|
+
.claude/
|
|
71
|
+
|
|
72
|
+
# Local MCP config (absolute path, specific to your machine). The example
|
|
73
|
+
# snippet lives in the README; each dev creates their own.
|
|
74
|
+
.mcp.json
|
|
75
|
+
|
|
76
|
+
# Handoff doc between work sessions. It is internal context (what we were
|
|
77
|
+
# doing, what is left, open decisions), not for users of the repo.
|
|
78
|
+
handoff.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to Memex are documented here.
|
|
4
|
+
|
|
5
|
+
Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). `0.1.0` is the first alpha release; before it the project lived in `0.0.x`.
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
### Added (Phase 5 packaging, 2026-05-25)
|
|
10
|
+
- `memex doctor` diagnostic command. Checks Python version, database existence + schema version, embedder instantiability, live-capture server reachability, summarizer configuration (only if enabled), registered repos count, and indexed corpus count. Reports OK / WARN / FAIL per check, exits non-zero only on FAIL. 4 new unit tests.
|
|
11
|
+
- `memex install-service` cross-platform autostart dispatcher. Detects host OS and delegates: Windows runs the existing Scheduled Task installer, Linux writes a new systemd user unit (`~/.config/systemd/user/memex-serve.service`) and starts it via `systemctl --user`. macOS prints manual instructions (launchd integration deferred to 0.2.0). 6 new unit tests covering the dispatch logic with mocked `platform.system` and `subprocess.run`.
|
|
12
|
+
- New `scripts/install-autostart.sh` for Linux. Subcommands `install`, `uninstall`, `status`. Resolves `uv` lazily at install time, falls back to PATH lookup if `uv` is not absolute. Auto-creates `~/.local/state/memex/` for logs.
|
|
13
|
+
- `chrome-extension/WEB_STORE_CHECKLIST.md`: full Web Store submission playbook (developer account, privacy policy URL, asset sizes, listing copy, permissions justification, post-approval checklist).
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- Package renamed from `memex` to `memex-chats` for PyPI publication. Both `memex` and `memex-mcp` are already taken on PyPI by unrelated projects (the latter was claimed the same day we attempted to publish). The CLI entry points stay `memex` and `memex-mcp`, so `.mcp.json` configs do not change. `Development Status` classifier bumped from `Pre-Alpha` to `Alpha`. Added `Operating System :: OS Independent` classifier. New `[project.urls]` section with Homepage / Repository / Issues / Changelog links.
|
|
17
|
+
- README quickstart restructured: "install from PyPI" is now the recommended path (option A), source install is option B. Diagnostics section added linking `memex doctor`. Autostart section unified across Windows + Linux + macOS placeholder.
|
|
18
|
+
- Chrome extension manifest description translated to English (was the last Spanish string in the extension).
|
|
19
|
+
|
|
20
|
+
## [0.1.0] - 2026-05-24
|
|
21
|
+
|
|
22
|
+
Phase 3 closed: quality pass on retrieval. All four feature sub-tasks shipped and audited.
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
- Optional auto-summary generation per chat, powered by Claude Haiku via the Anthropic API. Opt-in by setting `MEMEX_SUMMARY_ENABLED=true` and `ANTHROPIC_API_KEY`. Summaries are generated lazily when `search_chats` returns a chat that does not have one cached: up to 3 in parallel per call (`ThreadPoolExecutor`), silent fail per chat if the API errors. The summary is stored in `conversations.summary` and persists, so subsequent searches hit cache and do not pay the API again.
|
|
26
|
+
- `core/summaries/` module: `Summarizer` ABC, `AnthropicSummarizer` (real backend, lazy import of the SDK), `FakeSummarizer` (deterministic, used in tests), `get_default_summarizer()` factory that returns `None` when the feature flag is off.
|
|
27
|
+
- `conversations.content_hash` column (SHA-256 hex of canonical text). The pipeline computes and persists it on every ingest. Lets the lazy summarizer (and future consumers) detect content changes so a stale summary can be invalidated.
|
|
28
|
+
- `repo.get_conversation_text(uuid)` reconstructs the canonical message stream of a chat (same format the chunker uses); `repo.update_conversation_summary(uuid, text)` patches only the summary field without touching the rest of the row.
|
|
29
|
+
- `anthropic>=0.40` as a new optional dependency: install with `uv sync --extra summaries`.
|
|
30
|
+
- Additive schema migration (`_apply_additive_migrations` in `db.py`) so existing local databases gain `content_hash` without a reset.
|
|
31
|
+
- `stdio.search_chats` resolves the summarizer once per process via `get_default_summarizer()` and passes it through.
|
|
32
|
+
- 24 new unit tests covering `FakeSummarizer`, the factory, `AnthropicSummarizer` error paths, the lazy wire in `tools.search_chats` (no-summarizer path, generation for missing summaries, cache reuse, cap at 3, persistence to DB, per-chat silent fail), pipeline `content_hash` persistence, cached-summary preservation across same-content reingests, and the additive schema migration on a legacy database.
|
|
33
|
+
|
|
34
|
+
### Changed
|
|
35
|
+
- `_ingest_conversation` computes the canonical text and `content_hash` before inserting; if the chat already exists with the same hash and a cached summary, the summary is preserved across the upsert (the parser's `summary` field would otherwise overwrite a lazy-generated one).
|
|
36
|
+
- `tools.get_chat` defaults lowered to fit comfortably inside the Claude Code MCP token budget: `messages_limit` 20 → 10, per-message text cap 3000 → 1500 chars. Worst-case response ~17k chars (was ~62k, which occasionally exceeded the client limit and triggered the "result saved to file" fallback). Hard max `messages_limit=100` unchanged; callers needing more detail can opt in explicitly. Docstrings updated so Claude paginates with `messages_offset=10` on long chats.
|
|
37
|
+
- `ROADMAP.md` and `DEVLOG.md` translated to English (previously Spanish, kept as internal journal). README note about Spanish internal docs removed.
|
|
38
|
+
|
|
39
|
+
### Added (chat ↔ repo association, Phase 3 sub-task 2)
|
|
40
|
+
- New `repos` and `chat_repos` tables (many-to-many with `source ∈ {'auto', 'manual'}`, `confidence`, cascade FKs on both ends).
|
|
41
|
+
- New `core/repos/` module:
|
|
42
|
+
- `keys.py`: `normalize_path`, `normalize_remote` (SCP/HTTPS git URLs), `canonical_repo_key` (prefers remote over path).
|
|
43
|
+
- `discovery.py`: `parse_repo(path)` reads `.git/config` and `pyproject.toml`/`package.json`/`Cargo.toml`; produces a `RepoInfo`. `ChatRepoAssociation` dataclass for joined rows.
|
|
44
|
+
- `matcher.py`: `match_text(text, repos, threshold)` returns `Match(repo_key, confidence)` per repo with four signals (remote URL 1.0, path 0.9, manifest name 0.8, display name 0.5; highest wins per repo).
|
|
45
|
+
- Storage helpers in `core/storage/repo.py`: `insert_repo`, `get_repo`, `list_repos`, `delete_repo`, `associate_chat_repo` (refuses to overwrite `manual` with `auto`), `dissociate_chat_repo`, `list_repos_for_conversation` (joined, hydrated), `list_conversations_for_repo`.
|
|
46
|
+
- Pipeline auto-scan at ingest: `_ingest_conversation` runs the matcher against all registered repos after persisting the conv and upserts `source='auto'` associations. No-op when no repos are registered.
|
|
47
|
+
- CLI: new `memex repos` sub-app (`add`, `list`, `remove`, `scan`) and top-level `memex tag` / `memex untag` for manual overrides.
|
|
48
|
+
- `tools.search_chats(query, ..., repo=...)` accepts a path / git remote URL / canonical key. Resolves it via `_resolve_repo_key`, then `_apply_repo_boost` lowers the distance of associated hits by `REPO_BOOST_WEIGHT (0.3) * confidence` and re-sorts. Oversamples candidates (×5) when boosting so chats just outside the top-N can surface. Unregistered repo argument short-circuits with an actionable error pointing at `memex repos add`.
|
|
49
|
+
- `stdio.search_chats` MCP wrapper exposes `repo` to Claude Code; docstring instructs it to pass the cwd when working inside a repo.
|
|
50
|
+
- 91 new unit tests across keys (21), discovery (11), matcher (15), storage helpers (18), pipeline auto-scan (4), CLI (15), and search boost / resolve (7).
|
|
51
|
+
|
|
52
|
+
### Added (SessionStart hook + find_related, Phase 3 sub-tasks 3 and 4)
|
|
53
|
+
- `memex session-context` CLI command. Auto-detects the active repo from cwd (new `find_repo_root` walks up looking for `.git`, handles both directory and gitlink-file forms used by worktrees). Resolves to a registered repo, prints a short Markdown blob with up to N associated chats (manual first, then auto by confidence). Designed to be wired into Claude Code's `SessionStart` hook in `.claude/settings.json`. Silent no-op when no `.git`, repo not registered, or no associations (diagnostics go to stderr).
|
|
54
|
+
- `_resolve_repo_key` extracted from `transports/tools.py` to new `core/repos/resolve.py`. Single source of truth shared by `search_chats(repo=...)`, `find_related(repo=...)`, and the new session-context command.
|
|
55
|
+
- `find_related(context, limit, repo)` MCP tool: takes free-form text and returns semantically similar chats via pure vector search. Capped at `FIND_RELATED_MAX_INPUT_CHARS=4000` chars to bound embedder latency. Same repo-boost mechanic as `search_chats`. Wired into `stdio.py` as the 4th MCP tool with docstring guiding Claude when to prefer it over `search_chats`.
|
|
56
|
+
- 16 new unit tests: 5 for the session-context CLI (no-git, unregistered, no-associations, prints associated, limit respected), 4 for `find_repo_root`, 7 for `find_related` (empty context, shape, truncation, limit clamp, unknown repo, boost reorders, embedder error).
|
|
57
|
+
|
|
58
|
+
## [0.0.2] - 2026-05-20
|
|
59
|
+
|
|
60
|
+
Phase 2 closed. Live capture + hybrid search work end-to-end. First public-facing polish (badges, screenshot, CONTRIBUTING, CHANGELOG, CI). Windows autostart as a preview of Phase 5. Closing audit applied: 3 important fixes and 4 minor fixes landed in this release.
|
|
61
|
+
|
|
62
|
+
### Added
|
|
63
|
+
- CI workflow on GitHub Actions (`ruff check`, `ruff format --check`, `mypy`, unit tests). Read-only permissions, 10 min job timeout.
|
|
64
|
+
- `CONTRIBUTING.md` with local setup, code style, and PR workflow.
|
|
65
|
+
- This `CHANGELOG.md`.
|
|
66
|
+
- Badges in the README: CI status, License MIT, Python 3.12+.
|
|
67
|
+
- "Session memory check" screenshot in the README, embedded as end-to-end demo of the live capture + recall flow.
|
|
68
|
+
- Chrome extension icons (16/32/48/128 PNG + SVG source under `chrome-extension/icons/`). Manifest declares them both top-level (`icons`) and in `action.default_icon` so the toolbar and the extension page render the brand instead of the gray placeholder.
|
|
69
|
+
- Tests for `memex serve` CLI (CliRunner mocking `uvicorn.run` and `connect_and_init`) and for ingest rollback when the embedder fails mid-batch (closes two audit follow-ups from 2026-05-19).
|
|
70
|
+
- Windows autostart for the HTTP server (Phase 5 preview): `scripts/install-autostart.ps1` registers a Scheduled Task running `uv run memex serve` at log on, with `LogonType S4U` (no window, independent from the shell that triggered it) and auto-restart on failure. Manage with `-Install` / `-Uninstall` / `-Status`. Logs to `%LOCALAPPDATA%\Memex\serve.log`. The cross-platform formal version (`memex install-service` with systemd / launchd backends) stays on the Phase 5 roadmap.
|
|
71
|
+
|
|
72
|
+
### Changed
|
|
73
|
+
- README translated fully to English. `ROADMAP.md` and `DEVLOG.md` remain in Spanish (internal journal).
|
|
74
|
+
- `ruff format` applied across `src/` and `tests/` (16 files reformatted, semantics unchanged). The check is now back in CI.
|
|
75
|
+
- Comment in `transports/http_ingest.py::_get_conn` rewritten to accurately describe the threading model and the future invariant for background tasks.
|
|
76
|
+
|
|
77
|
+
### Fixed
|
|
78
|
+
- `scripts/_run-server.ps1`: log file no longer has mixed encoding. The previous version used `Out-File -Encoding utf8` for the banner line plus `*>> $LogFile` for the server output, but PowerShell 5.1's `*>>` defaults to UTF-16 LE, garbling the file. Now uses `2>&1 | Out-File -Encoding utf8` for consistent UTF-8.
|
|
79
|
+
- `chrome-extension/src/popup.js`: replaced `innerHTML` with DOM API (`createElement` + `textContent`) when rendering recent error entries. Defense-in-depth even though the only data source is the local server.
|
|
80
|
+
- `scripts/install-autostart.ps1`: `New-Item -Force -ItemType Directory` instead of `Test-Path` + `New-Item`. Consistent with the wrapper script and removes a theoretical race between check and create.
|
|
81
|
+
- Removed em dashes used as connectors (project rule) in `popup.js` (`"—"` placeholder and `— ${fmtAgo()}` separator).
|
|
82
|
+
|
|
83
|
+
## Phase 2 (closed)
|
|
84
|
+
|
|
85
|
+
Goal: live capture from claude.ai + hybrid search good enough that Claude Code actually finds the right chat.
|
|
86
|
+
|
|
87
|
+
### Added
|
|
88
|
+
- Hybrid search (`hybrid` mode) combining vector search and FTS5 BM25 via Reciprocal Rank Fusion. Default for `search_chats`. Fixes the "Amarok" case where lexical-only beats semantic-only on proper nouns.
|
|
89
|
+
- `search_chats(mode=...)` parameter accepting `hybrid`, `semantic`, `lexical`.
|
|
90
|
+
- `memex reindex-fts` CLI command to populate FTS5 index on databases created before hybrid landed.
|
|
91
|
+
- Local HTTP ingest server (`memex serve`) on `127.0.0.1:5777` for live capture.
|
|
92
|
+
- Chrome extension (MV3) that captures conversations from claude.ai and posts them to the local server.
|
|
93
|
+
- `fastembed` embedder as the new zero-config default (130 MB quantized ONNX model). Ollama moved to opt-in via `MEMEX_EMBED_BACKEND=ollama` and the `[ollama]` extra.
|
|
94
|
+
|
|
95
|
+
### Changed
|
|
96
|
+
- `ollama` dependency moved to `[project.optional-dependencies]` under the `ollama` extra. Install with `uv pip install -e .[ollama]` if needed.
|
|
97
|
+
- `starlette>=0.40` and `uvicorn>=0.30` promoted to direct dependencies (no longer relying on transitive resolution through `fastmcp`).
|
|
98
|
+
- `cli/main.py` no longer hardcodes "embedder: Ollama"; it reports the active backend (`settings.embed_backend`) and the model name reported by the embedder instance.
|
|
99
|
+
- Chrome extension popup readable in dark mode.
|
|
100
|
+
- Chrome extension `inject.js` uses `window.location.origin` as `postMessage` target instead of `"*"`, preventing other page-world scripts on claude.ai from intercepting captured chat JSON.
|
|
101
|
+
- Chrome extension `manifest.json` adds an explicit CSP (`script-src 'self'; connect-src 'self' http://127.0.0.1:5777 http://localhost:5777`).
|
|
102
|
+
|
|
103
|
+
### Fixed
|
|
104
|
+
- `transports/stdio.py` no longer leaks raw exception messages to MCP clients; it returns `Error interno ({Type})` and logs the detail server-side.
|
|
105
|
+
- Ollama embedder catches `httpx.ConnectError`, `ConnectTimeout`, `ReadTimeout`, `RemoteProtocolError` explicitly before falling back to substring matching.
|
|
106
|
+
- `_to_iso` in `storage/repo.py` uses `strftime` instead of `replace("+00:00", "Z")`; robust to non-UTC zones.
|
|
107
|
+
- `tools.search_chats(mode="lexical")` raises a clear error when the query sanitizes to empty (previously returned `[]` silently).
|
|
108
|
+
- Chrome extension `background.js` retries 3 times with backoff (2s, 8s) on network errors, covering the case where fastembed downloads the model on first ingest and the server takes 30-60s to respond.
|
|
109
|
+
|
|
110
|
+
### Removed
|
|
111
|
+
- Empty `src/memex/core/retrieval/` directory.
|
|
112
|
+
- `pytest-cov` from dev dependencies (was not used in CI or docs).
|
|
113
|
+
|
|
114
|
+
## Phase 1 (closed)
|
|
115
|
+
|
|
116
|
+
Goal: ingestion pipeline + storage + first MCP tools working end-to-end.
|
|
117
|
+
|
|
118
|
+
### Added
|
|
119
|
+
- `core/ingest/` pipeline parsing the official Claude.ai export (`conversations.json`, `users/*/design_chats/*.json`, `memories.json`) into Project, Conversation, Message, Chunk models.
|
|
120
|
+
- `core/storage/` over SQLite + sqlite-vec with FTS5 enabled.
|
|
121
|
+
- `core/embeddings/` with Embedder ABC, Ollama implementation, and `FakeEmbedder` for tests.
|
|
122
|
+
- MCP server (`memex-mcp`) over stdio with `search_chats`, `get_chat`, `list_recent_chats`.
|
|
123
|
+
- CLI (`memex`) with `ingest`, `search`, `stats` commands.
|
|
124
|
+
|
|
125
|
+
## Phase 0 (closed)
|
|
126
|
+
|
|
127
|
+
Goal: project scaffold + decisions of record.
|
|
128
|
+
|
|
129
|
+
### Added
|
|
130
|
+
- Initial pyproject.toml, uv setup, package layout (`src/memex/`).
|
|
131
|
+
- Pydantic settings (`config.py`).
|
|
132
|
+
- Test infrastructure (pytest, asyncio mode, `not integration` marker).
|
|
133
|
+
- `CLAUDE.md`, `ROADMAP.md`, `DEVLOG.md` for project context.
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
Context and rules for any Claude Code instance working in this repo (including parallel worktrees).
|
|
4
|
+
|
|
5
|
+
## Project idea in one line
|
|
6
|
+
|
|
7
|
+
The context Claude.ai has should also be available to Claude Code. Everything else (storage, embeddings, MCP, capture) is plumbing to get there.
|
|
8
|
+
|
|
9
|
+
Full detail in [README.md](README.md) and [ROADMAP.md](ROADMAP.md).
|
|
10
|
+
|
|
11
|
+
## Working rules (apply ALWAYS)
|
|
12
|
+
|
|
13
|
+
1. **Read the code before and after editing.** Before so you do not break anything, after to verify what ended up there.
|
|
14
|
+
2. **Keep README, ROADMAP, and DEVLOG in sync with every relevant change.** Update them in the same iteration as the code.
|
|
15
|
+
3. **Review the code you just wrote for bugs** before closing the task.
|
|
16
|
+
4. **When closing each ROADMAP phase, audit the whole project** for bugs, obsolete code, and vulnerabilities. Deliver a written report.
|
|
17
|
+
5. **Plan before coding.** No writing code without a clear plan.
|
|
18
|
+
6. **If there are real doubts, ask.** Do not assume.
|
|
19
|
+
7. **Code and plans designed to scale.** Clear separation of responsibilities (pure core, swappable transport, embedder and storage behind interfaces).
|
|
20
|
+
8. **No em dashes as connectors.** Use commas, periods, parentheses. Applies to docs, commits, code, and replies to the user.
|
|
21
|
+
9. **No Claude shoutouts in commits.** No `Co-Authored-By`, no AI footers. Commits signed only by the human author.
|
|
22
|
+
10. **Apply these rules in every iteration.**
|
|
23
|
+
|
|
24
|
+
## Stack
|
|
25
|
+
|
|
26
|
+
- Python 3.12+, package manager [uv](https://docs.astral.sh/uv/).
|
|
27
|
+
- [FastMCP](https://github.com/jlowin/fastmcp) for the MCP server (supports stdio and SSE/HTTP).
|
|
28
|
+
- SQLite + [sqlite-vec](https://github.com/asg017/sqlite-vec) for storage and vector search.
|
|
29
|
+
- [fastembed](https://github.com/qdrant/fastembed) by default (zero-config, embedded ONNX) or optional [Ollama](https://ollama.com) with `nomic-embed-text`. Backend configurable via `MEMEX_EMBED_BACKEND`.
|
|
30
|
+
- `pydantic` + `pydantic-settings` for config and models.
|
|
31
|
+
- `typer` + `rich` for CLI.
|
|
32
|
+
- `pytest`, `ruff`, `mypy` for test/lint/typecheck.
|
|
33
|
+
|
|
34
|
+
## Architecture
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
src/memex/
|
|
38
|
+
├── config.py ← settings with pydantic-settings (DONE)
|
|
39
|
+
├── core/ ← pure library, no transport
|
|
40
|
+
│ ├── models.py ← Project, Conversation, Message, Chunk, SearchHit
|
|
41
|
+
│ ├── storage/ ← SQLite + sqlite-vec + FTS5 (schema, db, repo)
|
|
42
|
+
│ └── ingest/ ← parsers + chunker + pipeline (content_renderer, chunker, claude_export, pipeline)
|
|
43
|
+
├── core/embeddings/ ← factory + interfaces
|
|
44
|
+
│ ├── base.py ← Embedder ABC + EmbedderError + l2_normalize
|
|
45
|
+
│ ├── fastembed_embedder.py ← default (ONNX, zero-config)
|
|
46
|
+
│ ├── ollama.py ← optional (extra `ollama`)
|
|
47
|
+
│ ├── fake.py ← deterministic FakeEmbedder for tests
|
|
48
|
+
│ └── __init__.py ← get_default_embedder() factory based on MEMEX_EMBED_BACKEND
|
|
49
|
+
├── core/summaries/ ← LLM summarizer (Phase 3)
|
|
50
|
+
│ ├── base.py ← Summarizer ABC + SummarizerError
|
|
51
|
+
│ ├── anthropic_summarizer.py ← real backend, lazy SDK import
|
|
52
|
+
│ ├── fake.py ← deterministic FakeSummarizer for tests
|
|
53
|
+
│ └── __init__.py ← get_default_summarizer() factory, returns None if disabled
|
|
54
|
+
├── transports/ ← MCP bindings + local HTTP
|
|
55
|
+
│ ├── tools.py ← pure logic of the 4 MCP tools
|
|
56
|
+
│ ├── stdio.py ← stdio MCP entrypoint with FastMCP (memex-mcp)
|
|
57
|
+
│ ├── http_ingest.py ← local HTTP server for live capture (Starlette)
|
|
58
|
+
│ └── http.py ← SSE/HTTP remote MCP (TBD, Phase 4) ← does not exist yet
|
|
59
|
+
└── cli/ ← CLI with typer (ingest, search, stats, serve, reindex-fts)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Dependency rule:** `core/` does not import from `transports/` or `cli/`. Arrows point inward.
|
|
63
|
+
|
|
64
|
+
**State as of 2026-05-23 (Phase 3 in progress):**
|
|
65
|
+
- Phases 0, 1, and 2 closed with audit. Phase 3 first sub-task closed: on-demand auto-summaries via Claude Haiku (opt-in, lazy at first `search_chats`, persisted).
|
|
66
|
+
- `vector_search`, `text_search`, and `hybrid_search` live in `core/storage/repo.py`. The `core/retrieval/` directory was removed (it was empty); if retrieval logic grows (re-ranking, complex filters), it gets recreated with real content.
|
|
67
|
+
- `transports/http.py` does not exist yet; Phase 4 adds it when the remote MCP is built. Live capture uses `transports/http_ingest.py` (a different local server, not the MCP).
|
|
68
|
+
|
|
69
|
+
## Common commands
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
uv sync # install deps + create .venv
|
|
73
|
+
uv sync --extra summaries # also install anthropic SDK (for the optional summaries feature)
|
|
74
|
+
uv run pytest # tests (-m 'not integration' to skip integration)
|
|
75
|
+
uv run ruff check src tests # lint
|
|
76
|
+
uv run ruff format src tests # format
|
|
77
|
+
uv run mypy src/memex/core # type check (strict in core)
|
|
78
|
+
uv run memex --help # CLI (ingest, search, stats, serve, reindex-fts)
|
|
79
|
+
uv run memex-mcp # stdio MCP server (for Claude Code / Desktop)
|
|
80
|
+
uv run memex serve # local HTTP server for live capture from Chrome ext
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Multi-Claude with git worktrees
|
|
84
|
+
|
|
85
|
+
To run several Claudes in parallel on independent tasks:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
git worktree add ../Memex-ingest feature/ingest
|
|
89
|
+
git worktree add ../Memex-embed feature/embeddings
|
|
90
|
+
git worktree add ../Memex-store feature/storage
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Each worktree is a separate folder with its own branch and its own `.venv` (uv isolates on its own). They converge to the same `.git`. Each Claude works without stepping on files, and at merge time all changes land in the same repo.
|
|
94
|
+
|
|
95
|
+
**Limits:** worktrees do not see each other until merge. Better to split by independent module, not by cross-cutting feature. The coordinator is the human (or a "lead" Claude on `main`).
|
|
96
|
+
|
|
97
|
+
## Sensitive data
|
|
98
|
+
|
|
99
|
+
Everything in `data/` is personal and NEVER goes to the repo (already excluded by `.gitignore`):
|
|
100
|
+
- `data/exports/*.zip`: Claude.ai exports with real conversations.
|
|
101
|
+
- `data/memex.db`: SQLite database with indexed chats.
|
|
102
|
+
|
|
103
|
+
The `MEMEX.md` file is also in `.gitignore` because it is an internal context document (SyncChat handoff), not for users.
|
|
104
|
+
|
|
105
|
+
## Commit conventions
|
|
106
|
+
|
|
107
|
+
- Clear messages, imperative mood, English or Spanish (whichever is consistent within the message).
|
|
108
|
+
- No `Co-Authored-By: Claude...`. No AI footers. No `Generated with Claude Code`.
|
|
109
|
+
- One commit per logical unit of change.
|
|
110
|
+
|
|
111
|
+
## Persistent memory
|
|
112
|
+
|
|
113
|
+
There is project memory at `C:\Users\dioni\.claude\projects\d--Dionisio-Memex\memory\`. It contains workflow rules, user context, setup decisions. Read it at the start of each session.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Contributing to Memex
|
|
2
|
+
|
|
3
|
+
Thanks for taking a look. Memex is pre-alpha, runs from source, and the roadmap is driven by a single user-facing goal: give Claude Code the same context Claude.ai already has. Phases and close criteria live in [ROADMAP.md](ROADMAP.md); the project journal in [DEVLOG.md](DEVLOG.md).
|
|
4
|
+
|
|
5
|
+
## Scope of contributions
|
|
6
|
+
|
|
7
|
+
Welcome:
|
|
8
|
+
|
|
9
|
+
- Bug reports with reproducible steps.
|
|
10
|
+
- Discussion on tool API shape (`search_chats`, `get_chat`, `list_recent_chats`) and how Claude actually uses them in practice.
|
|
11
|
+
- Improvements to the live capture path (Chrome extension + HTTP ingest server) for sites or flows that break.
|
|
12
|
+
- Better embedders, retrievers, or chunking, with benchmarks.
|
|
13
|
+
|
|
14
|
+
Out of scope for now:
|
|
15
|
+
|
|
16
|
+
- Packaging for distribution (Phase 5 work, owned).
|
|
17
|
+
- UI other than the Chrome extension.
|
|
18
|
+
- Provider integrations beyond Claude.ai (Claude.ai is the focus for now).
|
|
19
|
+
|
|
20
|
+
If something is unclear, open an issue before writing code. It saves time on both sides.
|
|
21
|
+
|
|
22
|
+
## Local setup
|
|
23
|
+
|
|
24
|
+
Requirements: Python 3.12+ and [uv](https://docs.astral.sh/uv/).
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
git clone https://github.com/dioniipereyraa/memex
|
|
28
|
+
cd memex
|
|
29
|
+
uv sync --extra dev
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The `dev` extra brings pytest, ruff, mypy, and `ollama` (Python client, used by the optional Ollama backend).
|
|
33
|
+
|
|
34
|
+
## Running checks locally
|
|
35
|
+
|
|
36
|
+
The same checks CI runs:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
uv run ruff check src tests
|
|
40
|
+
uv run ruff format --check src tests
|
|
41
|
+
uv run mypy src/memex/core src/memex/config.py src/memex/transports
|
|
42
|
+
uv run pytest tests/unit -q
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Integration tests live under `tests/integration/` and need external services (Ollama). Run with:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
uv run pytest tests/integration -q
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Integration tests are skipped by default in CI.
|
|
52
|
+
|
|
53
|
+
## Code style
|
|
54
|
+
|
|
55
|
+
- **No em dashes (`—`) as connectors.** Use commas, periods, parentheses. Applies to docs, commits, code, and PR descriptions.
|
|
56
|
+
- **No AI footers in commits.** No `Co-Authored-By: Claude...`, no `Generated with ...`. Commits signed by the human author.
|
|
57
|
+
- **Imperative mood in commit messages.** Spanish or English, pick one per message and stay consistent.
|
|
58
|
+
- **Read before you edit, read after.** Verify what you wrote landed as intended.
|
|
59
|
+
- **Plan before you code.** No stream-of-consciousness implementations.
|
|
60
|
+
- **Architecture rule:** `core/` does not import from `transports/` or `cli/`. Dependencies point inward.
|
|
61
|
+
|
|
62
|
+
## Pull request workflow
|
|
63
|
+
|
|
64
|
+
1. Branch off `main`. Conventional names like `feat/...`, `fix/...`, `docs/...`, `chore/...`.
|
|
65
|
+
2. Make the change, with tests when it is testable. Match existing test style (see `tests/unit/` for examples).
|
|
66
|
+
3. Run all local checks (lint, format, mypy, tests). CI runs the same set; if it is green locally, it should be green there.
|
|
67
|
+
4. Open a PR with a description that explains the *why*, not just the *what*. The diff already shows the what.
|
|
68
|
+
5. Update `DEVLOG.md` if the change is non-trivial (new feature, behavior change, architecture decision). Skip for pure refactors or test additions.
|
|
69
|
+
|
|
70
|
+
## Reporting bugs
|
|
71
|
+
|
|
72
|
+
Open an issue with:
|
|
73
|
+
|
|
74
|
+
- What you expected.
|
|
75
|
+
- What happened instead.
|
|
76
|
+
- Steps to reproduce (the smaller the better).
|
|
77
|
+
- Output of `memex stats` and `uv run python -V` if relevant.
|
|
78
|
+
|
|
79
|
+
If it involves the Chrome extension, include the extension version and browser version.
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
By contributing, you agree that your contributions are licensed under the project's [MIT License](LICENSE).
|