markdown-memory-vec 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markdown_memory_vec-0.1.0/.github/workflows/publish.yml +31 -0
- markdown_memory_vec-0.1.0/.gitignore +42 -0
- markdown_memory_vec-0.1.0/LICENSE +21 -0
- markdown_memory_vec-0.1.0/PKG-INFO +219 -0
- markdown_memory_vec-0.1.0/README.md +186 -0
- markdown_memory_vec-0.1.0/pyproject.toml +143 -0
- markdown_memory_vec-0.1.0/src/memory_vec/__init__.py +73 -0
- markdown_memory_vec-0.1.0/src/memory_vec/__main__.py +109 -0
- markdown_memory_vec-0.1.0/src/memory_vec/embedder.py +137 -0
- markdown_memory_vec-0.1.0/src/memory_vec/indexer.py +307 -0
- markdown_memory_vec-0.1.0/src/memory_vec/interfaces.py +118 -0
- markdown_memory_vec-0.1.0/src/memory_vec/search.py +234 -0
- markdown_memory_vec-0.1.0/src/memory_vec/service.py +326 -0
- markdown_memory_vec-0.1.0/src/memory_vec/store.py +470 -0
- markdown_memory_vec-0.1.0/tests/__init__.py +0 -0
- markdown_memory_vec-0.1.0/tests/conftest.py +95 -0
- markdown_memory_vec-0.1.0/tests/test_indexer.py +260 -0
- markdown_memory_vec-0.1.0/tests/test_search.py +244 -0
- markdown_memory_vec-0.1.0/tests/test_service.py +161 -0
- markdown_memory_vec-0.1.0/tests/test_store.py +331 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment:
|
|
14
|
+
name: pypi
|
|
15
|
+
url: https://pypi.org/p/markdown-memory-vec
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.12"
|
|
23
|
+
|
|
24
|
+
- name: Install build tools
|
|
25
|
+
run: pip install build
|
|
26
|
+
|
|
27
|
+
- name: Build package
|
|
28
|
+
run: python -m build
|
|
29
|
+
|
|
30
|
+
- name: Publish to PyPI
|
|
31
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*$py.class
|
|
4
|
+
*.so
|
|
5
|
+
.Python
|
|
6
|
+
build/
|
|
7
|
+
develop-eggs/
|
|
8
|
+
dist/
|
|
9
|
+
downloads/
|
|
10
|
+
eggs/
|
|
11
|
+
.eggs/
|
|
12
|
+
lib/
|
|
13
|
+
lib64/
|
|
14
|
+
parts/
|
|
15
|
+
sdist/
|
|
16
|
+
var/
|
|
17
|
+
wheels/
|
|
18
|
+
*.egg-info/
|
|
19
|
+
*.egg
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.manifest
|
|
22
|
+
*.spec
|
|
23
|
+
pip-log.txt
|
|
24
|
+
pip-delete-this-directory.txt
|
|
25
|
+
htmlcov/
|
|
26
|
+
.tox/
|
|
27
|
+
.nox/
|
|
28
|
+
.coverage
|
|
29
|
+
.coverage.*
|
|
30
|
+
.cache
|
|
31
|
+
nosetests.xml
|
|
32
|
+
coverage.xml
|
|
33
|
+
*.cover
|
|
34
|
+
*.py,cover
|
|
35
|
+
.hypothesis/
|
|
36
|
+
.pytest_cache/
|
|
37
|
+
.ruff_cache/
|
|
38
|
+
.mypy_cache/
|
|
39
|
+
.pytype/
|
|
40
|
+
.venv/
|
|
41
|
+
env/
|
|
42
|
+
venv/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Aigente
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: markdown-memory-vec
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight vector search for Markdown-based memory systems. Chunk, embed, index, and hybrid-search your .md knowledge base with sqlite-vec.
|
|
5
|
+
Project-URL: Homepage, https://github.com/aigente/markdown-memory-vec
|
|
6
|
+
Project-URL: Repository, https://github.com/aigente/markdown-memory-vec
|
|
7
|
+
Author-email: Aigente <dev@aigente.io>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: RAG,embeddings,markdown,memory,semantic-search,sqlite-vec,vector-search
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: pyyaml>=6.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: mypy>=1.10.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: pyright>=1.1.380; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: types-pyyaml; extra == 'dev'
|
|
29
|
+
Provides-Extra: vector
|
|
30
|
+
Requires-Dist: sentence-transformers>=3.0; extra == 'vector'
|
|
31
|
+
Requires-Dist: sqlite-vec>=0.1.6; extra == 'vector'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# markdown-memory-vec
|
|
35
|
+
|
|
36
|
+
Lightweight vector search for Markdown-based memory systems. Chunk, embed, index, and hybrid-search your `.md` knowledge base with sqlite-vec.
|
|
37
|
+
|
|
38
|
+
<!-- Badges -->
|
|
39
|
+
<!-- [](https://pypi.org/project/markdown-memory-vec/) -->
|
|
40
|
+
<!-- [](https://pypi.org/project/markdown-memory-vec/) -->
|
|
41
|
+
<!-- [](https://opensource.org/licenses/MIT) -->
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
- **Markdown-native**: YAML frontmatter parsing for metadata (importance, type, tags)
|
|
46
|
+
- **Smart chunking**: Paragraph-aware splitting with configurable chunk size and overlap (~400 tokens per chunk, 80-token overlap)
|
|
47
|
+
- **SHA-256 dedup**: Never re-embed unchanged content — incremental indexing is fast
|
|
48
|
+
- **Hybrid search**: Combines semantic similarity (α), importance weighting (β), and temporal decay (γ)
|
|
49
|
+
- **Zero-copy storage**: sqlite-vec KNN search with cosine distance in a single `.db` file
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
### Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Core only (YAML parsing, chunking, interfaces)
|
|
57
|
+
pip install markdown-memory-vec
|
|
58
|
+
|
|
59
|
+
# With vector search support (sqlite-vec + sentence-transformers)
|
|
60
|
+
pip install 'markdown-memory-vec[vector]'
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Python API
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from memory_vec import MemoryVectorService
|
|
67
|
+
|
|
68
|
+
svc = MemoryVectorService("/path/to/project")
|
|
69
|
+
svc.rebuild_index() # Full index build
|
|
70
|
+
results = svc.search("how to deploy") # Hybrid search
|
|
71
|
+
svc.close()
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Low-level API
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from memory_vec import (
|
|
78
|
+
SqliteVecStore,
|
|
79
|
+
SentenceTransformerEmbedder,
|
|
80
|
+
MemoryIndexer,
|
|
81
|
+
HybridSearchService,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
store = SqliteVecStore("memory.db")
|
|
85
|
+
store.ensure_tables()
|
|
86
|
+
|
|
87
|
+
embedder = SentenceTransformerEmbedder()
|
|
88
|
+
indexer = MemoryIndexer(store, embedder, memory_root="/path/to/memory")
|
|
89
|
+
indexer.index_directory("/path/to/memory")
|
|
90
|
+
|
|
91
|
+
search = HybridSearchService(vec_store=store, embedder=embedder)
|
|
92
|
+
results = search.search("how to deploy")
|
|
93
|
+
for r in results:
|
|
94
|
+
print(f"{r.file_path} (score={r.hybrid_score:.3f}): {r.chunk_text[:80]}...")
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## CLI Usage
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# Full rebuild
|
|
101
|
+
memory-vec /path/to/project --rebuild
|
|
102
|
+
|
|
103
|
+
# Incremental update (only changed files)
|
|
104
|
+
memory-vec /path/to/project --incremental
|
|
105
|
+
|
|
106
|
+
# Search
|
|
107
|
+
memory-vec /path/to/project --search "how to deploy" --top-k 5
|
|
108
|
+
|
|
109
|
+
# Statistics
|
|
110
|
+
memory-vec /path/to/project --stats
|
|
111
|
+
|
|
112
|
+
# Custom memory subdirectory
|
|
113
|
+
memory-vec /path/to/project --rebuild --memory-subdir "docs/memory"
|
|
114
|
+
|
|
115
|
+
# Verbose logging
|
|
116
|
+
memory-vec /path/to/project --rebuild -v
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## API Reference
|
|
120
|
+
|
|
121
|
+
### High-level
|
|
122
|
+
|
|
123
|
+
| Class | Description |
|
|
124
|
+
|-------|-------------|
|
|
125
|
+
| `MemoryVectorService` | All-in-one service: rebuild, incremental index, search, stats |
|
|
126
|
+
|
|
127
|
+
### Components
|
|
128
|
+
|
|
129
|
+
| Class | Description |
|
|
130
|
+
|-------|-------------|
|
|
131
|
+
| `SqliteVecStore` | sqlite-vec backed vector store with KNN search |
|
|
132
|
+
| `SentenceTransformerEmbedder` | Lazy-loading sentence-transformers embedder |
|
|
133
|
+
| `MemoryIndexer` | Markdown → chunks → embeddings → store pipeline |
|
|
134
|
+
| `HybridSearchService` | Hybrid scoring: `α×semantic + β×importance + γ×temporal` |
|
|
135
|
+
|
|
136
|
+
### Interfaces
|
|
137
|
+
|
|
138
|
+
| Interface | Description |
|
|
139
|
+
|-----------|-------------|
|
|
140
|
+
| `IEmbedder` | Abstract embedder (`embed`, `embed_batch`, `dimension`) |
|
|
141
|
+
| `ISqliteVecStore` | Abstract vector store (`add`, `search`, `delete`, `clear`, `count`) |
|
|
142
|
+
|
|
143
|
+
### Data Types
|
|
144
|
+
|
|
145
|
+
| Type | Description |
|
|
146
|
+
|------|-------------|
|
|
147
|
+
| `VectorRecord` | Record for insertion (id, embedding, metadata) |
|
|
148
|
+
| `VectorSearchResult` | Raw KNN result (id, distance, metadata) |
|
|
149
|
+
| `SearchResult` | Hybrid search result with all score components |
|
|
150
|
+
| `MemoryVecMeta` | Metadata dataclass for stored embeddings |
|
|
151
|
+
|
|
152
|
+
### Utilities
|
|
153
|
+
|
|
154
|
+
| Function | Description |
|
|
155
|
+
|----------|-------------|
|
|
156
|
+
| `chunk_text(text, chunk_size, overlap_size)` | Split text into overlapping chunks |
|
|
157
|
+
| `parse_frontmatter(text)` | Extract YAML frontmatter from Markdown |
|
|
158
|
+
| `content_hash(text)` | SHA-256 hex digest |
|
|
159
|
+
| `is_sqlite_vec_available()` | Check sqlite-vec availability |
|
|
160
|
+
| `is_sentence_transformers_available()` | Check sentence-transformers availability |
|
|
161
|
+
|
|
162
|
+
## Architecture
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
┌─────────────────────────────────────────────────────┐
|
|
166
|
+
│ MemoryVectorService │
|
|
167
|
+
│ (high-level orchestration layer) │
|
|
168
|
+
├──────────┬──────────┬──────────────┬────────────────┤
|
|
169
|
+
│ │ │ │ │
|
|
170
|
+
│ Indexer │ Search │ Embedder │ Store │
|
|
171
|
+
│ │ │ │ │
|
|
172
|
+
│ .md file │ hybrid │ sentence- │ sqlite-vec │
|
|
173
|
+
│ → chunks │ scoring │ transformers│ KNN + meta │
|
|
174
|
+
│ → embed │ α+β+γ │ (lazy load) │ (cosine) │
|
|
175
|
+
│ → store │ │ │ │
|
|
176
|
+
└──────────┴──────────┴──────────────┴────────────────┘
|
|
177
|
+
▲ │
|
|
178
|
+
│ YAML frontmatter │
|
|
179
|
+
│ importance/type/tags ▼
|
|
180
|
+
┌─┴─────────────────────┐ ┌─────────────────────┐
|
|
181
|
+
│ Markdown Files │ │ vector_index.db │
|
|
182
|
+
│ (.claude/memory/) │ │ (single file) │
|
|
183
|
+
└───────────────────────┘ └─────────────────────┘
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Configuration
|
|
187
|
+
|
|
188
|
+
### HuggingFace Model
|
|
189
|
+
|
|
190
|
+
By default, the embedder uses the `paraphrase-multilingual-MiniLM-L12-v2` model (384-dim, 50+ languages).
|
|
191
|
+
|
|
192
|
+
For users in China or other regions with slow access to HuggingFace:
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
# Use a mirror
|
|
196
|
+
export HF_ENDPOINT=https://hf-mirror.com
|
|
197
|
+
|
|
198
|
+
# Or use offline mode (model must be pre-cached)
|
|
199
|
+
export HF_HUB_OFFLINE=1
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Hybrid Search Weights
|
|
203
|
+
|
|
204
|
+
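Defaults: `α=0.6, β=0.2, γ=0.2, λ=0.05`, where α, β, and γ weight the semantic, importance, and temporal scores, and λ (`decay_lambda`) is the temporal decay rate.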
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
search = HybridSearchService(
|
|
208
|
+
vec_store=store,
|
|
209
|
+
embedder=embedder,
|
|
210
|
+
alpha=0.8, # Semantic weight
|
|
211
|
+
beta=0.1, # Importance weight
|
|
212
|
+
gamma=0.1, # Temporal decay weight
|
|
213
|
+
decay_lambda=0.03, # Slower decay
|
|
214
|
+
)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## License
|
|
218
|
+
|
|
219
|
+
MIT
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# markdown-memory-vec
|
|
2
|
+
|
|
3
|
+
Lightweight vector search for Markdown-based memory systems. Chunk, embed, index, and hybrid-search your `.md` knowledge base with sqlite-vec.
|
|
4
|
+
|
|
5
|
+
<!-- Badges -->
|
|
6
|
+
<!-- [](https://pypi.org/project/markdown-memory-vec/) -->
|
|
7
|
+
<!-- [](https://pypi.org/project/markdown-memory-vec/) -->
|
|
8
|
+
<!-- [](https://opensource.org/licenses/MIT) -->
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- **Markdown-native**: YAML frontmatter parsing for metadata (importance, type, tags)
|
|
13
|
+
- **Smart chunking**: Paragraph-aware splitting with configurable chunk size and overlap (~400 tokens per chunk, 80-token overlap)
|
|
14
|
+
- **SHA-256 dedup**: Never re-embed unchanged content — incremental indexing is fast
|
|
15
|
+
- **Hybrid search**: Combines semantic similarity (α), importance weighting (β), and temporal decay (γ)
|
|
16
|
+
- **Zero-copy storage**: sqlite-vec KNN search with cosine distance in a single `.db` file
|
|
17
|
+
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
### Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# Core only (YAML parsing, chunking, interfaces)
|
|
24
|
+
pip install markdown-memory-vec
|
|
25
|
+
|
|
26
|
+
# With vector search support (sqlite-vec + sentence-transformers)
|
|
27
|
+
pip install 'markdown-memory-vec[vector]'
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Python API
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from memory_vec import MemoryVectorService
|
|
34
|
+
|
|
35
|
+
svc = MemoryVectorService("/path/to/project")
|
|
36
|
+
svc.rebuild_index() # Full index build
|
|
37
|
+
results = svc.search("how to deploy") # Hybrid search
|
|
38
|
+
svc.close()
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Low-level API
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from memory_vec import (
|
|
45
|
+
SqliteVecStore,
|
|
46
|
+
SentenceTransformerEmbedder,
|
|
47
|
+
MemoryIndexer,
|
|
48
|
+
HybridSearchService,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
store = SqliteVecStore("memory.db")
|
|
52
|
+
store.ensure_tables()
|
|
53
|
+
|
|
54
|
+
embedder = SentenceTransformerEmbedder()
|
|
55
|
+
indexer = MemoryIndexer(store, embedder, memory_root="/path/to/memory")
|
|
56
|
+
indexer.index_directory("/path/to/memory")
|
|
57
|
+
|
|
58
|
+
search = HybridSearchService(vec_store=store, embedder=embedder)
|
|
59
|
+
results = search.search("how to deploy")
|
|
60
|
+
for r in results:
|
|
61
|
+
print(f"{r.file_path} (score={r.hybrid_score:.3f}): {r.chunk_text[:80]}...")
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## CLI Usage
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Full rebuild
|
|
68
|
+
memory-vec /path/to/project --rebuild
|
|
69
|
+
|
|
70
|
+
# Incremental update (only changed files)
|
|
71
|
+
memory-vec /path/to/project --incremental
|
|
72
|
+
|
|
73
|
+
# Search
|
|
74
|
+
memory-vec /path/to/project --search "how to deploy" --top-k 5
|
|
75
|
+
|
|
76
|
+
# Statistics
|
|
77
|
+
memory-vec /path/to/project --stats
|
|
78
|
+
|
|
79
|
+
# Custom memory subdirectory
|
|
80
|
+
memory-vec /path/to/project --rebuild --memory-subdir "docs/memory"
|
|
81
|
+
|
|
82
|
+
# Verbose logging
|
|
83
|
+
memory-vec /path/to/project --rebuild -v
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## API Reference
|
|
87
|
+
|
|
88
|
+
### High-level
|
|
89
|
+
|
|
90
|
+
| Class | Description |
|
|
91
|
+
|-------|-------------|
|
|
92
|
+
| `MemoryVectorService` | All-in-one service: rebuild, incremental index, search, stats |
|
|
93
|
+
|
|
94
|
+
### Components
|
|
95
|
+
|
|
96
|
+
| Class | Description |
|
|
97
|
+
|-------|-------------|
|
|
98
|
+
| `SqliteVecStore` | sqlite-vec backed vector store with KNN search |
|
|
99
|
+
| `SentenceTransformerEmbedder` | Lazy-loading sentence-transformers embedder |
|
|
100
|
+
| `MemoryIndexer` | Markdown → chunks → embeddings → store pipeline |
|
|
101
|
+
| `HybridSearchService` | Hybrid scoring: `α×semantic + β×importance + γ×temporal` |
|
|
102
|
+
|
|
103
|
+
### Interfaces
|
|
104
|
+
|
|
105
|
+
| Interface | Description |
|
|
106
|
+
|-----------|-------------|
|
|
107
|
+
| `IEmbedder` | Abstract embedder (`embed`, `embed_batch`, `dimension`) |
|
|
108
|
+
| `ISqliteVecStore` | Abstract vector store (`add`, `search`, `delete`, `clear`, `count`) |
|
|
109
|
+
|
|
110
|
+
### Data Types
|
|
111
|
+
|
|
112
|
+
| Type | Description |
|
|
113
|
+
|------|-------------|
|
|
114
|
+
| `VectorRecord` | Record for insertion (id, embedding, metadata) |
|
|
115
|
+
| `VectorSearchResult` | Raw KNN result (id, distance, metadata) |
|
|
116
|
+
| `SearchResult` | Hybrid search result with all score components |
|
|
117
|
+
| `MemoryVecMeta` | Metadata dataclass for stored embeddings |
|
|
118
|
+
|
|
119
|
+
### Utilities
|
|
120
|
+
|
|
121
|
+
| Function | Description |
|
|
122
|
+
|----------|-------------|
|
|
123
|
+
| `chunk_text(text, chunk_size, overlap_size)` | Split text into overlapping chunks |
|
|
124
|
+
| `parse_frontmatter(text)` | Extract YAML frontmatter from Markdown |
|
|
125
|
+
| `content_hash(text)` | SHA-256 hex digest |
|
|
126
|
+
| `is_sqlite_vec_available()` | Check sqlite-vec availability |
|
|
127
|
+
| `is_sentence_transformers_available()` | Check sentence-transformers availability |
|
|
128
|
+
|
|
129
|
+
## Architecture
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
┌─────────────────────────────────────────────────────┐
|
|
133
|
+
│ MemoryVectorService │
|
|
134
|
+
│ (high-level orchestration layer) │
|
|
135
|
+
├──────────┬──────────┬──────────────┬────────────────┤
|
|
136
|
+
│ │ │ │ │
|
|
137
|
+
│ Indexer │ Search │ Embedder │ Store │
|
|
138
|
+
│ │ │ │ │
|
|
139
|
+
│ .md file │ hybrid │ sentence- │ sqlite-vec │
|
|
140
|
+
│ → chunks │ scoring │ transformers│ KNN + meta │
|
|
141
|
+
│ → embed │ α+β+γ │ (lazy load) │ (cosine) │
|
|
142
|
+
│ → store │ │ │ │
|
|
143
|
+
└──────────┴──────────┴──────────────┴────────────────┘
|
|
144
|
+
▲ │
|
|
145
|
+
│ YAML frontmatter │
|
|
146
|
+
│ importance/type/tags ▼
|
|
147
|
+
┌─┴─────────────────────┐ ┌─────────────────────┐
|
|
148
|
+
│ Markdown Files │ │ vector_index.db │
|
|
149
|
+
│ (.claude/memory/) │ │ (single file) │
|
|
150
|
+
└───────────────────────┘ └─────────────────────┘
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Configuration
|
|
154
|
+
|
|
155
|
+
### HuggingFace Model
|
|
156
|
+
|
|
157
|
+
By default, the embedder uses the `paraphrase-multilingual-MiniLM-L12-v2` model (384-dim, 50+ languages).
|
|
158
|
+
|
|
159
|
+
For users in China or other regions with slow access to HuggingFace:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
# Use a mirror
|
|
163
|
+
export HF_ENDPOINT=https://hf-mirror.com
|
|
164
|
+
|
|
165
|
+
# Or use offline mode (model must be pre-cached)
|
|
166
|
+
export HF_HUB_OFFLINE=1
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Hybrid Search Weights
|
|
170
|
+
|
|
171
|
+
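Defaults: `α=0.6, β=0.2, γ=0.2, λ=0.05`, where α, β, and γ weight the semantic, importance, and temporal scores, and λ (`decay_lambda`) is the temporal decay rate.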
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
search = HybridSearchService(
|
|
175
|
+
vec_store=store,
|
|
176
|
+
embedder=embedder,
|
|
177
|
+
alpha=0.8, # Semantic weight
|
|
178
|
+
beta=0.1, # Importance weight
|
|
179
|
+
gamma=0.1, # Temporal decay weight
|
|
180
|
+
decay_lambda=0.03, # Slower decay
|
|
181
|
+
)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
MIT
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "markdown-memory-vec"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Lightweight vector search for Markdown-based memory systems. Chunk, embed, index, and hybrid-search your .md knowledge base with sqlite-vec."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Aigente", email = "dev@aigente.io" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"vector-search",
|
|
17
|
+
"markdown",
|
|
18
|
+
"memory",
|
|
19
|
+
"embeddings",
|
|
20
|
+
"sqlite-vec",
|
|
21
|
+
"semantic-search",
|
|
22
|
+
"RAG",
|
|
23
|
+
]
|
|
24
|
+
classifiers = [
|
|
25
|
+
"Development Status :: 3 - Alpha",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"License :: OSI Approved :: MIT License",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Programming Language :: Python :: 3.13",
|
|
32
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
33
|
+
"Topic :: Text Processing :: Indexing",
|
|
34
|
+
"Typing :: Typed",
|
|
35
|
+
]
|
|
36
|
+
dependencies = [
|
|
37
|
+
"PyYAML>=6.0",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
vector = [
|
|
42
|
+
"sqlite-vec>=0.1.6",
|
|
43
|
+
"sentence-transformers>=3.0",
|
|
44
|
+
]
|
|
45
|
+
dev = [
|
|
46
|
+
"pytest>=7.0",
|
|
47
|
+
"pyright>=1.1.380",
|
|
48
|
+
"ruff>=0.4.0",
|
|
49
|
+
"mypy>=1.10.0",
|
|
50
|
+
"types-PyYAML",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[project.urls]
|
|
54
|
+
Homepage = "https://github.com/aigente/markdown-memory-vec"
|
|
55
|
+
Repository = "https://github.com/aigente/markdown-memory-vec"
|
|
56
|
+
|
|
57
|
+
[project.scripts]
|
|
58
|
+
memory-vec = "memory_vec.__main__:main"
|
|
59
|
+
|
|
60
|
+
[tool.hatch.build.targets.wheel]
|
|
61
|
+
packages = ["src/memory_vec"]
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Ruff
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
[tool.ruff]
|
|
67
|
+
line-length = 120
|
|
68
|
+
fix = true
|
|
69
|
+
target-version = "py311"
|
|
70
|
+
include = ["src/**", "tests/**"]
|
|
71
|
+
|
|
72
|
+
[tool.ruff.format]
|
|
73
|
+
docstring-code-format = true
|
|
74
|
+
|
|
75
|
+
[tool.ruff.lint]
|
|
76
|
+
select = [
|
|
77
|
+
"E",
|
|
78
|
+
"F",
|
|
79
|
+
"W",
|
|
80
|
+
"B",
|
|
81
|
+
"Q",
|
|
82
|
+
"I",
|
|
83
|
+
"ASYNC",
|
|
84
|
+
"T20",
|
|
85
|
+
]
|
|
86
|
+
ignore = [
|
|
87
|
+
"F401",
|
|
88
|
+
"E501",
|
|
89
|
+
"W293",
|
|
90
|
+
"W291",
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
[tool.ruff.lint.per-file-ignores]
|
|
94
|
+
"tests/**" = ["T201"] # Allow print statements in test files
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# Pyright
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
[tool.pyright]
|
|
100
|
+
include = ["src", "tests"]
|
|
101
|
+
typeCheckingMode = "strict"
|
|
102
|
+
reportUnnecessaryIsInstance = false
|
|
103
|
+
reportMissingTypeStubs = false
|
|
104
|
+
reportUnknownVariableType = false
|
|
105
|
+
reportUnknownMemberType = false
|
|
106
|
+
reportUnknownArgumentType = false
|
|
107
|
+
reportUnusedImport = false
|
|
108
|
+
|
|
109
|
+
# ---------------------------------------------------------------------------
|
|
110
|
+
# Mypy
|
|
111
|
+
# ---------------------------------------------------------------------------
|
|
112
|
+
[tool.mypy]
|
|
113
|
+
strict = true
|
|
114
|
+
python_version = "3.11"
|
|
115
|
+
ignore_missing_imports = true
|
|
116
|
+
disallow_untyped_defs = true
|
|
117
|
+
no_implicit_optional = true
|
|
118
|
+
check_untyped_defs = true
|
|
119
|
+
warn_return_any = true
|
|
120
|
+
show_error_codes = true
|
|
121
|
+
warn_unused_ignores = false
|
|
122
|
+
disallow_incomplete_defs = true
|
|
123
|
+
disallow_untyped_decorators = true
|
|
124
|
+
disallow_any_unimported = false
|
|
125
|
+
cache_dir = ".mypy_cache"
|
|
126
|
+
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
# Pytest
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
[tool.pytest.ini_options]
|
|
131
|
+
minversion = "6.0"
|
|
132
|
+
testpaths = ["tests"]
|
|
133
|
+
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
# Poe the Poet (task runner)
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
[tool.poe.tasks]
|
|
138
|
+
fmt = "ruff format"
|
|
139
|
+
lint = "ruff check"
|
|
140
|
+
pyright = "pyright"
|
|
141
|
+
mypy = "mypy src tests"
|
|
142
|
+
test = "pytest -v"
|
|
143
|
+
check = ["fmt", "lint", "pyright", "test"]
|