agmem 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/METADATA +24 -18
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/RECORD +25 -24
- memvcs/commands/daemon.py +21 -3
- memvcs/commands/distill.py +10 -2
- memvcs/commands/federated.py +7 -1
- memvcs/commands/garden.py +10 -2
- memvcs/commands/gc.py +18 -1
- memvcs/commands/prove.py +4 -2
- memvcs/commands/timeline.py +28 -0
- memvcs/commands/when.py +28 -0
- memvcs/core/compression_pipeline.py +165 -0
- memvcs/core/crypto_verify.py +12 -1
- memvcs/core/distiller.py +70 -4
- memvcs/core/federated.py +80 -9
- memvcs/core/gardener.py +80 -5
- memvcs/core/ipfs_remote.py +168 -8
- memvcs/core/knowledge_graph.py +79 -6
- memvcs/core/objects.py +33 -21
- memvcs/core/pack.py +201 -1
- memvcs/core/remote.py +200 -3
- memvcs/core/zk_proofs.py +145 -11
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/WHEEL +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agmem
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Agentic Memory Version Control System - Git for AI agent memories
|
|
5
5
|
Home-page: https://github.com/vivek-tiwari-vt/agmem
|
|
6
6
|
Author: agmem Team
|
|
@@ -33,6 +33,7 @@ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
|
33
33
|
Requires-Dist: black==24.10.0; extra == "dev"
|
|
34
34
|
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
35
35
|
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: bandit[toml]>=1.7.0; extra == "dev"
|
|
36
37
|
Provides-Extra: llm
|
|
37
38
|
Requires-Dist: openai>=1.0.0; extra == "llm"
|
|
38
39
|
Requires-Dist: anthropic>=0.18.0; extra == "llm"
|
|
@@ -68,6 +69,8 @@ Provides-Extra: crypto
|
|
|
68
69
|
Requires-Dist: cryptography>=41.0.0; extra == "crypto"
|
|
69
70
|
Provides-Extra: ipfs
|
|
70
71
|
Requires-Dist: requests>=2.28.0; extra == "ipfs"
|
|
72
|
+
Provides-Extra: ipfs-daemon
|
|
73
|
+
Requires-Dist: ipfshttpclient>=0.8.0; extra == "ipfs-daemon"
|
|
71
74
|
Provides-Extra: all
|
|
72
75
|
Requires-Dist: mcp>=1.0.0; extra == "all"
|
|
73
76
|
Requires-Dist: cryptography>=41.0.0; extra == "all"
|
|
@@ -129,12 +132,12 @@ agmem solves all of these problems with a familiar Git-like interface.
|
|
|
129
132
|
- ✅ **Tamper-evident audit trail** — Append-only hash-chained log (init, add, commit, checkout, merge, push, pull, config); `agmem audit` and `agmem audit --verify`
|
|
130
133
|
- ✅ **Multi-agent trust** — Trust store (full / conditional / untrusted) per public key; applied on pull/merge; clone copies remote keys
|
|
131
134
|
- ✅ **Conflict resolution** — `agmem resolve` with ours/theirs/both; conflicts persisted in `.mem/merge/`; path-safe
|
|
132
|
-
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden
|
|
133
|
-
- ✅ **Pack files & GC** — `agmem gc` (reachable from refs, prune loose, optional
|
|
135
|
+
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise applied to counts and frontmatter
|
|
136
|
+
- ✅ **Pack files & GC** — `agmem gc [--repack]` (reachable from refs, prune loose, optional pack file + index); ObjectStore reads from pack when loose missing
|
|
134
137
|
- ✅ **Multi-provider LLM** — OpenAI and Anthropic via `memvcs.core.llm`; config/repo or env; used by gardener, distiller, consistency, merge
|
|
135
138
|
- ✅ **Temporal querying** — Point-in-time and range queries in temporal index; frontmatter timestamps
|
|
136
|
-
- ✅ **Federated collaboration** — `agmem federated push|pull
|
|
137
|
-
- ✅ **Zero-knowledge proofs** — `agmem prove` (
|
|
139
|
+
- ✅ **Federated collaboration** — `agmem federated push|pull`; real summaries (topic counts, fact hashes); optional DP on outbound; coordinator API in docs/FEDERATED.md
|
|
140
|
+
- ✅ **Zero-knowledge proofs** — `agmem prove` (hash/signature-based): keyword containment (Merkle set membership), memory freshness (signed timestamp)
|
|
138
141
|
- ✅ **Daemon health** — Periodic Merkle verification in daemon loop; safe auto-remediation hooks
|
|
139
142
|
- ✅ **GPU acceleration** — Vector store detects GPU for embedding model when available
|
|
140
143
|
- ✅ **Optional** — `serve`, `daemon` (watch + auto-commit), `garden` (episode archival), MCP server; install extras as needed
|
|
@@ -264,9 +267,9 @@ All commands are listed below. Highlights: **`agmem blame <file>`** (who changed
|
|
|
264
267
|
| `agmem verify [ref]` | Belief consistency (contradictions); use `--crypto` to verify commit Merkle/signature |
|
|
265
268
|
| `agmem audit [--verify] [--max n]` | Show tamper-evident audit log; `--verify` checks hash chain |
|
|
266
269
|
| `agmem resolve [path]` | Resolve merge conflicts (ours/theirs/both); path under `current/` |
|
|
267
|
-
| `agmem gc [--dry-run] [--prune-days n]` | Garbage collection: delete unreachable loose objects; optional
|
|
268
|
-
| `agmem prove --memory <path> --property keyword\|freshness --value <v> [-o out]` | Generate ZK proofs (
|
|
269
|
-
| `agmem federated push\|pull` | Federated collaboration (
|
|
270
|
+
| `agmem gc [--dry-run] [--repack] [--prune-days n]` | Garbage collection: delete unreachable loose objects; optional pack file creation |
|
|
271
|
+
| `agmem prove --memory <path> --property keyword\|freshness --value <v> [-o out]` | Generate ZK proofs (keyword: Merkle set membership; freshness: signed timestamp) |
|
|
272
|
+
| `agmem federated push\|pull` | Federated collaboration (real summaries, optional DP; requires coordinator in config) |
|
|
270
273
|
|
|
271
274
|
### Optional (install extras)
|
|
272
275
|
|
|
@@ -370,26 +373,26 @@ The following 18 capabilities are implemented (or stubbed) per the agmem feature
|
|
|
370
373
|
|
|
371
374
|
| # | Feature | Description |
|
|
372
375
|
|---|---------|-------------|
|
|
373
|
-
| **9** | **Decentralized storage (IPFS)** | Push/pull via
|
|
374
|
-
| **10** | **Pack files and garbage collection** | Pack loose objects into pack file + index; GC deletes unreachable
|
|
375
|
-
| **11** | **Enhanced cloud remote operations** | Push conflict detection
|
|
376
|
+
| **9** | **Decentralized storage (IPFS)** | Push/pull via gateway (POST /api/v0/add, GET /ipfs/<cid>). Bundle/unbundle in `memvcs.core.ipfs_remote`; optional `agmem[ipfs]`. |
|
|
377
|
+
| **10** | **Pack files and garbage collection** | Pack loose objects into pack file + index; GC deletes unreachable; ObjectStore reads from pack. **Command:** `agmem gc [--dry-run] [--repack] [--prune-days n]`. |
|
|
378
|
+
| **11** | **Enhanced cloud remote operations** | Push conflict detection; S3/GCS remotes with distributed locking (acquire before push/fetch, release in finally). Config: `lock_table` for S3. |
|
|
376
379
|
|
|
377
380
|
### Tier 5 — Intelligence and retrieval
|
|
378
381
|
|
|
379
382
|
| # | Feature | Description |
|
|
380
383
|
|---|---------|-------------|
|
|
381
384
|
| **12** | **Multi-provider LLM** | `memvcs.core.llm`: OpenAI and Anthropic; factory by config or env. Used by gardener, distiller, consistency checker, merge. Credentials via env (e.g. `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`). |
|
|
382
|
-
| **13** | **Enhanced semantic compression** | Multi-stage pipeline
|
|
383
|
-
| **14** | **Temporal querying and time-travel** | Point-in-time and range
|
|
384
|
-
| **15** | **Cross-memory relationship graph** | Knowledge graph
|
|
385
|
+
| **13** | **Enhanced semantic compression** | Multi-stage pipeline in `memvcs.core.compression_pipeline`: chunk, fact extraction, dedup by hash; hybrid retrieval in strategies. |
|
|
386
|
+
| **14** | **Temporal querying and time-travel** | Point-in-time and range in `memvcs.core.temporal_index`; CLI: `agmem when --from/--to`, `agmem timeline --from/--to`. |
|
|
387
|
+
| **15** | **Cross-memory relationship graph** | Knowledge graph: co-occurrence, causal edges; incremental-update docstring in `knowledge_graph.py`. |
|
|
385
388
|
|
|
386
389
|
### Tier 6 — Operations and maintenance
|
|
387
390
|
|
|
388
391
|
| # | Feature | Description |
|
|
389
392
|
|---|---------|-------------|
|
|
390
|
-
| **16** | **Automated memory health monitoring** | Daemon
|
|
391
|
-
| **17** | **GPU-accelerated operations** | Vector store
|
|
392
|
-
| **18** | **Test suite and quality** |
|
|
393
|
+
| **16** | **Automated memory health monitoring** | Daemon: configurable `daemon.health_check_interval_seconds` and `AGMEM_DAEMON_HEALTH_INTERVAL`; alert only on verify failure; suggest `agmem fsck`. |
|
|
394
|
+
| **17** | **GPU-accelerated operations** | Vector store `_device()` returns cuda/mps/cpu; model loaded with that device. |
|
|
395
|
+
| **18** | **Test suite and quality** | Tests: crypto (tampered blob, key missing), encryption (wrong key, corrupted ciphertext), privacy budget, pack/GC, ZK prove/verify, federated mock, IPFS bundle; see docs/TEST_REPORT.md. |
|
|
393
396
|
|
|
394
397
|
### New files and config (summary)
|
|
395
398
|
|
|
@@ -399,7 +402,10 @@ The following 18 capabilities are implemented (or stubbed) per the agmem feature
|
|
|
399
402
|
| `memvcs/core/audit.py` | Tamper-evident audit append and verify |
|
|
400
403
|
| `memvcs/core/trust.py` | Trust store (key → level) |
|
|
401
404
|
| `memvcs/core/privacy_budget.py` | Epsilon/delta budget for DP |
|
|
402
|
-
| `memvcs/core/pack.py` | Pack format, index, GC |
|
|
405
|
+
| `memvcs/core/pack.py` | Pack format, index, GC, repack |
|
|
406
|
+
| `memvcs/core/compression_pipeline.py` | Chunk, fact extraction, dedup; hybrid retrieval |
|
|
407
|
+
| `memvcs/core/zk_proofs.py` | Hash/signature-based proofs (keyword, freshness) |
|
|
408
|
+
| `docs/FEDERATED.md` | Coordinator API for federated push/pull |
|
|
403
409
|
| `memvcs/core/encryption.py` | AES-256-GCM, Argon2id, config |
|
|
404
410
|
| `memvcs/core/llm/` | LLM provider interface and OpenAI/Anthropic |
|
|
405
411
|
| `memvcs/core/zk_proofs.py` | ZK proof stubs |
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
agmem-0.1.3.dist-info/licenses/LICENSE,sha256=X_S6RBErW-F0IDbM3FAEoDB-zxExFnl2m8640rTXphM,1067
|
|
1
|
+
agmem-0.1.5.dist-info/licenses/LICENSE,sha256=X_S6RBErW-F0IDbM3FAEoDB-zxExFnl2m8640rTXphM,1067
|
|
2
2
|
memvcs/__init__.py,sha256=mXwHTSlUPWo4ERqJLGJnxmxtGQQHPSbXb4IpO61l04M,193
|
|
3
3
|
memvcs/cli.py,sha256=YF06oMNjKWUmiNahILmfjrIXgoXzU-5BJFmbunSb8Sc,6075
|
|
4
4
|
memvcs/commands/__init__.py,sha256=A2D6xWaO6epU7iV4QSvqvF5TspnwRyDN7NojmGatPrE,510
|
|
@@ -11,21 +11,21 @@ memvcs/commands/checkout.py,sha256=xaYZSbCQ-MyLWPtwA2FdH6WqGMI3oF3R2JmCufGBVFg,3
|
|
|
11
11
|
memvcs/commands/clean.py,sha256=e0OhSQdHfFnOPTRbyKbM8IcX4yJD5n_kaBKjIeoaRBo,1973
|
|
12
12
|
memvcs/commands/clone.py,sha256=aB0LcugIWJE9IEez6y70KlpZu4eIF2EdXZxE24jXyac,3260
|
|
13
13
|
memvcs/commands/commit.py,sha256=W4ulVZuEETJh1SHpscaQfNjyQMqeIE0AYZIbMbTrsq4,6801
|
|
14
|
-
memvcs/commands/daemon.py,sha256=
|
|
14
|
+
memvcs/commands/daemon.py,sha256=fV6aIz8bFP9VwB_MLudAb_lhhhBxSe2aV-Wjqe-nvPw,10708
|
|
15
15
|
memvcs/commands/decay.py,sha256=QcgOTMJZxrfw_AOz94YHA3LGoNXRMDn69TxWlUrpSw4,2421
|
|
16
16
|
memvcs/commands/diff.py,sha256=KcgD57_fae4uvQ8G9ZbXmLpAYYIDiWiBuVcjsDtyE1U,5480
|
|
17
|
-
memvcs/commands/distill.py,sha256=
|
|
18
|
-
memvcs/commands/federated.py,sha256=
|
|
17
|
+
memvcs/commands/distill.py,sha256=reOldqg0lMgqIlpYEIKYfN_TxNwsjU9RnI8Uah1VqTQ,3088
|
|
18
|
+
memvcs/commands/federated.py,sha256=Zj4kxHnjdIs1xu4v7B8XosQXNYK8Alv4I0kJQpmJe6Y,1840
|
|
19
19
|
memvcs/commands/fsck.py,sha256=AdJBMLA2myQ0cJJcjUgsYptsE3qvX4JQc9UAwVmSHlA,7772
|
|
20
|
-
memvcs/commands/garden.py,sha256=
|
|
21
|
-
memvcs/commands/gc.py,sha256=
|
|
20
|
+
memvcs/commands/garden.py,sha256=8JiLe3JRkOhY-N-h-IDuvdJiECiSElnUzXVtxtU2QgY,4050
|
|
21
|
+
memvcs/commands/gc.py,sha256=SIx_AG1msCFW2E1VPbJgiPBLTHCucpvBlbmxnIKCBhY,1978
|
|
22
22
|
memvcs/commands/graph.py,sha256=MDi6bK2w0OrpK5VOE8XXw5gQX7BuD7VzUyqJ5Ra9Bsg,4746
|
|
23
23
|
memvcs/commands/init.py,sha256=TsrLFLXwkDFT0opsYJTfwu0NIxLrNiiba5SpzRtxjDI,1614
|
|
24
24
|
memvcs/commands/log.py,sha256=eNlLs0-PS2nF0pMAMI8izKGUiEb2m3S0RB4Zh6cUQpE,2859
|
|
25
25
|
memvcs/commands/mcp.py,sha256=PMfwVD6uHltN58Jh7IOiS1w7oND42tg14QKRCJNudmY,1740
|
|
26
26
|
memvcs/commands/merge.py,sha256=s3QLZp-_I6OvhllLhL9yFZAQ8d4M4FbvxkXV7gUgw5M,4877
|
|
27
27
|
memvcs/commands/pack.py,sha256=rIDjMpxJG0oxrWnB3vCGHqviCITIeIbdy3nhuHVHzM8,3629
|
|
28
|
-
memvcs/commands/prove.py,sha256=
|
|
28
|
+
memvcs/commands/prove.py,sha256=xSqNYGYtLOCdBywcDIduqVq-XoYDL9073mMTlkZmvuE,2171
|
|
29
29
|
memvcs/commands/pull.py,sha256=hn9FIlNc3KUr5EUDo4_66KQSK0BSSXjOn32xaDNxf0Q,3621
|
|
30
30
|
memvcs/commands/push.py,sha256=0abEdHkCMfHpH_Nmlw3OaU7Hzi0-RXF-cTVHpiSPw6k,5086
|
|
31
31
|
memvcs/commands/recall.py,sha256=7nwC4mFYpdjKWG-Cs3cpDLr5_SgYJ6HkVSXDOkFke5A,4592
|
|
@@ -42,33 +42,34 @@ memvcs/commands/stash.py,sha256=CD3mRWehcmfVRPGGpndUBdTT_ku4LC_rmSKPvTEOTAo,3193
|
|
|
42
42
|
memvcs/commands/status.py,sha256=O6BgzTiW3UHjXx6OKwH8X4g0hP0IlYDgr7As5RmeujU,3447
|
|
43
43
|
memvcs/commands/tag.py,sha256=CaCnA3JifVrdr8DfX4g0bp-_oRvagJkQFcI4bJbW1uM,3004
|
|
44
44
|
memvcs/commands/test.py,sha256=HZrpGZQhu9HnGZLjiq8TXi8jfOZqP-wc3bW6mgpP2yk,3926
|
|
45
|
-
memvcs/commands/timeline.py,sha256=
|
|
45
|
+
memvcs/commands/timeline.py,sha256=JkuhsQ-6wPWbsjlbJb_qM4mEkxkxcWWzniXXQB4Qtec,4764
|
|
46
46
|
memvcs/commands/tree.py,sha256=vdULq4vIXA_4gNfMnHn_Y78BwE0sJoeTBOnFJR3WsZ4,4927
|
|
47
47
|
memvcs/commands/verify.py,sha256=04CVW5NYWkUlPJ5z1Kci6dfQFM6UmPTGZh9ZextFLMc,3887
|
|
48
|
-
memvcs/commands/when.py,sha256=
|
|
48
|
+
memvcs/commands/when.py,sha256=bxG_tEYnZNBTl2IPkoxpc2LUEbO_5ev1hRvEzxQQDmc,4773
|
|
49
49
|
memvcs/core/__init__.py,sha256=dkIC-4tS0GhwV2mZIbofEe8xR8uiFwrxslGf1aXwhYg,493
|
|
50
50
|
memvcs/core/access_index.py,sha256=HhacnzSUASzRV2jhDHkwRFoPS3rtqh9n9yE1VV7JXpk,5596
|
|
51
51
|
memvcs/core/audit.py,sha256=8APkm9Spl_-1rIdyRQz1elyxOeK3nlpwm0CLkpLlhTE,3732
|
|
52
|
+
memvcs/core/compression_pipeline.py,sha256=Vzr5v_0pgAG20C8znC0-Ho5fEwBoaTOLddxMTldd64M,5564
|
|
52
53
|
memvcs/core/config_loader.py,sha256=j-jgLDp2TRzWN9ZEZebfWSfatevBNYs0FEb3ud1SIR8,8277
|
|
53
54
|
memvcs/core/consistency.py,sha256=YOG8xhqZLKZCLbai2rdcP0KxYPNGFv5RRMwrQ6qCeyc,7462
|
|
54
55
|
memvcs/core/constants.py,sha256=WUjAb50BFcF0mbFi_GNteDLCxLihmViBm9Fb-JMPmbM,220
|
|
55
|
-
memvcs/core/crypto_verify.py,sha256=
|
|
56
|
+
memvcs/core/crypto_verify.py,sha256=DTuC7Kfx6z2b8UWOWziBTqP633LrjXbdtGmBBqrJTF0,10424
|
|
56
57
|
memvcs/core/decay.py,sha256=ROGwnqngs7eJNkbKmwyOdij607m73vpmoJqzrIDLBzk,6581
|
|
57
58
|
memvcs/core/diff.py,sha256=koEHTLciIUxYKVJVuvmY0GDXMgDgGZP_qg5RayhF-iE,13226
|
|
58
|
-
memvcs/core/distiller.py,sha256=
|
|
59
|
+
memvcs/core/distiller.py,sha256=ZOmrwYYhOla8rZncQP_0y0Ab9jCl3GjtdoH82HkXlsw,12621
|
|
59
60
|
memvcs/core/encryption.py,sha256=epny_nlW6ylllv1qxs1mAcFq-PrLIisgfot4llOoAqw,5289
|
|
60
|
-
memvcs/core/federated.py,sha256=
|
|
61
|
-
memvcs/core/gardener.py,sha256=
|
|
61
|
+
memvcs/core/federated.py,sha256=vUYMZ0xv80hqGDRKq645Od1i8N33l-pIAkklJbJUlVg,5445
|
|
62
|
+
memvcs/core/gardener.py,sha256=lBWkyE72O-JMiHM-oqrnex9k_xSv7FvztjkOdLdB0Kk,18610
|
|
62
63
|
memvcs/core/hooks.py,sha256=XF9z8J5sWjAcuOyWQ2nuvEzK0UV8s4ThrcltaBZttzw,5448
|
|
63
|
-
memvcs/core/ipfs_remote.py,sha256=
|
|
64
|
-
memvcs/core/knowledge_graph.py,sha256=
|
|
64
|
+
memvcs/core/ipfs_remote.py,sha256=xmEO14bn_7Ej-W5jhx2QJyBd-ljj9S2COOxMmcZBiTs,6643
|
|
65
|
+
memvcs/core/knowledge_graph.py,sha256=GY27e1rgraF2zMpz_jsumdUtpgTRk48yH5CAEQ3TDl4,16416
|
|
65
66
|
memvcs/core/merge.py,sha256=x2eSaxr4f63Eq00FCJ6DDe2TZU8H5yHQpzKzMhYsaFw,19871
|
|
66
|
-
memvcs/core/objects.py,sha256=
|
|
67
|
-
memvcs/core/pack.py,sha256=
|
|
67
|
+
memvcs/core/objects.py,sha256=Xgw1IpQnJLCG5o_7gDHVQ-TNGR9CSpDYWRXzLgLSuec,11006
|
|
68
|
+
memvcs/core/pack.py,sha256=nTzpPNNk47e7_oN3z7bjaichpzI7q-ql2E8eI2UuGyM,9828
|
|
68
69
|
memvcs/core/pii_scanner.py,sha256=T6gQ1APFrSDk980fjnv4ZMF-UztbJgmUFSwGrwWixEw,10802
|
|
69
70
|
memvcs/core/privacy_budget.py,sha256=fOPlxoKEAmsKtda-OJCrSaKjTyw7ekcqdN7KfRBw1CY,2113
|
|
70
71
|
memvcs/core/refs.py,sha256=4Nx2ZVRa_DzfUZ4O1AwzOHEjoGAEICJKqSd9GxaiD_g,16754
|
|
71
|
-
memvcs/core/remote.py,sha256=
|
|
72
|
+
memvcs/core/remote.py,sha256=1PINc6qYBIHRkNLMS8MLWM5DJIrX81uIfRrV6fXwwro,19495
|
|
72
73
|
memvcs/core/repository.py,sha256=NzC2UFPv6ePxi5lfiSKyZFLclH4bJpWJz88pY7tDiv4,20605
|
|
73
74
|
memvcs/core/schema.py,sha256=_CrEWCdArc0yDJ04GT7fyvjHqkal7gegdFSsFOjVpBc,15287
|
|
74
75
|
memvcs/core/staging.py,sha256=dptdGi_74lhDkcGqGVU39ZyTkb25j-Rnkz0GWi83W1k,7221
|
|
@@ -76,7 +77,7 @@ memvcs/core/temporal_index.py,sha256=81hZHlVElp2UpXjseFVCdDUwxGM45zIU-y1dDlOhFHI
|
|
|
76
77
|
memvcs/core/test_runner.py,sha256=7-0jCvji63JRbVfy3LNQWIQ7VL5weulOoG7SY1-YJbw,11496
|
|
77
78
|
memvcs/core/trust.py,sha256=msx80Cl3bxyQTY8mFUKWY9P6l3zb1s8FafympgHwtpo,3494
|
|
78
79
|
memvcs/core/vector_store.py,sha256=yUAp5BlaAtjkrtsdY1I-vmAp_YIFgJykBoNlp5hcg0I,11063
|
|
79
|
-
memvcs/core/zk_proofs.py,sha256=
|
|
80
|
+
memvcs/core/zk_proofs.py,sha256=j9AyHucYe9tOSrlxDeUMGgpRHMvNFOl8s4Q0AQHLKP0,5514
|
|
80
81
|
memvcs/core/llm/__init__.py,sha256=vnjtE9Xlv9a2pZV88DMT9JaINkZ30hC9VLPL5lJRlps,236
|
|
81
82
|
memvcs/core/llm/anthropic_provider.py,sha256=O1eaCb9r464ajLJz-Gy8lGxBie5ojRUZ_5HdgRXO5KY,1540
|
|
82
83
|
memvcs/core/llm/base.py,sha256=qPzg3KPAMeoyWGc_2JoVR4-plpdft5Rc2g9uO-Z4fJQ,623
|
|
@@ -98,8 +99,8 @@ memvcs/retrieval/recaller.py,sha256=8KY-XjMUz5_vcKf46zI64uk1DEM__u7wM92ShukOtsY,
|
|
|
98
99
|
memvcs/retrieval/strategies.py,sha256=26yxQQubQfjxWQXknfVMxuzPHf2EcZxJg_B99BEdl5c,11458
|
|
99
100
|
memvcs/utils/__init__.py,sha256=8psUzz4Ntv2GzbRebkeVsoyC6Ck-FIwi0_lfYdj5oho,185
|
|
100
101
|
memvcs/utils/helpers.py,sha256=37zg_DcQ2y99b9NSLqxFkglHe13rJXKhFDpEbQ7iLhM,4121
|
|
101
|
-
agmem-0.1.
|
|
102
|
-
agmem-0.1.
|
|
103
|
-
agmem-0.1.
|
|
104
|
-
agmem-0.1.
|
|
105
|
-
agmem-0.1.
|
|
102
|
+
agmem-0.1.5.dist-info/METADATA,sha256=q_9dsCFXbo9DGn6Hx4-7A9T3aHq9Sc5nS7ldEoazAdc,37487
|
|
103
|
+
agmem-0.1.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
104
|
+
agmem-0.1.5.dist-info/entry_points.txt,sha256=at7eWycgjqOo1wbUMECnXUsNo3gpCkJTU71OzrGLHu0,42
|
|
105
|
+
agmem-0.1.5.dist-info/top_level.txt,sha256=HtMMsKuwLKLOdgF1GxqQztqFM54tTJctVdJuOec6B-4,7
|
|
106
|
+
agmem-0.1.5.dist-info/RECORD,,
|
memvcs/commands/daemon.py
CHANGED
|
@@ -205,13 +205,29 @@ class DaemonCommand:
|
|
|
205
205
|
|
|
206
206
|
# Health monitoring: periodic integrity check (configurable interval)
|
|
207
207
|
last_health_check = 0
|
|
208
|
-
health_check_interval = 3600 # 1 hour
|
|
208
|
+
health_check_interval = 3600 # default 1 hour
|
|
209
|
+
try:
|
|
210
|
+
from ..core.config_loader import load_agmem_config
|
|
211
|
+
|
|
212
|
+
config = load_agmem_config(repo.root)
|
|
213
|
+
daemon_cfg = config.get("daemon") or {}
|
|
214
|
+
health_check_interval = int(daemon_cfg.get("health_check_interval_seconds", 3600))
|
|
215
|
+
if health_check_interval <= 0:
|
|
216
|
+
health_check_interval = 0
|
|
217
|
+
except Exception:
|
|
218
|
+
pass
|
|
219
|
+
env_interval = os.environ.get("AGMEM_DAEMON_HEALTH_INTERVAL")
|
|
220
|
+
if env_interval is not None:
|
|
221
|
+
try:
|
|
222
|
+
health_check_interval = int(env_interval)
|
|
223
|
+
except ValueError:
|
|
224
|
+
pass
|
|
209
225
|
|
|
210
226
|
try:
|
|
211
227
|
while running:
|
|
212
228
|
time.sleep(1)
|
|
213
229
|
|
|
214
|
-
# Periodic health check (Merkle/signature, optional)
|
|
230
|
+
# Periodic health check (Merkle/signature, optional). Alert only; no destructive action.
|
|
215
231
|
if (
|
|
216
232
|
health_check_interval
|
|
217
233
|
and (time.time() - last_health_check) >= health_check_interval
|
|
@@ -229,8 +245,10 @@ class DaemonCommand:
|
|
|
229
245
|
load_public_key(repo.mem_dir),
|
|
230
246
|
mem_dir=repo.mem_dir,
|
|
231
247
|
)
|
|
232
|
-
if not ok and err
|
|
248
|
+
if not ok and err:
|
|
233
249
|
sys.stderr.write(f"Health check: {err}\n")
|
|
250
|
+
if "tampered" in (err or "").lower():
|
|
251
|
+
sys.stderr.write("Run 'agmem fsck' for safe integrity check.\n")
|
|
234
252
|
except Exception:
|
|
235
253
|
pass
|
|
236
254
|
last_health_check = time.time()
|
memvcs/commands/distill.py
CHANGED
|
@@ -53,21 +53,29 @@ class DistillCommand:
|
|
|
53
53
|
if code != 0:
|
|
54
54
|
return code
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
use_dp = getattr(args, "private", False)
|
|
57
|
+
dp_epsilon = None
|
|
58
|
+
dp_delta = None
|
|
59
|
+
if use_dp:
|
|
57
60
|
from ..core.privacy_budget import load_budget, spend_epsilon
|
|
58
61
|
|
|
59
|
-
spent, max_eps,
|
|
62
|
+
spent, max_eps, delta = load_budget(repo.mem_dir)
|
|
60
63
|
epsilon_cost = 0.1
|
|
61
64
|
if not spend_epsilon(repo.mem_dir, epsilon_cost):
|
|
62
65
|
print(f"Privacy budget exceeded (spent {spent:.2f}, max {max_eps}).")
|
|
63
66
|
return 1
|
|
64
67
|
if spent + epsilon_cost > max_eps * 0.8:
|
|
65
68
|
print(f"Privacy budget low: {spent + epsilon_cost:.2f}/{max_eps}")
|
|
69
|
+
dp_epsilon = 0.05
|
|
70
|
+
dp_delta = delta
|
|
66
71
|
|
|
67
72
|
config = DistillerConfig(
|
|
68
73
|
source_dir=args.source,
|
|
69
74
|
target_dir=args.target,
|
|
70
75
|
create_safety_branch=not args.no_branch,
|
|
76
|
+
use_dp=use_dp,
|
|
77
|
+
dp_epsilon=dp_epsilon,
|
|
78
|
+
dp_delta=dp_delta,
|
|
71
79
|
)
|
|
72
80
|
distiller = Distiller(repo, config)
|
|
73
81
|
|
memvcs/commands/federated.py
CHANGED
|
@@ -38,7 +38,13 @@ class FederatedCommand:
|
|
|
38
38
|
return 1
|
|
39
39
|
|
|
40
40
|
if args.action == "push":
|
|
41
|
-
summary = produce_local_summary(
|
|
41
|
+
summary = produce_local_summary(
|
|
42
|
+
repo.root,
|
|
43
|
+
cfg["memory_types"],
|
|
44
|
+
use_dp=cfg.get("use_dp", False),
|
|
45
|
+
dp_epsilon=cfg.get("dp_epsilon") or 0.1,
|
|
46
|
+
dp_delta=cfg.get("dp_delta") or 1e-5,
|
|
47
|
+
)
|
|
42
48
|
msg = push_updates(repo.root, summary)
|
|
43
49
|
print(msg)
|
|
44
50
|
return 0 if "Pushed" in msg else 1
|
memvcs/commands/garden.py
CHANGED
|
@@ -50,14 +50,19 @@ class GardenCommand:
|
|
|
50
50
|
if code != 0:
|
|
51
51
|
return code
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
use_dp = getattr(args, "private", False)
|
|
54
|
+
dp_epsilon = None
|
|
55
|
+
dp_delta = None
|
|
56
|
+
if use_dp:
|
|
54
57
|
from ..core.privacy_budget import load_budget, spend_epsilon
|
|
55
58
|
|
|
56
|
-
spent, max_eps,
|
|
59
|
+
spent, max_eps, delta = load_budget(repo.mem_dir)
|
|
57
60
|
epsilon_cost = 0.1
|
|
58
61
|
if not spend_epsilon(repo.mem_dir, epsilon_cost):
|
|
59
62
|
print(f"Privacy budget exceeded (spent {spent:.2f}, max {max_eps}).")
|
|
60
63
|
return 1
|
|
64
|
+
dp_epsilon = 0.05
|
|
65
|
+
dp_delta = delta
|
|
61
66
|
|
|
62
67
|
# Build config
|
|
63
68
|
config = GardenerConfig(
|
|
@@ -65,6 +70,9 @@ class GardenCommand:
|
|
|
65
70
|
auto_commit=not args.no_commit,
|
|
66
71
|
llm_provider=args.llm if args.llm != "none" else None,
|
|
67
72
|
llm_model=args.model,
|
|
73
|
+
use_dp=use_dp,
|
|
74
|
+
dp_epsilon=dp_epsilon,
|
|
75
|
+
dp_delta=dp_delta,
|
|
68
76
|
)
|
|
69
77
|
|
|
70
78
|
# Create gardener
|
memvcs/commands/gc.py
CHANGED
|
@@ -7,7 +7,7 @@ Remove unreachable objects; optionally repack.
|
|
|
7
7
|
import argparse
|
|
8
8
|
|
|
9
9
|
from ..commands.base import require_repo
|
|
10
|
-
from ..core.pack import run_gc
|
|
10
|
+
from ..core.pack import run_gc, run_repack
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class GcCommand:
|
|
@@ -30,6 +30,11 @@ class GcCommand:
|
|
|
30
30
|
metavar="N",
|
|
31
31
|
help="Consider reflog entries within N days (default 90)",
|
|
32
32
|
)
|
|
33
|
+
parser.add_argument(
|
|
34
|
+
"--repack",
|
|
35
|
+
action="store_true",
|
|
36
|
+
help="After GC, pack reachable loose objects into a pack file",
|
|
37
|
+
)
|
|
33
38
|
|
|
34
39
|
@staticmethod
|
|
35
40
|
def execute(args) -> int:
|
|
@@ -48,4 +53,16 @@ class GcCommand:
|
|
|
48
53
|
print(f"Would remove {deleted} unreachable object(s) ({freed} bytes).")
|
|
49
54
|
else:
|
|
50
55
|
print(f"Removed {deleted} unreachable object(s) ({freed} bytes reclaimed).")
|
|
56
|
+
|
|
57
|
+
if getattr(args, "repack", False) and not args.dry_run:
|
|
58
|
+
packed, repack_freed = run_repack(
|
|
59
|
+
repo.mem_dir,
|
|
60
|
+
repo.object_store,
|
|
61
|
+
gc_prune_days=gc_prune_days,
|
|
62
|
+
dry_run=False,
|
|
63
|
+
)
|
|
64
|
+
if packed > 0:
|
|
65
|
+
print(
|
|
66
|
+
f"Packed {packed} object(s) into pack file ({repack_freed} bytes from loose)."
|
|
67
|
+
)
|
|
51
68
|
return 0
|
memvcs/commands/prove.py
CHANGED
|
@@ -57,10 +57,12 @@ class ProveCommand:
|
|
|
57
57
|
if not args.value:
|
|
58
58
|
print("--value required for freshness (ISO date)")
|
|
59
59
|
return 1
|
|
60
|
-
ok = prove_memory_freshness(path, args.value, out_path)
|
|
60
|
+
ok = prove_memory_freshness(path, args.value, out_path, mem_dir=repo.mem_dir)
|
|
61
61
|
|
|
62
62
|
if not ok:
|
|
63
|
-
print(
|
|
63
|
+
print(
|
|
64
|
+
"Proof generation failed (keyword not in file, or signing key not set for freshness)."
|
|
65
|
+
)
|
|
64
66
|
return 1
|
|
65
67
|
print(f"Proof written to {out_path}")
|
|
66
68
|
return 0
|
memvcs/commands/timeline.py
CHANGED
|
@@ -28,6 +28,18 @@ class TimelineCommand:
|
|
|
28
28
|
default=20,
|
|
29
29
|
help="Max commits to show (default: 20)",
|
|
30
30
|
)
|
|
31
|
+
parser.add_argument(
|
|
32
|
+
"--from",
|
|
33
|
+
dest="from_ts",
|
|
34
|
+
metavar="ISO",
|
|
35
|
+
help="Start of time range (ISO 8601, e.g. 2025-01-01)",
|
|
36
|
+
)
|
|
37
|
+
parser.add_argument(
|
|
38
|
+
"--to",
|
|
39
|
+
dest="to_ts",
|
|
40
|
+
metavar="ISO",
|
|
41
|
+
help="End of time range (ISO 8601)",
|
|
42
|
+
)
|
|
31
43
|
|
|
32
44
|
@staticmethod
|
|
33
45
|
def execute(args) -> int:
|
|
@@ -36,6 +48,18 @@ class TimelineCommand:
|
|
|
36
48
|
return code
|
|
37
49
|
|
|
38
50
|
filepath = args.file.replace("current/", "").lstrip("/")
|
|
51
|
+
from_ts = getattr(args, "from_ts", None)
|
|
52
|
+
to_ts = getattr(args, "to_ts", None)
|
|
53
|
+
commits_in_range = None
|
|
54
|
+
if from_ts and to_ts:
|
|
55
|
+
try:
|
|
56
|
+
from ..core.temporal_index import TemporalIndex
|
|
57
|
+
|
|
58
|
+
ti = TemporalIndex(repo.mem_dir, repo.object_store)
|
|
59
|
+
range_entries = ti.range_query(from_ts, to_ts)
|
|
60
|
+
commits_in_range = {ch for _, ch in range_entries}
|
|
61
|
+
except Exception:
|
|
62
|
+
pass
|
|
39
63
|
|
|
40
64
|
# Walk commit history
|
|
41
65
|
head = repo.refs.get_head()
|
|
@@ -51,6 +75,10 @@ class TimelineCommand:
|
|
|
51
75
|
if commit_hash in seen:
|
|
52
76
|
break
|
|
53
77
|
seen.add(commit_hash)
|
|
78
|
+
if commits_in_range is not None and commit_hash not in commits_in_range:
|
|
79
|
+
commit = Commit.load(repo.object_store, commit_hash)
|
|
80
|
+
commit_hash = commit.parents[0] if commit and commit.parents else None
|
|
81
|
+
continue
|
|
54
82
|
|
|
55
83
|
commit = Commit.load(repo.object_store, commit_hash)
|
|
56
84
|
if not commit:
|
memvcs/commands/when.py
CHANGED
|
@@ -34,6 +34,18 @@ class WhenCommand:
|
|
|
34
34
|
default=10,
|
|
35
35
|
help="Max commits to report (default: 10)",
|
|
36
36
|
)
|
|
37
|
+
parser.add_argument(
|
|
38
|
+
"--from",
|
|
39
|
+
dest="from_ts",
|
|
40
|
+
metavar="ISO",
|
|
41
|
+
help="Start of time range (ISO 8601)",
|
|
42
|
+
)
|
|
43
|
+
parser.add_argument(
|
|
44
|
+
"--to",
|
|
45
|
+
dest="to_ts",
|
|
46
|
+
metavar="ISO",
|
|
47
|
+
help="End of time range (ISO 8601)",
|
|
48
|
+
)
|
|
37
49
|
|
|
38
50
|
@staticmethod
|
|
39
51
|
def execute(args) -> int:
|
|
@@ -48,6 +60,18 @@ class WhenCommand:
|
|
|
48
60
|
|
|
49
61
|
fact_lower = args.fact.lower()
|
|
50
62
|
file_filter = args.file.replace("current/", "").lstrip("/") if args.file else None
|
|
63
|
+
from_ts = getattr(args, "from_ts", None)
|
|
64
|
+
to_ts = getattr(args, "to_ts", None)
|
|
65
|
+
commits_in_range = None
|
|
66
|
+
if from_ts and to_ts:
|
|
67
|
+
try:
|
|
68
|
+
from ..core.temporal_index import TemporalIndex
|
|
69
|
+
|
|
70
|
+
ti = TemporalIndex(repo.mem_dir, repo.object_store)
|
|
71
|
+
range_entries = ti.range_query(from_ts, to_ts)
|
|
72
|
+
commits_in_range = {ch for _, ch in range_entries}
|
|
73
|
+
except Exception:
|
|
74
|
+
pass
|
|
51
75
|
|
|
52
76
|
# Walk commit history from HEAD
|
|
53
77
|
head = repo.refs.get_head()
|
|
@@ -63,6 +87,10 @@ class WhenCommand:
|
|
|
63
87
|
if commit_hash in seen:
|
|
64
88
|
break
|
|
65
89
|
seen.add(commit_hash)
|
|
90
|
+
if commits_in_range is not None and commit_hash not in commits_in_range:
|
|
91
|
+
commit = Commit.load(repo.object_store, commit_hash)
|
|
92
|
+
commit_hash = commit.parents[0] if commit and commit.parents else None
|
|
93
|
+
continue
|
|
66
94
|
|
|
67
95
|
commit = Commit.load(repo.object_store, commit_hash)
|
|
68
96
|
if not commit:
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enhanced semantic compression pipeline for agmem (#11).
|
|
3
|
+
|
|
4
|
+
Multi-stage: chunk -> fact extraction -> dedup -> embed -> tiered storage.
|
|
5
|
+
Hybrid retrieval (keyword + vector) is in memvcs.retrieval.strategies.HybridStrategy.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Optional, Tuple, Any
|
|
12
|
+
|
|
13
|
+
from .constants import MEMORY_TYPES
|
|
14
|
+
|
|
15
|
+
CHUNK_SIZE_DEFAULT = 512
|
|
16
|
+
CHUNK_OVERLAP = 64
|
|
17
|
+
DEDUP_HASH_ALGO = "sha256"
|
|
18
|
+
TIER_HOT_DAYS = 7
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def chunk_by_size(
    text: str, size: int = CHUNK_SIZE_DEFAULT, overlap: int = CHUNK_OVERLAP
) -> List[str]:
    """Split text into character windows of at most ``size`` characters.

    Consecutive windows share ``overlap`` characters. Each window is
    whitespace-stripped and dropped if nothing remains after stripping.

    Args:
        text: Text to split.
        size: Maximum number of characters per window before stripping.
        overlap: Number of characters the next window re-reads from the end
            of the previous one. Values of ``size`` or more are clamped to
            ``size - 1`` so the scan always moves forward.

    Returns:
        The non-empty stripped windows in document order; an empty list when
        ``text`` is empty or ``size`` is not positive.
    """
    if not text or size <= 0:
        return []
    # An overlap >= size would leave ``start`` where it was (or move it
    # backwards) after every window, so the loop would never terminate.
    overlap = min(overlap, size - 1)
    chunks = []
    start = 0
    text_len = len(text)
    while start < text_len:
        end = min(start + size, text_len)
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        # Step back by ``overlap`` unless this window already reached the end.
        start = end - overlap if end < text_len else text_len
    return chunks
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def chunk_by_sentences(text: str, max_chunk_chars: int = 512) -> List[str]:
    """Group whole sentences into chunks bounded by ``max_chunk_chars``.

    Sentences are found by splitting on whitespace that follows ``.``, ``!``
    or ``?``. Each sentence is charged its length plus one (for the joining
    space) against the budget. A sentence that does not fit starts a new
    chunk, so a single over-long sentence becomes a chunk of its own.

    Args:
        text: Text to split.
        max_chunk_chars: Character budget per chunk.

    Returns:
        Chunks in document order, each made of sentences joined by one space.
    """
    if not text:
        return []

    chunks: List[str] = []
    pending: List[str] = []
    budget_used = 0
    for raw in re.split(r"(?<=[.!?])\s+", text):
        sentence = raw.strip()
        if not sentence:
            continue
        cost = len(sentence) + 1
        if budget_used + cost > max_chunk_chars:
            # Close the chunk in progress and start a new one with this sentence.
            if pending:
                chunks.append(" ".join(pending))
            pending = [sentence]
            budget_used = cost
            continue
        pending.append(sentence)
        budget_used += cost

    if pending:
        chunks.append(" ".join(pending))
    return chunks
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def extract_facts_from_chunk(chunk: str) -> List[str]:
    """Pick out fact-like lines from a chunk, returning at most 15.

    A stripped line is kept when it is either a ``- `` bullet longer than 10
    characters, or any other line between 21 and 299 characters that does not
    start with a code fence. Blank lines and lines starting with ``#`` are
    always skipped.

    Args:
        chunk: Text whose lines are inspected one by one.

    Returns:
        The first 15 matching lines, stripped, in their original order.
    """

    def _is_fact(candidate: str) -> bool:
        """Return True when a stripped line qualifies as a fact."""
        if not candidate or candidate.startswith("#"):
            return False
        if candidate.startswith("- ") and len(candidate) > 10:
            return True
        return 20 < len(candidate) < 300 and not candidate.startswith("```")

    stripped_lines = (raw.strip() for raw in chunk.splitlines())
    return [entry for entry in stripped_lines if _is_fact(entry)][:15]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def dedup_by_hash(items: List[str]) -> List[Tuple[str, str]]:
    """Drop exact duplicates, pairing each surviving item with its digest.

    Items are compared by the hex digest of their encoded text, using the
    algorithm named by ``DEDUP_HASH_ALGO``.

    Args:
        items: Strings to deduplicate.

    Returns:
        ``(item, hash_hex)`` pairs in first-seen order; later repeats of the
        same content are discarded.
    """
    # dict preserves insertion order, so the first item seen for a digest wins.
    first_by_digest = {}
    for entry in items:
        digest = hashlib.new(DEDUP_HASH_ALGO, entry.encode()).hexdigest()
        first_by_digest.setdefault(digest, entry)
    return [(entry, digest) for digest, entry in first_by_digest.items()]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def dedup_by_similarity_threshold(
    items: List[str], vector_store: Any, threshold: float = 0.95
) -> List[str]:
    """Filter near-duplicate items by embedding similarity.

    The first item is always kept. Each later item is kept only if its
    similarity to every already-kept item is below ``threshold``.

    Args:
        items: Candidate strings, in priority order.
        vector_store: Object providing ``embed(list_of_str)`` (one embedding
            per input, indexable by position) and ``similarity(a, b)``. When
            ``None``, ``items`` is returned untouched.
        threshold: Similarity at or above which an item counts as a duplicate.

    Returns:
        The kept items in their original order. If embedding or comparison
        raises, the unfiltered ``items`` are returned.
    """
    if not items or vector_store is None:
        return items
    try:
        # Embed everything once and reuse the batch result; re-embedding each
        # kept item per comparison multiplies the number of model calls.
        embeddings = vector_store.embed(items)
        kept_indices = [0]
        for i in range(1, len(items)):
            candidate = embeddings[i]
            is_duplicate = any(
                vector_store.similarity(candidate, embeddings[j]) >= threshold
                for j in kept_indices
            )
            if not is_duplicate:
                kept_indices.append(i)
        return [items[i] for i in kept_indices]
    except Exception:
        # Deliberate best-effort: similarity dedup is optional, so any
        # failure in the vector store falls back to the unfiltered input.
        return items
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class CompressionPipeline:
    """
    Configurable text pipeline: chunk -> optional fact extraction -> optional hash dedup.

    Each output piece is also tagged with a recency tier derived from the
    source file's modification time. ``vector_store`` is stored on the
    instance but is not used by the methods defined in this class.
    """

    def __init__(
        self,
        chunk_size: int = CHUNK_SIZE_DEFAULT,
        use_sentences: bool = True,
        extract_facts: bool = False,
        dedup_hash: bool = True,
        vector_store: Optional[Any] = None,
        tier_by_recency: bool = True,
    ):
        """Store the pipeline options.

        Args:
            chunk_size: Character budget per chunk, for either chunking mode.
            use_sentences: Chunk on sentence boundaries instead of fixed windows.
            extract_facts: Replace chunks with the fact-like lines found in them.
            dedup_hash: Drop exact duplicate pieces by content hash.
            vector_store: Optional embedding backend; only stored here.
            tier_by_recency: Tag output as "hot"/"cold" from the file mtime.
        """
        self.chunk_size = chunk_size
        self.use_sentences = use_sentences
        self.extract_facts = extract_facts
        self.dedup_hash = dedup_hash
        self.vector_store = vector_store
        self.tier_by_recency = tier_by_recency

    def chunk(self, text: str) -> List[str]:
        """Split ``text`` with the configured strategy (sentences or fixed size)."""
        if not self.use_sentences:
            return chunk_by_size(text, size=self.chunk_size)
        return chunk_by_sentences(text, max_chunk_chars=self.chunk_size)

    def _tier_for(self, path: Optional[Path]) -> Optional[str]:
        """Return "hot" or "cold" from the age of ``path``, or None when unknown."""
        if not (self.tier_by_recency and path and path.exists()):
            return None
        try:
            from datetime import datetime, timezone

            modified = path.stat().st_mtime
            age_days = (datetime.now(timezone.utc).timestamp() - modified) / 86400
            return "hot" if age_days <= TIER_HOT_DAYS else "cold"
        except Exception:
            # Best-effort: a failed stat leaves the tier undetermined.
            return None

    def run(self, text: str, path: Optional[Path] = None) -> List[Tuple[str, str, Optional[str]]]:
        """
        Run the pipeline on ``text``.

        Args:
            text: Content to process.
            path: Optional source file; only its existence and mtime are read,
                to pick the tier.

        Returns:
            A list of ``(content, content_hash, tier)`` where tier is "hot",
            "cold" or None. The same tier applies to every piece.
        """
        pieces = self.chunk(text)
        if self.extract_facts:
            extracted = [fact for piece in pieces for fact in extract_facts_from_chunk(piece)]
            # Fall back to the raw chunks when no line qualified as a fact.
            if extracted:
                pieces = extracted

        if self.dedup_hash:
            hashed = dedup_by_hash(pieces)
        else:
            hashed = [
                (piece, hashlib.new(DEDUP_HASH_ALGO, piece.encode()).hexdigest())
                for piece in pieces
            ]

        tier = self._tier_for(path)
        return [(content, digest, tier) for content, digest in hashed]
|