agmem 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.2.0/agmem.egg-info → agmem-0.2.1}/PKG-INFO +5 -4
- {agmem-0.2.0 → agmem-0.2.1}/README.md +4 -3
- {agmem-0.2.0 → agmem-0.2.1/agmem.egg-info}/PKG-INFO +5 -4
- {agmem-0.2.0 → agmem-0.2.1}/agmem.egg-info/SOURCES.txt +6 -9
- {agmem-0.2.0 → agmem-0.2.1}/docs/FEDERATED.md +12 -5
- {agmem-0.2.0 → agmem-0.2.1}/docs/TEST_REPORT.md +7 -3
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/__init__.py +1 -1
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/cli.py +1 -1
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/coordinator/server.py +18 -2
- agmem-0.2.1/memvcs/core/compression_metrics.py +248 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/distiller.py +3 -12
- agmem-0.2.1/memvcs/core/fast_similarity.py +404 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/federated.py +13 -2
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/gardener.py +8 -68
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/pack.py +1 -1
- agmem-0.2.1/memvcs/core/privacy_validator.py +187 -0
- agmem-0.2.1/memvcs/core/protocol_builder.py +198 -0
- {agmem-0.2.0 → agmem-0.2.1}/pyproject.toml +1 -1
- {agmem-0.2.0 → agmem-0.2.1}/setup.py +1 -1
- agmem-0.2.1/tests/test_distiller_dp.py +52 -0
- agmem-0.2.1/tests/test_performance_benchmarks.py +257 -0
- agmem-0.2.0/docs/FINAL_COMPLETION_REPORT.md +0 -613
- agmem-0.2.0/docs/FINAL_STATUS_REPORT.md +0 -295
- agmem-0.2.0/docs/HEALTH_MONITORING.md +0 -265
- agmem-0.2.0/docs/IMPLEMENTATION_COMPLETE_SUMMARY.md +0 -508
- agmem-0.2.0/docs/PACKAGE_UPDATES_VERIFICATION.md +0 -209
- agmem-0.2.0/docs/QUICK_REFERENCE.md +0 -339
- agmem-0.2.0/docs/STEP10_SOLID_REFACTORING_COMPLETION.md +0 -495
- agmem-0.2.0/docs/STEP8_HEALTH_MONITORING_COMPLETION.md +0 -293
- agmem-0.2.0/docs/STEP9_DELTA_ENCODING_COMPLETION.md +0 -233
- {agmem-0.2.0 → agmem-0.2.1}/LICENSE +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/MANIFEST.in +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/agmem.egg-info/dependency_links.txt +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/agmem.egg-info/entry_points.txt +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/agmem.egg-info/requires.txt +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/agmem.egg-info/top_level.txt +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/docs/AGMEM_PUBLISHING_SETUP.md +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/docs/CONFIG.md +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/docs/GTM.md +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/docs/KNOWLEDGE_GRAPH.md +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/docs/SEQUENTIAL_VALIDATION.md +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/docs/aux/INSTALL.md +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/examples/basic_workflow.sh +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/add.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/audit.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/base.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/blame.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/branch.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/checkout.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/clean.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/clone.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/commit.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/daemon.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/decay.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/diff.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/distill.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/federated.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/fsck.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/garden.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/gc.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/graph.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/init.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/log.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/mcp.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/merge.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/pack.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/prove.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/pull.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/push.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/recall.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/reflog.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/remote.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/repair.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/reset.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/resolve.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/resurrect.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/search.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/serve.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/show.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/stash.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/status.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/tag.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/test.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/timeline.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/tree.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/verify.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/commands/when.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/coordinator/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/access_index.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/audit.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/compression_pipeline.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/config_loader.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/consistency.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/constants.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/crypto_verify.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/decay.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/delta.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/diff.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/encryption.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/hooks.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/ipfs_remote.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/knowledge_graph.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/llm/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/llm/anthropic_provider.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/llm/base.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/llm/factory.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/llm/openai_provider.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/merge.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/objects.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/pii_scanner.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/privacy_budget.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/refs.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/remote.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/repository.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/schema.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/staging.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/storage/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/storage/base.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/storage/gcs.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/storage/local.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/storage/s3.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/temporal_index.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/test_runner.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/trust.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/vector_store.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/core/zk_proofs.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/health/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/health/monitor.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/integrations/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/integrations/mcp_server.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/integrations/web_ui/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/integrations/web_ui/server.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/retrieval/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/retrieval/base.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/retrieval/pack.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/retrieval/recaller.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/retrieval/strategies.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/utils/__init__.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/memvcs/utils/helpers.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/setup.cfg +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_access_index.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_advanced_commands.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_audit.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_commit_importance.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_compression_pipeline.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_consistency.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_crypto_verify.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_decay.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_delta_encoding.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_edge_cases.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_encryption.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_federated.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_health_monitor.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_ipfs_integration.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_ipfs_remote.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_llm_provider.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_objects.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_pack_gc.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_pii.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_plan_features.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_privacy_budget.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_repository.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_resolve_helpers.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_retrieval.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_temporal_index.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_trust.py +0 -0
- {agmem-0.2.0 → agmem-0.2.1}/tests/test_zk_proofs.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agmem
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Agentic Memory Version Control System - Git for AI agent memories
|
|
5
5
|
Home-page: https://github.com/vivek-tiwari-vt/agmem
|
|
6
6
|
Author: agmem Team
|
|
@@ -137,14 +137,15 @@ agmem solves all of these problems with a familiar Git-like interface.
|
|
|
137
137
|
- ✅ **Tamper-evident audit trail** — Append-only hash-chained log (init, add, commit, checkout, merge, push, pull, config); `agmem audit` and `agmem audit --verify`
|
|
138
138
|
- ✅ **Multi-agent trust** — Trust store (full / conditional / untrusted) per public key; applied on pull/merge; clone copies remote keys
|
|
139
139
|
- ✅ **Conflict resolution** — `agmem resolve` with ours/theirs/both; conflicts persisted in `.mem/merge/`; path-safe
|
|
140
|
-
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise
|
|
140
|
+
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise applies to fact-level data only (metadata fields excluded)
|
|
141
141
|
- ✅ **Pack files & GC** — `agmem gc [--repack]` (reachable from refs, prune loose, optional pack file + index); ObjectStore reads from pack when loose missing
|
|
142
142
|
- ✅ **Multi-provider LLM** — OpenAI and Anthropic via `memvcs.core.llm`; config/repo or env; used by gardener, distiller, consistency, merge
|
|
143
143
|
- ✅ **Temporal querying** — Point-in-time and range queries in temporal index; frontmatter timestamps
|
|
144
|
-
- ✅ **Federated collaboration** — `agmem federated push|pull`;
|
|
144
|
+
- ✅ **Federated collaboration** — `agmem federated push|pull`; protocol-compliant summaries (agent_id, timestamp, topic_counts, fact_hashes); optional DP on outbound; coordinator API in docs/FEDERATED.md
|
|
145
145
|
- ✅ **Zero-knowledge proofs** — `agmem prove` (hash/signature-based): keyword containment (Merkle set membership), memory freshness (signed timestamp). **Note:** Current implementation is proof-of-knowledge with known limitations; see docs for migration to true zk-SNARKs.
|
|
146
146
|
- ✅ **Daemon health** — 4-point health monitoring (storage, redundancy, staleness, graph consistency) with periodic checks; visible warnings and JSON reports
|
|
147
|
-
- ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher;
|
|
147
|
+
- ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher; enabled in GC repack with multi-tier similarity filtering
|
|
148
|
+
- ✅ **Performance safeguards** — Multi-tier similarity filter (length ratio + SimHash) avoids O(n²×m²) worst-case comparisons
|
|
148
149
|
- ✅ **GPU acceleration** — Vector store detects GPU for embedding model when available
|
|
149
150
|
- ✅ **Optional** — `serve`, `daemon` (watch + auto-commit), `garden` (episode archival), MCP server; install extras as needed
|
|
150
151
|
|
|
@@ -37,14 +37,15 @@ agmem solves all of these problems with a familiar Git-like interface.
|
|
|
37
37
|
- ✅ **Tamper-evident audit trail** — Append-only hash-chained log (init, add, commit, checkout, merge, push, pull, config); `agmem audit` and `agmem audit --verify`
|
|
38
38
|
- ✅ **Multi-agent trust** — Trust store (full / conditional / untrusted) per public key; applied on pull/merge; clone copies remote keys
|
|
39
39
|
- ✅ **Conflict resolution** — `agmem resolve` with ours/theirs/both; conflicts persisted in `.mem/merge/`; path-safe
|
|
40
|
-
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise
|
|
40
|
+
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise applies to fact-level data only (metadata fields excluded)
|
|
41
41
|
- ✅ **Pack files & GC** — `agmem gc [--repack]` (reachable from refs, prune loose, optional pack file + index); ObjectStore reads from pack when loose missing
|
|
42
42
|
- ✅ **Multi-provider LLM** — OpenAI and Anthropic via `memvcs.core.llm`; config/repo or env; used by gardener, distiller, consistency, merge
|
|
43
43
|
- ✅ **Temporal querying** — Point-in-time and range queries in temporal index; frontmatter timestamps
|
|
44
|
-
- ✅ **Federated collaboration** — `agmem federated push|pull`;
|
|
44
|
+
- ✅ **Federated collaboration** — `agmem federated push|pull`; protocol-compliant summaries (agent_id, timestamp, topic_counts, fact_hashes); optional DP on outbound; coordinator API in docs/FEDERATED.md
|
|
45
45
|
- ✅ **Zero-knowledge proofs** — `agmem prove` (hash/signature-based): keyword containment (Merkle set membership), memory freshness (signed timestamp). **Note:** Current implementation is proof-of-knowledge with known limitations; see docs for migration to true zk-SNARKs.
|
|
46
46
|
- ✅ **Daemon health** — 4-point health monitoring (storage, redundancy, staleness, graph consistency) with periodic checks; visible warnings and JSON reports
|
|
47
|
-
- ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher;
|
|
47
|
+
- ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher; enabled in GC repack with multi-tier similarity filtering
|
|
48
|
+
- ✅ **Performance safeguards** — Multi-tier similarity filter (length ratio + SimHash) avoids O(n²×m²) worst-case comparisons
|
|
48
49
|
- ✅ **GPU acceleration** — Vector store detects GPU for embedding model when available
|
|
49
50
|
- ✅ **Optional** — `serve`, `daemon` (watch + auto-commit), `garden` (episode archival), MCP server; install extras as needed
|
|
50
51
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agmem
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Agentic Memory Version Control System - Git for AI agent memories
|
|
5
5
|
Home-page: https://github.com/vivek-tiwari-vt/agmem
|
|
6
6
|
Author: agmem Team
|
|
@@ -137,14 +137,15 @@ agmem solves all of these problems with a familiar Git-like interface.
|
|
|
137
137
|
- ✅ **Tamper-evident audit trail** — Append-only hash-chained log (init, add, commit, checkout, merge, push, pull, config); `agmem audit` and `agmem audit --verify`
|
|
138
138
|
- ✅ **Multi-agent trust** — Trust store (full / conditional / untrusted) per public key; applied on pull/merge; clone copies remote keys
|
|
139
139
|
- ✅ **Conflict resolution** — `agmem resolve` with ours/theirs/both; conflicts persisted in `.mem/merge/`; path-safe
|
|
140
|
-
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise
|
|
140
|
+
- ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise applies to fact-level data only (metadata fields excluded)
|
|
141
141
|
- ✅ **Pack files & GC** — `agmem gc [--repack]` (reachable from refs, prune loose, optional pack file + index); ObjectStore reads from pack when loose missing
|
|
142
142
|
- ✅ **Multi-provider LLM** — OpenAI and Anthropic via `memvcs.core.llm`; config/repo or env; used by gardener, distiller, consistency, merge
|
|
143
143
|
- ✅ **Temporal querying** — Point-in-time and range queries in temporal index; frontmatter timestamps
|
|
144
|
-
- ✅ **Federated collaboration** — `agmem federated push|pull`;
|
|
144
|
+
- ✅ **Federated collaboration** — `agmem federated push|pull`; protocol-compliant summaries (agent_id, timestamp, topic_counts, fact_hashes); optional DP on outbound; coordinator API in docs/FEDERATED.md
|
|
145
145
|
- ✅ **Zero-knowledge proofs** — `agmem prove` (hash/signature-based): keyword containment (Merkle set membership), memory freshness (signed timestamp). **Note:** Current implementation is proof-of-knowledge with known limitations; see docs for migration to true zk-SNARKs.
|
|
146
146
|
- ✅ **Daemon health** — 4-point health monitoring (storage, redundancy, staleness, graph consistency) with periodic checks; visible warnings and JSON reports
|
|
147
|
-
- ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher;
|
|
147
|
+
- ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher; enabled in GC repack with multi-tier similarity filtering
|
|
148
|
+
- ✅ **Performance safeguards** — Multi-tier similarity filter (length ratio + SimHash) avoids O(n²×m²) worst-case comparisons
|
|
148
149
|
- ✅ **GPU acceleration** — Vector store detects GPU for embedding model when available
|
|
149
150
|
- ✅ **Optional** — `serve`, `daemon` (watch + auto-commit), `garden` (episode archival), MCP server; install extras as needed
|
|
150
151
|
|
|
@@ -12,18 +12,9 @@ agmem.egg-info/top_level.txt
|
|
|
12
12
|
docs/AGMEM_PUBLISHING_SETUP.md
|
|
13
13
|
docs/CONFIG.md
|
|
14
14
|
docs/FEDERATED.md
|
|
15
|
-
docs/FINAL_COMPLETION_REPORT.md
|
|
16
|
-
docs/FINAL_STATUS_REPORT.md
|
|
17
15
|
docs/GTM.md
|
|
18
|
-
docs/HEALTH_MONITORING.md
|
|
19
|
-
docs/IMPLEMENTATION_COMPLETE_SUMMARY.md
|
|
20
16
|
docs/KNOWLEDGE_GRAPH.md
|
|
21
|
-
docs/PACKAGE_UPDATES_VERIFICATION.md
|
|
22
|
-
docs/QUICK_REFERENCE.md
|
|
23
17
|
docs/SEQUENTIAL_VALIDATION.md
|
|
24
|
-
docs/STEP10_SOLID_REFACTORING_COMPLETION.md
|
|
25
|
-
docs/STEP8_HEALTH_MONITORING_COMPLETION.md
|
|
26
|
-
docs/STEP9_DELTA_ENCODING_COMPLETION.md
|
|
27
18
|
docs/TEST_REPORT.md
|
|
28
19
|
docs/aux/INSTALL.md
|
|
29
20
|
examples/basic_workflow.sh
|
|
@@ -79,6 +70,7 @@ memvcs/coordinator/server.py
|
|
|
79
70
|
memvcs/core/__init__.py
|
|
80
71
|
memvcs/core/access_index.py
|
|
81
72
|
memvcs/core/audit.py
|
|
73
|
+
memvcs/core/compression_metrics.py
|
|
82
74
|
memvcs/core/compression_pipeline.py
|
|
83
75
|
memvcs/core/config_loader.py
|
|
84
76
|
memvcs/core/consistency.py
|
|
@@ -89,6 +81,7 @@ memvcs/core/delta.py
|
|
|
89
81
|
memvcs/core/diff.py
|
|
90
82
|
memvcs/core/distiller.py
|
|
91
83
|
memvcs/core/encryption.py
|
|
84
|
+
memvcs/core/fast_similarity.py
|
|
92
85
|
memvcs/core/federated.py
|
|
93
86
|
memvcs/core/gardener.py
|
|
94
87
|
memvcs/core/hooks.py
|
|
@@ -99,6 +92,8 @@ memvcs/core/objects.py
|
|
|
99
92
|
memvcs/core/pack.py
|
|
100
93
|
memvcs/core/pii_scanner.py
|
|
101
94
|
memvcs/core/privacy_budget.py
|
|
95
|
+
memvcs/core/privacy_validator.py
|
|
96
|
+
memvcs/core/protocol_builder.py
|
|
102
97
|
memvcs/core/refs.py
|
|
103
98
|
memvcs/core/remote.py
|
|
104
99
|
memvcs/core/repository.py
|
|
@@ -141,6 +136,7 @@ tests/test_consistency.py
|
|
|
141
136
|
tests/test_crypto_verify.py
|
|
142
137
|
tests/test_decay.py
|
|
143
138
|
tests/test_delta_encoding.py
|
|
139
|
+
tests/test_distiller_dp.py
|
|
144
140
|
tests/test_edge_cases.py
|
|
145
141
|
tests/test_encryption.py
|
|
146
142
|
tests/test_federated.py
|
|
@@ -150,6 +146,7 @@ tests/test_ipfs_remote.py
|
|
|
150
146
|
tests/test_llm_provider.py
|
|
151
147
|
tests/test_objects.py
|
|
152
148
|
tests/test_pack_gc.py
|
|
149
|
+
tests/test_performance_benchmarks.py
|
|
153
150
|
tests/test_pii.py
|
|
154
151
|
tests/test_plan_features.py
|
|
155
152
|
tests/test_privacy_budget.py
|
|
@@ -23,7 +23,7 @@ In `.mem/config.json` or user config:
|
|
|
23
23
|
|
|
24
24
|
- `coordinator_url`: Base URL of the coordinator (no trailing slash).
|
|
25
25
|
- `memory_types`: Which memory dirs to include in the summary.
|
|
26
|
-
- `differential_privacy.enabled`: If true, numeric fields
|
|
26
|
+
- `differential_privacy.enabled`: If true, fact-level numeric fields are noised before push (metadata is exempt).
|
|
27
27
|
|
|
28
28
|
## Coordinator API
|
|
29
29
|
|
|
@@ -33,15 +33,22 @@ The coordinator must expose two endpoints.
|
|
|
33
33
|
|
|
34
34
|
**Request**
|
|
35
35
|
|
|
36
|
-
- Body: JSON object (
|
|
36
|
+
- Body: JSON object (protocol-compliant summary envelope).
|
|
37
37
|
- `Content-Type: application/json`.
|
|
38
38
|
|
|
39
39
|
**Summary shape**
|
|
40
40
|
|
|
41
|
+
Top-level envelope:
|
|
42
|
+
|
|
43
|
+
- `summary`: object containing the fields below.
|
|
44
|
+
|
|
45
|
+
Summary fields:
|
|
46
|
+
|
|
47
|
+
- `agent_id`: deterministic client identifier (SHA-256).
|
|
48
|
+
- `timestamp`: ISO-8601 UTC timestamp.
|
|
41
49
|
- `memory_types`: list of strings (e.g. `["episodic", "semantic"]`).
|
|
42
|
-
- `
|
|
43
|
-
- `
|
|
44
|
-
- `fact_count`: integer (total fact/file count; may be noised if DP enabled).
|
|
50
|
+
- `topic_counts`: dict of memory type → integer count (may be noised if DP enabled).
|
|
51
|
+
- `fact_hashes`: list of strings (hashes; no raw content).
|
|
45
52
|
|
|
46
53
|
**Response**
|
|
47
54
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# agmem Test Report — What Works and What Doesn’t
|
|
2
2
|
|
|
3
|
-
This report is based on full-flow tests (`scripts/test_full_flow.py`), manual command runs, security fixes, and the knowledge graph feature.
|
|
3
|
+
This report is based on full-flow tests (`scripts/test_full_flow.py`), automated pytest runs, manual command runs, security fixes, and the knowledge graph feature.
|
|
4
|
+
|
|
5
|
+
**Latest automated run (2026-02-01):** 246 passed, 5 skipped in ~45s.
|
|
4
6
|
|
|
5
7
|
---
|
|
6
8
|
|
|
@@ -82,11 +84,13 @@ This report is based on full-flow tests (`scripts/test_full_flow.py`), manual co
|
|
|
82
84
|
|------|--------|------|
|
|
83
85
|
| **Crypto** | ✅ Tests | Merkle build/verify, tampered blob fails verification, signature present but no public key. |
|
|
84
86
|
| **Encryption** | ✅ Tests | Round-trip, wrong key fails, corrupted ciphertext fails. |
|
|
85
|
-
| **Privacy budget** | ✅ Tests | load_budget, spend_epsilon, add_noise, Gardener/Distiller DP integration (mocked). |
|
|
87
|
+
| **Privacy budget** | ✅ Tests | load_budget, spend_epsilon, add_noise, Gardener/Distiller DP integration (mocked), DP sampling (no fixed seed), metadata fields exempted from noise. |
|
|
86
88
|
| **Pack/GC** | ✅ Tests | list_loose_objects, run_gc, write_pack, retrieve_from_pack, ObjectStore read from pack, run_repack dry-run. |
|
|
87
89
|
| **ZK proofs** | ✅ Tests | prove_keyword_containment / verify_proof round-trip; keyword not in file returns False; freshness (skipped without signing key). |
|
|
88
|
-
| **Federated** | ✅ Tests |
|
|
90
|
+
| **Federated** | ✅ Tests | protocol-compliant summary (agent_id, timestamp, topic_counts, fact_hashes), DP noising; push/pull with mock coordinator. |
|
|
89
91
|
| **IPFS** | ✅ Tests | parse_ipfs_url, bundle/unbundle round-trip; push/pull with mock gateway. |
|
|
92
|
+
| **Protocol & privacy** | ✅ Tests | schema validation, privacy audit (metadata noise rejection), strict mode enforcement. |
|
|
93
|
+
| **Performance** | ✅ Tests | Levenshtein, SimHash, multi-tier similarity filtering regression checks. |
|
|
90
94
|
|
|
91
95
|
### Security (vulnerability check)
|
|
92
96
|
|
|
@@ -141,7 +141,7 @@ For more information: https://github.com/vivek-tiwari-vt/agmem
|
|
|
141
141
|
""",
|
|
142
142
|
)
|
|
143
143
|
|
|
144
|
-
parser.add_argument("--version", "-v", action="version", version="%(prog)s 0.1
|
|
144
|
+
parser.add_argument("--version", "-v", action="version", version="%(prog)s 0.2.1")
|
|
145
145
|
|
|
146
146
|
parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
|
|
147
147
|
|
|
@@ -21,6 +21,7 @@ from typing import Dict, List, Optional, Any
|
|
|
21
21
|
from pathlib import Path
|
|
22
22
|
import json
|
|
23
23
|
import hashlib
|
|
24
|
+
import re
|
|
24
25
|
|
|
25
26
|
try:
|
|
26
27
|
from fastapi import FastAPI, HTTPException, Request
|
|
@@ -39,10 +40,25 @@ except ImportError:
|
|
|
39
40
|
return None
|
|
40
41
|
|
|
41
42
|
|
|
43
|
+
def _get_version() -> str:
|
|
44
|
+
"""Get agmem version from pyproject.toml. Falls back to 0.2.1 if not found."""
|
|
45
|
+
try:
|
|
46
|
+
pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
|
|
47
|
+
if pyproject_path.exists():
|
|
48
|
+
content = pyproject_path.read_text()
|
|
49
|
+
match = re.search(r'version\s*=\s*"([^"]+)"', content)
|
|
50
|
+
if match:
|
|
51
|
+
return match.group(1)
|
|
52
|
+
except Exception:
|
|
53
|
+
pass
|
|
54
|
+
return "0.2.1"
|
|
55
|
+
|
|
56
|
+
|
|
42
57
|
# Storage: In-memory for simplicity (use Redis/PostgreSQL for production)
|
|
43
58
|
summaries_store: Dict[str, List[Dict[str, Any]]] = {}
|
|
59
|
+
_version = _get_version()
|
|
44
60
|
metadata_store: Dict[str, Any] = {
|
|
45
|
-
"coordinator_version":
|
|
61
|
+
"coordinator_version": _version,
|
|
46
62
|
"started_at": datetime.now(timezone.utc).isoformat(),
|
|
47
63
|
"total_pushes": 0,
|
|
48
64
|
"total_agents": 0,
|
|
@@ -79,7 +95,7 @@ if FASTAPI_AVAILABLE:
|
|
|
79
95
|
app = FastAPI(
|
|
80
96
|
title="agmem Federated Coordinator",
|
|
81
97
|
description="Minimal coordinator for federated agent memory collaboration",
|
|
82
|
-
version=
|
|
98
|
+
version=_version,
|
|
83
99
|
)
|
|
84
100
|
|
|
85
101
|
@app.get("/")
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Delta compression metrics and observability.
|
|
3
|
+
|
|
4
|
+
Tracks compression effectiveness across object types to enable future
|
|
5
|
+
optimization and auto-tuning of delta encoding parameters.
|
|
6
|
+
|
|
7
|
+
Provides:
|
|
8
|
+
- DeltaCompressionMetrics: Tracks compression ratio, object types, benefits
|
|
9
|
+
- CompressionHeatmap: Visualizes which types compress best
|
|
10
|
+
- Statistics reporting for gc --repack operations
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Dict, List, Any, Optional, Tuple
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
|
|
19
|
+
class ObjectCompressionStats:
|
|
20
|
+
"""Statistics for a single object's compression."""
|
|
21
|
+
|
|
22
|
+
object_id: str
|
|
23
|
+
object_type: str # "semantic", "episodic", "procedural"
|
|
24
|
+
original_size: int # bytes
|
|
25
|
+
compressed_size: int # bytes after delta encoding
|
|
26
|
+
compression_ratio: float # compressed_size / original_size (0.0 = 100% compression)
|
|
27
|
+
delta_used: bool # Whether delta encoding was applied
|
|
28
|
+
compression_benefit: float # original_size - compressed_size
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
|
|
32
|
+
class TypeCompressionStats:
|
|
33
|
+
"""Aggregated statistics for an object type."""
|
|
34
|
+
|
|
35
|
+
object_type: str
|
|
36
|
+
count: int = 0
|
|
37
|
+
total_original_size: int = 0
|
|
38
|
+
total_compressed_size: int = 0
|
|
39
|
+
avg_compression_ratio: float = 0.0
|
|
40
|
+
total_benefit: int = 0 # Total bytes saved
|
|
41
|
+
objects_with_delta: int = 0 # How many used delta encoding
|
|
42
|
+
min_ratio: float = 1.0
|
|
43
|
+
max_ratio: float = 0.0
|
|
44
|
+
|
|
45
|
+
def update_from_object(self, obj_stats: ObjectCompressionStats) -> None:
|
|
46
|
+
"""Update type stats with a single object's stats."""
|
|
47
|
+
self.count += 1
|
|
48
|
+
self.total_original_size += obj_stats.original_size
|
|
49
|
+
self.total_compressed_size += obj_stats.compressed_size
|
|
50
|
+
self.total_benefit += int(obj_stats.compression_benefit)
|
|
51
|
+
if obj_stats.delta_used:
|
|
52
|
+
self.objects_with_delta += 1
|
|
53
|
+
self.min_ratio = min(self.min_ratio, obj_stats.compression_ratio)
|
|
54
|
+
self.max_ratio = max(self.max_ratio, obj_stats.compression_ratio)
|
|
55
|
+
|
|
56
|
+
# Recalculate average
|
|
57
|
+
if self.total_original_size > 0:
|
|
58
|
+
self.avg_compression_ratio = self.total_compressed_size / self.total_original_size
|
|
59
|
+
|
|
60
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
61
|
+
"""Convert to dict for reporting."""
|
|
62
|
+
savings_pct = 0.0
|
|
63
|
+
if self.total_original_size > 0:
|
|
64
|
+
savings_pct = (self.total_benefit / self.total_original_size) * 100
|
|
65
|
+
|
|
66
|
+
return {
|
|
67
|
+
"object_type": self.object_type,
|
|
68
|
+
"count": self.count,
|
|
69
|
+
"total_original_bytes": self.total_original_size,
|
|
70
|
+
"total_compressed_bytes": self.total_compressed_size,
|
|
71
|
+
"avg_compression_ratio": round(self.avg_compression_ratio, 3),
|
|
72
|
+
"compression_range": f"{self.min_ratio:.1%} - {self.max_ratio:.1%}",
|
|
73
|
+
"total_bytes_saved": self.total_benefit,
|
|
74
|
+
"savings_percentage": round(savings_pct, 1),
|
|
75
|
+
"objects_using_delta": self.objects_with_delta,
|
|
76
|
+
"delta_adoption_rate": (
|
|
77
|
+
round((self.objects_with_delta / self.count * 100), 1) if self.count > 0 else 0
|
|
78
|
+
),
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class DeltaCompressionMetrics:
    """Tracks delta compression statistics across all objects.

    Every recorded object is kept in ``objects``; running byte totals and
    per-type aggregates (``type_stats``, keyed by object type) are updated
    as objects are recorded. Ratios are ``compressed / original``, so a
    lower ratio means better compression.

    Usage:
        metrics = DeltaCompressionMetrics()
        # ... during packing ...
        metrics.record_object(ObjectCompressionStats(...))
        # ... after packing ...
        report = metrics.get_report()
    """

    def __init__(self):
        self.objects: List["ObjectCompressionStats"] = []
        self.type_stats: Dict[str, "TypeCompressionStats"] = {}
        self.total_original_size: int = 0
        self.total_compressed_size: int = 0

    def record_object(self, obj_stats: "ObjectCompressionStats") -> None:
        """Record compression stats for a single object.

        Adds the object's sizes to the overall totals and folds it into the
        aggregate for its object type, creating that aggregate on first use.
        """
        self.objects.append(obj_stats)
        self.total_original_size += obj_stats.original_size
        self.total_compressed_size += obj_stats.compressed_size

        # Update type-specific stats
        if obj_stats.object_type not in self.type_stats:
            self.type_stats[obj_stats.object_type] = TypeCompressionStats(
                object_type=obj_stats.object_type
            )
        self.type_stats[obj_stats.object_type].update_from_object(obj_stats)

    def get_type_stats(self, object_type: str) -> Optional["TypeCompressionStats"]:
        """Get stats for a specific object type, or None if none were recorded."""
        return self.type_stats.get(object_type)

    def get_overall_ratio(self) -> float:
        """Get overall compression ratio (compressed / original) across all objects.

        Returns 0.0 when no original bytes have been recorded.
        """
        if self.total_original_size == 0:
            return 0.0
        return self.total_compressed_size / self.total_original_size

    def get_overall_savings(self) -> int:
        """Get total bytes saved across all objects.

        The value is negative when the compressed total exceeds the original.
        """
        return self.total_original_size - self.total_compressed_size

    def get_report(self) -> Dict[str, Any]:
        """Generate a comprehensive compression report.

        Returns:
            A dict with overall totals, the overall ratio (rounded to 3
            places), bytes saved and the saved percentage (rounded to 1
            place), per-type statistics keyed by object type, and a list of
            recommendations. ``timestamp`` is always None here; callers may
            fill it in.
        """
        overall_ratio = self.get_overall_ratio()
        overall_savings = self.get_overall_savings()
        savings_pct = (
            (overall_savings / self.total_original_size * 100)
            if self.total_original_size > 0
            else 0
        )

        return {
            "timestamp": None,  # Set by caller if needed
            "total_objects": len(self.objects),
            "total_original_bytes": self.total_original_size,
            "total_compressed_bytes": self.total_compressed_size,
            "overall_compression_ratio": round(overall_ratio, 3),
            "total_bytes_saved": overall_savings,
            "compression_percentage": round(savings_pct, 1),
            "type_statistics": {otype: stats.to_dict() for otype, stats in self.type_stats.items()},
            "recommendations": self._generate_recommendations(),
        }

    def _generate_recommendations(self) -> List[str]:
        """Generate optimization recommendations based on compression stats.

        Messages are ordered: the delta-usage check first, then every
        poorly compressing type (ratio > 0.9), then every very well
        compressing type (ratio < 0.5). If nothing is flagged, a single
        "operating normally" message is returned.
        """
        recommendations = []

        # Check if delta encoding is worth it
        objects_with_delta = sum(s.objects_with_delta for s in self.type_stats.values())
        if objects_with_delta == 0:
            recommendations.append("No objects used delta encoding. Check similarity thresholds.")

        # Check for types with poor compression
        for otype, stats in self.type_stats.items():
            if stats.count > 0 and stats.avg_compression_ratio > 0.9:
                recommendations.append(
                    f"Type '{otype}' compresses poorly (ratio: {stats.avg_compression_ratio:.1%}). "
                    "Consider increasing similarity threshold or reducing delta cost."
                )

        # Check for types with excellent compression
        for otype, stats in self.type_stats.items():
            if stats.count > 0 and stats.avg_compression_ratio < 0.5:
                recommendations.append(
                    f"Type '{otype}' compresses very well (ratio: {stats.avg_compression_ratio:.1%}). "
                    "Consider aggressive delta encoding or reduced threshold."
                )

        if not recommendations:
            recommendations.append("Compression is operating normally.")

        return recommendations

    def get_heatmap(self) -> str:
        """Generate a text-based compression heatmap.

        One row is emitted per object type with at least one object, sorted
        by ascending average compression ratio. Each row's bar is always
        exactly 30 characters wide, filled in proportion to the ratio.
        """
        lines = ["Delta Compression Heatmap", "=" * 50]

        if not self.type_stats:
            lines.append("No compression data available")
            return "\n".join(lines)

        # Sort by compression ratio
        sorted_types = sorted(
            self.type_stats.values(),
            key=lambda s: s.avg_compression_ratio,
        )

        bar_width = 30
        for stats in sorted_types:
            if stats.count == 0:
                continue
            ratio = stats.avg_compression_ratio
            # Create a simple bar chart. The fill is clamped so that a ratio
            # above 1.0 (output larger than input) or below 0 cannot push the
            # bar past its fixed width and break column alignment.
            filled = max(0, min(bar_width, int(bar_width * ratio)))
            bar = "█" * filled + "░" * (bar_width - filled)
            saved_pct = (
                (stats.total_benefit / stats.total_original_size * 100)
                if stats.total_original_size > 0
                else 0
            )
            lines.append(
                f"{stats.object_type:12} {bar} {saved_pct:5.1f}% saved ({stats.objects_with_delta}/{stats.count} using delta)"
            )

        return "\n".join(lines)

    def log_report(self, logger: Any = None) -> None:
        """Log the compression report.

        Args:
            logger: Object with an ``info(str)`` method. When None, the
                report is printed to stdout instead.
        """
        report = self.get_report()
        heatmap = self.get_heatmap()

        output = [
            "=" * 70,
            "Delta Compression Report",
            "=" * 70,
            f"Total Objects: {report['total_objects']}",
            f"Total Original: {report['total_original_bytes']:,} bytes",
            f"Total Compressed: {report['total_compressed_bytes']:,} bytes",
            f"Overall Ratio: {report['overall_compression_ratio']:.1%}",
            f"Bytes Saved: {report['total_bytes_saved']:,} ({report['compression_percentage']:.1f}%)",
            "",
            heatmap,
            "",
            "Type Breakdown:",
        ]

        for otype, stats in sorted(report["type_statistics"].items()):
            output.append(f" {otype}:")
            output.append(f" Count: {stats['count']}")
            output.append(f" Compression: {stats['avg_compression_ratio']:.1%}")
            output.append(f" Saved: {stats['total_bytes_saved']:,} bytes")
            output.append(f" Delta adoption: {stats['delta_adoption_rate']:.0f}%")

        output.extend(["", "Recommendations:"])
        for rec in report["recommendations"]:
            output.append(f" - {rec}")

        output.append("=" * 70)

        full_output = "\n".join(output)
        # Compare against None explicitly: a logger object that happens to be
        # falsy (e.g. one defining __len__) must still receive the report.
        if logger is not None:
            logger.info(full_output)
        else:
            print(full_output)
|
|
@@ -211,7 +211,6 @@ class Distiller:
|
|
|
211
211
|
# Sample facts with noise - prevents any single episode from dominating
|
|
212
212
|
import random
|
|
213
213
|
|
|
214
|
-
random.seed(42) # Deterministic but different per cluster due to content
|
|
215
214
|
sampled = random.sample(facts, min(noisy_count, len(facts)))
|
|
216
215
|
|
|
217
216
|
# Optional: Add slight noise to fact embeddings if vector store available
|
|
@@ -233,17 +232,9 @@ class Distiller:
|
|
|
233
232
|
out_path = self.target_dir / f"consolidated-{ts}.md"
|
|
234
233
|
|
|
235
234
|
confidence_score = self.config.extraction_confidence_threshold
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
and self.config.dp_delta is not None
|
|
240
|
-
):
|
|
241
|
-
from .privacy_budget import add_noise
|
|
242
|
-
|
|
243
|
-
confidence_score = add_noise(
|
|
244
|
-
confidence_score, 0.1, self.config.dp_epsilon, self.config.dp_delta
|
|
245
|
-
)
|
|
246
|
-
confidence_score = max(0.0, min(1.0, confidence_score))
|
|
235
|
+
# Metadata noise removed: confidence_score is a metadata field (threshold setting),
|
|
236
|
+
# not an individual fact. Adding noise to metadata doesn't provide meaningful
|
|
237
|
+
# privacy guarantees. See privacy_validator.py for the distinction.
|
|
247
238
|
frontmatter = {
|
|
248
239
|
"schema_version": "1.0",
|
|
249
240
|
"last_updated": datetime.utcnow().isoformat() + "Z",
|