memorymaster 3.5.1__tar.gz → 3.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321) hide show
  1. {memorymaster-3.5.1/memorymaster.egg-info → memorymaster-3.6.0}/PKG-INFO +1 -1
  2. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-steward-cycle.py +15 -4
  3. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/entity_extractor.py +37 -9
  4. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/llm_provider.py +52 -9
  5. {memorymaster-3.5.1 → memorymaster-3.6.0/memorymaster.egg-info}/PKG-INFO +1 -1
  6. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster.egg-info/SOURCES.txt +5 -0
  7. {memorymaster-3.5.1 → memorymaster-3.6.0}/pyproject.toml +1 -1
  8. memorymaster-3.6.0/scripts/grid_recall_weights.py +191 -0
  9. memorymaster-3.6.0/scripts/label_prompts_with_judge.py +211 -0
  10. memorymaster-3.6.0/scripts/precompute_candidates.py +94 -0
  11. memorymaster-3.6.0/tests/test_hook_env_isolation.py +79 -0
  12. memorymaster-3.6.0/tests/test_llm_provider_claude_cli.py +186 -0
  13. {memorymaster-3.5.1 → memorymaster-3.6.0}/LICENSE +0 -0
  14. {memorymaster-3.5.1 → memorymaster-3.6.0}/README.md +0 -0
  15. {memorymaster-3.5.1 → memorymaster-3.6.0}/artifacts/bm25-per-field-eval-harness.py +0 -0
  16. {memorymaster-3.5.1 → memorymaster-3.6.0}/artifacts/l2-haiku-batches/extract.py +0 -0
  17. {memorymaster-3.5.1 → memorymaster-3.6.0}/benchmarks/longmemeval_runner.py +0 -0
  18. {memorymaster-3.5.1 → memorymaster-3.6.0}/benchmarks/longmemeval_vector_runner.py +0 -0
  19. {memorymaster-3.5.1 → memorymaster-3.6.0}/benchmarks/perf_smoke.py +0 -0
  20. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/examples/run_conversation_opd.py +0 -0
  21. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/examples/run_conversation_replay.py +0 -0
  22. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/examples/run_conversation_rl.py +0 -0
  23. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/__init__.py +0 -0
  24. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/__main__.py +0 -0
  25. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/api_server.py +0 -0
  26. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/bedrock_client.py +0 -0
  27. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/calendar_client.py +0 -0
  28. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/claw_adapter.py +0 -0
  29. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/cli.py +0 -0
  30. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/config.py +0 -0
  31. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/config_store.py +0 -0
  32. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/data_formatter.py +0 -0
  33. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/idle_detector.py +0 -0
  34. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/launcher.py +0 -0
  35. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/log_color.py +0 -0
  36. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/openclaw_env_rollout.py +0 -0
  37. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/prm_scorer.py +0 -0
  38. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/rollout.py +0 -0
  39. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/runtime_state.py +0 -0
  40. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/scheduler.py +0 -0
  41. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/sdk_backend.py +0 -0
  42. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/setup_wizard.py +0 -0
  43. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/skill_evolver.py +0 -0
  44. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/skill_manager.py +0 -0
  45. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/trainer.py +0 -0
  46. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/metaclaw/utils.py +0 -0
  47. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/scripts/run_v03_benchmark.py +0 -0
  48. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/conftest.py +0 -0
  49. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_cli.py +0 -0
  50. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_launcher.py +0 -0
  51. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_openclaw_env_rollout.py +0 -0
  52. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_runtime_state.py +0 -0
  53. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_sdk_backend.py +0 -0
  54. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_setup_wizard.py +0 -0
  55. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_utils.py +0 -0
  56. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/MetaClaw/tests/test_v03_live_tinker.py +0 -0
  57. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/agent-skill-creator/references/examples/stock-analyzer/scripts/main.py +0 -0
  58. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/agent-skill-creator/scripts/export_utils.py +0 -0
  59. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/agent-skill-creator/scripts/security_scan.py +0 -0
  60. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/agent-skill-creator/scripts/skill_registry.py +0 -0
  61. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/agent-skill-creator/scripts/staleness_check.py +0 -0
  62. {memorymaster-3.5.1 → memorymaster-3.6.0}/cloned/agent-skill-creator/scripts/validate.py +0 -0
  63. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/__init__.py +0 -0
  64. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/__main__.py +0 -0
  65. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/_storage_lifecycle.py +0 -0
  66. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/_storage_read.py +0 -0
  67. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/_storage_schema.py +0 -0
  68. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/_storage_shared.py +0 -0
  69. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/_storage_write_claims.py +0 -0
  70. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/access_control.py +0 -0
  71. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/auto_extractor.py +0 -0
  72. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/auto_resolver.py +0 -0
  73. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/claim_verifier.py +0 -0
  74. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/cli.py +0 -0
  75. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/cli_handlers_basic.py +0 -0
  76. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/cli_handlers_curation.py +0 -0
  77. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/cli_helpers.py +0 -0
  78. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config.py +0 -0
  79. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/claude-md-append.md +0 -0
  80. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/codex-agents-md-append.md +0 -0
  81. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-auto-ingest.py +0 -0
  82. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-classify.py +0 -0
  83. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-dream-sync.py +0 -0
  84. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-observe.py +0 -0
  85. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-precompact.py +0 -0
  86. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-recall.py +0 -0
  87. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-session-start.py +0 -0
  88. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/config_templates/hooks/memorymaster-validate-wiki.py +0 -0
  89. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/conflict_resolver.py +0 -0
  90. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/context_hook.py +0 -0
  91. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/context_optimizer.py +0 -0
  92. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/daily_notes.py +0 -0
  93. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/dashboard.py +0 -0
  94. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/db_merge.py +0 -0
  95. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/dream_bridge.py +0 -0
  96. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/embeddings.py +0 -0
  97. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/entity_graph.py +0 -0
  98. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/entity_registry.py +0 -0
  99. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/feedback.py +0 -0
  100. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/graph_store.py +0 -0
  101. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/hook_log.py +0 -0
  102. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/__init__.py +0 -0
  103. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/compact_summaries.py +0 -0
  104. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/compactor.py +0 -0
  105. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/decay.py +0 -0
  106. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/dedup.py +0 -0
  107. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/deterministic.py +0 -0
  108. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/extractor.py +0 -0
  109. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/staleness.py +0 -0
  110. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/jobs/validator.py +0 -0
  111. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/key_rotator.py +0 -0
  112. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/lifecycle.py +0 -0
  113. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/llm_steward.py +0 -0
  114. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/mcp_server.py +0 -0
  115. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/metrics_exporter.py +0 -0
  116. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/models.py +0 -0
  117. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/operator.py +0 -0
  118. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/operator_queue.py +0 -0
  119. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/plugins.py +0 -0
  120. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/policy.py +0 -0
  121. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/postgres_store.py +0 -0
  122. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/qdrant_backend.py +0 -0
  123. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/qdrant_recall_fallback.py +0 -0
  124. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/qmd_bridge.py +0 -0
  125. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/query_classifier.py +0 -0
  126. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/query_expansion.py +0 -0
  127. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/recall_fusion.py +0 -0
  128. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/recall_tokenizer.py +0 -0
  129. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/retrieval.py +0 -0
  130. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/retry.py +0 -0
  131. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/review.py +0 -0
  132. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/rl_trainer.py +0 -0
  133. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/scheduler.py +0 -0
  134. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/schema.py +0 -0
  135. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/schema.sql +0 -0
  136. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/schema_postgres.sql +0 -0
  137. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/security.py +0 -0
  138. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/service.py +0 -0
  139. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/session_tracker.py +0 -0
  140. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/setup_hooks.py +0 -0
  141. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/skill_evolver.py +0 -0
  142. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/snapshot.py +0 -0
  143. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/steward.py +0 -0
  144. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/steward_classifier.py +0 -0
  145. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/steward_features.py +0 -0
  146. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/storage.py +0 -0
  147. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/store_factory.py +0 -0
  148. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/transcript_miner.py +0 -0
  149. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/turn_schema.py +0 -0
  150. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/vault_bases.py +0 -0
  151. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/vault_curator.py +0 -0
  152. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/vault_exporter.py +0 -0
  153. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/vault_linter.py +0 -0
  154. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/vault_log.py +0 -0
  155. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/vault_query_capture.py +0 -0
  156. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/vault_synthesis.py +0 -0
  157. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/verbatim_recall.py +0 -0
  158. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/verbatim_store.py +0 -0
  159. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/webhook.py +0 -0
  160. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/wiki_engine.py +0 -0
  161. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/wiki_freshness.py +0 -0
  162. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster/wiki_similarity.py +0 -0
  163. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster.egg-info/dependency_links.txt +0 -0
  164. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster.egg-info/entry_points.txt +0 -0
  165. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster.egg-info/requires.txt +0 -0
  166. {memorymaster-3.5.1 → memorymaster-3.6.0}/memorymaster.egg-info/top_level.txt +0 -0
  167. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/agg_recall_latency.py +0 -0
  168. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/alert_operator_metrics.py +0 -0
  169. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/autoresearch_daemon.py +0 -0
  170. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/backfill_entity_extraction.py +0 -0
  171. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/backfill_graph_store.py +0 -0
  172. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/backfill_stop_hook_citations.py +0 -0
  173. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/backtest_steward_classifier.py +0 -0
  174. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/build_steward_training_set.py +0 -0
  175. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/check_hook_template_drift.py +0 -0
  176. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/claude_to_turns.py +0 -0
  177. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/codex_live_to_turns.py +0 -0
  178. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/compaction_edge_cases.py +0 -0
  179. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/compaction_trace_report.py +0 -0
  180. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/compaction_trace_validate.py +0 -0
  181. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/confusion_matrix_eval.py +0 -0
  182. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/conversation_importer.py +0 -0
  183. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/conversation_to_turns.py +0 -0
  184. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/e2e_operator.py +0 -0
  185. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/email_live_to_turns.py +0 -0
  186. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/eval_bm25_sweep.py +0 -0
  187. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/eval_classify_f1.py +0 -0
  188. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/eval_memorymaster.py +0 -0
  189. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/eval_recall_precision_at_5.py +0 -0
  190. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/eval_recall_quality.py +0 -0
  191. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/eval_steward_pareto.py +0 -0
  192. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/eval_verbatim_recall.py +0 -0
  193. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/expand_recall_eval.py +0 -0
  194. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/generate_drill_signoff.py +0 -0
  195. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/git_to_turns.py +0 -0
  196. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/github_live_to_turns.py +0 -0
  197. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/gitnexus_to_claims.py +0 -0
  198. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/index_claims_to_qdrant.py +0 -0
  199. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/ingest_planning_docs.py +0 -0
  200. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/jira_live_to_turns.py +0 -0
  201. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/llm_benchmark.py +0 -0
  202. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/merge_scope_variants.py +0 -0
  203. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/messages_to_turns.py +0 -0
  204. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/operator_metrics.py +0 -0
  205. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/recurring_incident_drill.py +0 -0
  206. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/release_readiness.py +0 -0
  207. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/run_codex_autologger.py +0 -0
  208. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/run_incident_drill.py +0 -0
  209. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/run_longmemeval.py +0 -0
  210. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/scheduled_ingest.py +0 -0
  211. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/setup-hooks.py +0 -0
  212. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/slack_live_to_turns.py +0 -0
  213. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/sync_hook_templates.py +0 -0
  214. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/tickets_to_turns.py +0 -0
  215. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/train_steward_classifier.py +0 -0
  216. {memorymaster-3.5.1 → memorymaster-3.6.0}/scripts/webhook_to_turns.py +0 -0
  217. {memorymaster-3.5.1 → memorymaster-3.6.0}/setup.cfg +0 -0
  218. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/conftest.py +0 -0
  219. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/integration/test_extract_llm_ollama_live.py +0 -0
  220. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_access_control.py +0 -0
  221. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_auto_extractor.py +0 -0
  222. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_auto_ingest_hook_citations.py +0 -0
  223. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_auto_ingest_hook_schema.py +0 -0
  224. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_auto_resolver.py +0 -0
  225. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_auto_validate.py +0 -0
  226. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_bm25_per_field.py +0 -0
  227. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_claim_links.py +0 -0
  228. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_classify_hook_f1.py +0 -0
  229. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_classify_hook_latency.py +0 -0
  230. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_claude_to_turns.py +0 -0
  231. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_cli_json_flag.py +0 -0
  232. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_cli_ready.py +0 -0
  233. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_cli_review_queue.py +0 -0
  234. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_cli_subcommands.py +0 -0
  235. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_compact_summaries.py +0 -0
  236. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_compaction_trace.py +0 -0
  237. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_config.py +0 -0
  238. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_conflict_resolver.py +0 -0
  239. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_confusion_matrix_eval.py +0 -0
  240. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_connection_retry.py +0 -0
  241. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_connectors.py +0 -0
  242. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_context_hook.py +0 -0
  243. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_context_optimizer.py +0 -0
  244. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_conversation_to_turns.py +0 -0
  245. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_dashboard.py +0 -0
  246. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_dedup.py +0 -0
  247. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_deterministic_predicates.py +0 -0
  248. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_embeddings_coverage.py +0 -0
  249. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_entity_extractor.py +0 -0
  250. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_entity_extractor_llm.py +0 -0
  251. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_entity_graph.py +0 -0
  252. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_entity_new_kinds.py +0 -0
  253. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_entity_registry.py +0 -0
  254. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_eval_harness.py +0 -0
  255. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_events_schema.py +0 -0
  256. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_extract_llm_ollama.py +0 -0
  257. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_feedback.py +0 -0
  258. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_fts5_search.py +0 -0
  259. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_graph_distance.py +0 -0
  260. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_graph_store.py +0 -0
  261. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_handler_regressions.py +0 -0
  262. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_human_id.py +0 -0
  263. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_incident_drill_runner.py +0 -0
  264. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_integration_workflows.py +0 -0
  265. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_key_rotator.py +0 -0
  266. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_lifecycle.py +0 -0
  267. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_llm_fallback.py +0 -0
  268. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_llm_steward_coverage.py +0 -0
  269. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_llm_steward_key_rotation.py +0 -0
  270. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_mcp_helpers.py +0 -0
  271. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_metrics_exporter.py +0 -0
  272. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_obsidian_mind_patterns.py +0 -0
  273. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_operator.py +0 -0
  274. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_operator_queue.py +0 -0
  275. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_perf_smoke_config.py +0 -0
  276. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_plugins.py +0 -0
  277. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_policy_coverage.py +0 -0
  278. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_policy_mode_env.py +0 -0
  279. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_postgres_parity.py +0 -0
  280. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_qdrant_backend.py +0 -0
  281. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_qmd_bridge.py +0 -0
  282. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_query_classifier.py +0 -0
  283. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_query_expansion.py +0 -0
  284. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_recall_entity_fanout.py +0 -0
  285. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_recall_fusion.py +0 -0
  286. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_recall_latency.py +0 -0
  287. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_recall_precision_at_5.py +0 -0
  288. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_recall_tokenizer.py +0 -0
  289. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_recall_vector_fallback.py +0 -0
  290. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_reliability_hardening.py +0 -0
  291. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_review.py +0 -0
  292. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_rl_trainer.py +0 -0
  293. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_rrf_auto_gate.py +0 -0
  294. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_scheduler.py +0 -0
  295. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_schema.py +0 -0
  296. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_scope_boost.py +0 -0
  297. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_security_access.py +0 -0
  298. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_security_patterns.py +0 -0
  299. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_sensitivity_filter_adversarial.py +0 -0
  300. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_sensitivity_filter_adversarial_v2.py +0 -0
  301. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_service_coverage.py +0 -0
  302. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_session_tracker.py +0 -0
  303. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_snapshot.py +0 -0
  304. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_sqlite_core.py +0 -0
  305. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_staleness.py +0 -0
  306. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_stealth_mode.py +0 -0
  307. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_steward.py +0 -0
  308. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_steward_classifier.py +0 -0
  309. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_steward_features.py +0 -0
  310. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_steward_features_v3.py +0 -0
  311. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_steward_resolution_parity.py +0 -0
  312. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_store_factory.py +0 -0
  313. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_tenant_isolation.py +0 -0
  314. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_turn_schema.py +0 -0
  315. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_vault_exporter.py +0 -0
  316. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_vector_search.py +0 -0
  317. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_verbatim_recall.py +0 -0
  318. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_webhook.py +0 -0
  319. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_wiki_binding.py +0 -0
  320. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_wiki_freshness.py +0 -0
  321. {memorymaster-3.5.1 → memorymaster-3.6.0}/tests/test_wiki_similarity_multiscope.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: memorymaster
3
- Version: 3.5.1
3
+ Version: 3.6.0
4
4
  Summary: Production-grade memory reliability system for AI coding agents. Lifecycle-managed claims with citations, conflict detection, steward governance, and MCP integration.
5
5
  Author: wolverin0
6
6
  License: MIT
@@ -6,6 +6,19 @@ DB_PATH = os.path.join(PROJECT_ROOT, "memorymaster.db")
6
6
 
7
7
  sys.path.insert(0, PROJECT_ROOT)
8
8
  os.environ["MEMORYMASTER_DEFAULT_DB"] = DB_PATH
9
+
10
+ # LLM stack: claude_cli (Claude Code OAuth via local `claude --print`) is the
11
+ # primary, with Ollama gemma4:e4b as a defensive fallback. Direct assignment
12
+ # (NOT setdefault) — the hook MUST own these vars so an inherited shell env
13
+ # can't silently route LLM calls to a stale provider. Bug observed 2026-04-25:
14
+ # setdefault was a no-op when the inherited env already had MEMORYMASTER_LLM_PROVIDER
15
+ # set, so the new model name routed to the OLD provider → 50× HTTP 404 per cycle
16
+ # before the fallback chain saved it. Captured as v3.5.0 release notes.
17
+ os.environ["MEMORYMASTER_LLM_PROVIDER"] = "claude_cli"
18
+ os.environ["MEMORYMASTER_LLM_MODEL"] = "claude-haiku-4-5-20251001"
19
+ os.environ["MEMORYMASTER_LLM_FALLBACK_PROVIDER"] = "ollama"
20
+ os.environ["MEMORYMASTER_LLM_FALLBACK_MODEL"] = "gemma4:e4b"
21
+
9
22
  os.chdir(PROJECT_ROOT)
10
23
 
11
24
  try:
@@ -36,11 +49,9 @@ try:
36
49
  except Exception as e:
37
50
  print(f"[MemoryMaster] auto-archive error: {e}", file=sys.stderr)
38
51
 
39
- # Wiki absorb (compiled truth + timeline articles)
52
+ # Wiki absorb (compiled truth + timeline articles). Inherits the LLM provider
53
+ # block above — uses the same OAuth-backed haiku stack as the steward.
40
54
  try:
41
- # Keys come from the rotator file (~/.memorymaster/gemini-keys.env) or a
42
- # singular GEMINI_API_KEY env var. Hook must never hardcode credentials.
43
- os.environ.setdefault("MEMORYMASTER_LLM_PROVIDER", "google")
44
55
  from memorymaster.wiki_engine import absorb
45
56
  wiki_path = os.path.join(PROJECT_ROOT, "obsidian-vault", "wiki")
46
57
  stats = absorb(DB_PATH, wiki_path)
@@ -496,7 +496,7 @@ def extract_patterns(text: str) -> list[Entity]:
496
496
 
497
497
  # Version identifier baked into the prompt. Bump this string when the prompt
498
498
  # changes so that downstream idempotency / caching keys invalidate cleanly.
499
- LLM_PROMPT_VERSION = "entity-l2-v2-2026-04-25"
499
+ LLM_PROMPT_VERSION = "entity-l2-v3-2026-04-27"
500
500
 
501
501
  # Permitted entity kinds for Layer-2. Any `kind` returned by the LLM that is
502
502
  # not in this set is dropped to keep the registry schema predictable.
@@ -513,22 +513,50 @@ LLM_KINDS: frozenset[str] = frozenset(
513
513
 
514
514
  _LLM_ENV_FLAG = "MEMORYMASTER_ENTITY_LLM"
515
515
  _LLM_MAX_TEXT_CHARS = 4000 # Truncate long claims before sending to LLM.
516
- _LLM_MAX_ENTITIES = 8 # Hard cap to keep cost bounded per claim.
516
+ _LLM_MAX_ENTITIES = 5 # v3: tightened from 8 to 5 overgeneration was the
517
+ # dominant failure mode in v2 backfill batches.
517
518
 
518
- _LLM_PROMPT = f"""Extract entities from the snippet that regex cannot catch.
519
+ _LLM_PROMPT = f"""Extract HIGH-VALUE entities from the snippet only ones a future agent would search for by name. Be conservative.
519
520
  Prompt version: {LLM_PROMPT_VERSION}
520
521
 
521
- Allowed kinds: person_name, spanish_surname, time_expression, model_name, library_name, concept.
522
- Skip: file paths, env-vars, hostnames, ports, commit SHAs, tool names.
523
- Max {_LLM_MAX_ENTITIES} entities. Output STRICT JSON ARRAY only no prose, no code fence.
522
+ ALLOWED kinds (return ONE per entity): person_name, spanish_surname, time_expression, model_name, library_name, concept.
523
+
524
+ WHEN IN DOUBT, SKIP. A future agent searching memory for this claim should be searching by the entity name itself, not by a generic word.
525
+
526
+ ALWAYS SKIP:
527
+ - File paths, directories, env vars, hostnames, IPs, ports
528
+ - Commit SHAs, branch names, tool names like "git", "docker", "npm", "sqlite", "psql"
529
+ - Generic English words: "system", "config", "service", "module", "function", "component", "data", "process", "task", "user"
530
+ - Generic Spanish words: "sistema", "config", "servicio", "modulo", "funcion", "componente", "datos", "proceso", "tarea", "usuario", "cosa", "caso"
531
+ - Standalone numbers, percentages, dates already in YYYY-MM-DD form
532
+ - HTML/CSS class names, JSON keys, code identifiers in snake_case or camelCase
533
+
534
+ Quality bar by kind:
535
+ - person_name: full name (≥2 capitalized words) of a real person, NOT a role like "user" or "developer"
536
+ - spanish_surname: bare surname when it stands alone WITHOUT a first name
537
+ - time_expression: relative phrases like "next Thursday", "el lunes pasado", "Q3 2026" — NOT absolute YYYY-MM-DD dates
538
+ - model_name: AI model identifier with a recognizable family prefix (gpt-, claude-, gemini-, llama-, mistral-) AND a version
539
+ - library_name: a SPECIFIC named library/framework like "FastAPI", "React", "pyafipws" — NOT "the API" or "the framework"
540
+ - concept: a named domain concept (3+ words usually) that appears as a noun-phrase a person would research, like "RRF fusion", "byzantine consensus", "writer-lock contention" — NOT generic ideas
541
+
542
+ Output STRICT JSON ARRAY only — no prose, no code fence. Max {_LLM_MAX_ENTITIES} entities. If nothing in the snippet rises to the bar, return [].
524
543
 
525
544
  Schema (use EXACT field names):
526
545
  [{{"kind": "...", "surface_form": "exact substring from text", "aliases": []}}]
527
546
 
528
- Example input: "Ada Lovelace y Charles Babbage usaron FastAPI y gpt-4o-mini."
529
- Example output: [{{"kind":"person_name","surface_form":"Ada Lovelace","aliases":[]}},{{"kind":"person_name","surface_form":"Charles Babbage","aliases":[]}},{{"kind":"library_name","surface_form":"FastAPI","aliases":[]}},{{"kind":"model_name","surface_form":"gpt-4o-mini","aliases":[]}}]
547
+ POSITIVE example
548
+ Input: "Ada Lovelace y Charles Babbage usaron FastAPI y gpt-4o-mini el lunes pasado para implementar RRF fusion."
549
+ Output: [{{"kind":"person_name","surface_form":"Ada Lovelace","aliases":[]}},{{"kind":"person_name","surface_form":"Charles Babbage","aliases":[]}},{{"kind":"library_name","surface_form":"FastAPI","aliases":[]}},{{"kind":"model_name","surface_form":"gpt-4o-mini","aliases":[]}},{{"kind":"concept","surface_form":"RRF fusion","aliases":[]}}]
550
+
551
+ NEGATIVE example (bloat to AVOID)
552
+ Input: "El sistema usa la base de datos para guardar config del usuario en el modulo principal."
553
+ Output: []
554
+ (All terms are generic — system, database, config, user, module — none worth indexing.)
530
555
 
531
- If nothing fits, return: []
556
+ NEGATIVE example (path/SHA noise)
557
+ Input: "Bug fixed in commit a133bc6 in src/auth/login.py — see logs at /var/log/app.log"
558
+ Output: []
559
+ (Commit SHA, file path, log path — all skip.)
532
560
  """.strip()
533
561
 
534
562
 
@@ -453,17 +453,60 @@ def call_llm(prompt: str, text: str) -> str:
453
453
 
454
454
 
455
455
def parse_json_response(text: str) -> list[dict]:
    """Parse an LLM response into a list, tolerating code fences and prose.

    The following strategies are tried in order; the first one that yields
    valid JSON wins:

    1. If the text starts with a Markdown code fence, strip the opening and
       closing fence and parse what is left.
    2. Parse the whole text as-is.
    3. Parse the contents of the first fenced block found anywhere in the
       text (prose preamble followed by a fenced answer).
    4. Parse the span from the first ``[`` to the last ``]``.
    5. Scan every ``[`` from left to right and keep the longest array that
       decodes cleanly.  This rescues replies where prose contains stray
       brackets on either side of the real answer, which makes the span
       used by strategy 4 invalid.

    Args:
        text: Raw model output.

    Returns:
        The decoded list, or a decoded dict wrapped in a one-element list,
        or ``[]`` when nothing parses or the decoded value is neither a
        list nor a dict.  List elements are returned untouched, so they are
        not guaranteed to be dicts.
    """
    text = text.strip()

    # Strategy 1: fenced from the very first character.
    if text.startswith("```"):
        stripped = re.sub(r"^```(?:json)?\n?", "", text)
        stripped = re.sub(r"\n?```$", "", stripped)
        try:
            return _coerce_to_list(json.loads(stripped))
        except (json.JSONDecodeError, ValueError):
            pass

    # Strategy 2: the reply is already bare JSON.
    try:
        return _coerce_to_list(json.loads(text))
    except (json.JSONDecodeError, ValueError):
        pass

    # Strategy 3: first fenced block anywhere in the reply.
    fenced_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
    if fenced_match:
        try:
            return _coerce_to_list(json.loads(fenced_match.group(1).strip()))
        except (json.JSONDecodeError, ValueError):
            pass

    # Strategy 4: greedy span from the first "[" to the last "]".
    first = text.find("[")
    last = text.rfind("]")
    if first != -1 and last > first:
        try:
            return _coerce_to_list(json.loads(text[first : last + 1]))
        except (json.JSONDecodeError, ValueError):
            pass

    # Strategy 5: the greedy span was polluted by stray brackets in prose.
    # Decode from each "[" in turn and keep the longest array that parses.
    decoder = json.JSONDecoder()
    best: list = []
    best_len = 0
    pos = first
    while pos != -1:
        try:
            value, end = decoder.raw_decode(text, pos)
        except ValueError:
            # Not a JSON value here; try the next opening bracket.
            pos = text.find("[", pos + 1)
            continue
        if end - pos > best_len:
            best, best_len = value, end - pos
        # Skip past the decoded span so nested arrays are not re-scanned.
        pos = text.find("[", end)
    return _coerce_to_list(best)


def _coerce_to_list(result) -> list[dict]:
    """Normalise a decoded JSON value to a list.

    A list is returned unchanged, a dict is wrapped in a one-element list,
    and every other value (scalars, ``None``) becomes ``[]``.
    """
    if isinstance(result, list):
        return result
    if isinstance(result, dict):
        return [result]
    return []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: memorymaster
3
- Version: 3.5.1
3
+ Version: 3.6.0
4
4
  Summary: Production-grade memory reliability system for AI coding agents. Lifecycle-managed claims with citations, conflict detection, steward governance, and MCP integration.
5
5
  Author: wolverin0
6
6
  License: MIT
@@ -189,13 +189,16 @@ scripts/generate_drill_signoff.py
189
189
  scripts/git_to_turns.py
190
190
  scripts/github_live_to_turns.py
191
191
  scripts/gitnexus_to_claims.py
192
+ scripts/grid_recall_weights.py
192
193
  scripts/index_claims_to_qdrant.py
193
194
  scripts/ingest_planning_docs.py
194
195
  scripts/jira_live_to_turns.py
196
+ scripts/label_prompts_with_judge.py
195
197
  scripts/llm_benchmark.py
196
198
  scripts/merge_scope_variants.py
197
199
  scripts/messages_to_turns.py
198
200
  scripts/operator_metrics.py
201
+ scripts/precompute_candidates.py
199
202
  scripts/recurring_incident_drill.py
200
203
  scripts/release_readiness.py
201
204
  scripts/run_codex_autologger.py
@@ -251,12 +254,14 @@ tests/test_fts5_search.py
251
254
  tests/test_graph_distance.py
252
255
  tests/test_graph_store.py
253
256
  tests/test_handler_regressions.py
257
+ tests/test_hook_env_isolation.py
254
258
  tests/test_human_id.py
255
259
  tests/test_incident_drill_runner.py
256
260
  tests/test_integration_workflows.py
257
261
  tests/test_key_rotator.py
258
262
  tests/test_lifecycle.py
259
263
  tests/test_llm_fallback.py
264
+ tests/test_llm_provider_claude_cli.py
260
265
  tests/test_llm_steward_coverage.py
261
266
  tests/test_llm_steward_key_rotation.py
262
267
  tests/test_mcp_helpers.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "memorymaster"
7
- version = "3.5.1"
7
+ version = "3.6.0"
8
8
  description = "Production-grade memory reliability system for AI coding agents. Lifecycle-managed claims with citations, conflict detection, steward governance, and MCP integration."
9
9
  license = {text = "MIT"}
10
10
  authors = [{name = "wolverin0"}]
@@ -0,0 +1,191 @@
1
+ """Grid-search the recall weight knobs against precision@5.
2
+
3
+ Sweeps W_LEXICAL × W_FRESHNESS × W_GRAPH against the existing 100-prompt
4
+ evaluation harness (`scripts/eval_recall_precision_at_5.py`) and writes a
5
+ sorted markdown table + raw JSONL log so a future tweak is reproducible.
6
+
7
+ W_VECTOR is skipped because the local DB has no Qdrant; the stream is a
8
+ no-op without `MEMORYMASTER_USE_QDRANT=1` and a populated index.
9
+
10
+ Usage:
11
+ python scripts/grid_recall_weights.py \
12
+ --prompts artifacts/real-prompts-100.jsonl \
13
+ --db memorymaster.db \
14
+ --output artifacts/recall-weight-tuning-2026-04-26.md
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import itertools
20
+ import json
21
+ import os
22
+ import re
23
+ import subprocess
24
+ import sys
25
+ import time
26
+ from pathlib import Path
27
+
28
+ # Modest 3 × 3 × 4 = 36 grid. Bounded by ~10s/combo wall via subprocess startup.
29
+ W_LEXICAL_GRID = (0.2, 0.3, 0.4)
30
+ W_FRESHNESS_GRID = (0.0, 0.05, 0.1)
31
+ W_GRAPH_GRID = (0.0, 0.05, 0.1, 0.2)
32
+
33
+ METRIC_RE = {
34
+ "precision@5": re.compile(r"precision@5\s*=\s*([\d.]+)"),
35
+ "MAP@5": re.compile(r"MAP@5\s*=\s*([\d.]+)"),
36
+ "hit@5": re.compile(r"hit@5\s*=\s*([\d.]+)"),
37
+ "p95_ms": re.compile(r"p95\s*=\s*([\d.]+)\s*ms"),
38
+ }
39
+
40
+
41
+ def _run_eval(
42
+ eval_script: Path,
43
+ prompts: Path,
44
+ db: Path,
45
+ weights: dict,
46
+ json_out: Path,
47
+ label: str,
48
+ ) -> dict | None:
49
+ env = os.environ.copy()
50
+ for k, v in weights.items():
51
+ env[f"MEMORYMASTER_RECALL_{k}"] = str(v)
52
+ # The GRAPH stream is opt-in: W_GRAPH alone is a no-op unless the stream
53
+ # itself is enabled. Turn it on only when the weight is non-zero — keeps
54
+ # the latency-cost cells out of the grid when they can't possibly help.
55
+ if weights.get("W_GRAPH", 0) > 0:
56
+ env["MEMORYMASTER_RECALL_GRAPH"] = "1"
57
+ # Same for the freshness stream.
58
+ if weights.get("W_FRESHNESS", 0) > 0:
59
+ env["MEMORYMASTER_RECALL_FRESHNESS"] = "1"
60
+
61
+ proc = subprocess.run(
62
+ [
63
+ sys.executable,
64
+ str(eval_script),
65
+ "--prompts",
66
+ str(prompts),
67
+ "--db",
68
+ str(db),
69
+ "--json-out",
70
+ str(json_out),
71
+ "--label",
72
+ label,
73
+ ],
74
+ capture_output=True,
75
+ text=True,
76
+ env=env,
77
+ timeout=600,
78
+ )
79
+ if proc.returncode != 0:
80
+ return {"error": proc.stderr.strip()[:200] or "non-zero exit"}
81
+
82
+ out = proc.stdout
83
+ parsed = {"label": label, **{k: v for k, v in weights.items()}}
84
+ for metric, rgx in METRIC_RE.items():
85
+ m = rgx.search(out)
86
+ parsed[metric] = float(m.group(1)) if m else None
87
+ return parsed
88
+
89
+
90
def _fmt_metric(value: object) -> str:
    """Format a metric to three decimals, or ``n/a`` when it is missing."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return f"{value:.3f}"
    return "n/a"


def main() -> int:
    """Sweep the weight grid, rank the cells and write the reports.

    Runs one eval per (W_LEXICAL, W_FRESHNESS, W_GRAPH) combination, ranks
    the cells that reported a precision@5 value (ties broken by MAP@5
    descending, then p95 latency ascending), and writes a markdown summary
    to ``--output`` plus the raw rows as JSON next to it.

    Returns:
        Process exit code; always 0.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("--prompts", type=Path, required=True)
    p.add_argument("--db", type=Path, required=True)
    p.add_argument("--output", type=Path, required=True)
    p.add_argument(
        "--eval-script",
        type=Path,
        default=Path("scripts/eval_recall_precision_at_5.py"),
    )
    p.add_argument(
        "--per-run-json-dir",
        type=Path,
        default=Path("artifacts/grid-runs"),
        help="Per-cell raw eval JSONL dump directory.",
    )
    p.add_argument(
        "--baseline",
        type=float,
        default=0.152,
        help="Baseline precision@5 the winner is compared against.",
    )
    args = p.parse_args()

    args.per_run_json_dir.mkdir(parents=True, exist_ok=True)
    # Create the report directory up front so a long sweep cannot fail at
    # the very last step.
    args.output.parent.mkdir(parents=True, exist_ok=True)

    combos = list(
        itertools.product(W_LEXICAL_GRID, W_FRESHNESS_GRID, W_GRAPH_GRID)
    )
    print(f"[grid] running {len(combos)} cells over W_LEXICAL × W_FRESHNESS × W_GRAPH")

    rows: list[dict] = []
    t_total = time.monotonic()
    for i, (w_lex, w_fresh, w_graph) in enumerate(combos, 1):
        weights = {"W_LEXICAL": w_lex, "W_FRESHNESS": w_fresh, "W_GRAPH": w_graph}
        label = f"L{w_lex}_F{w_fresh}_G{w_graph}"
        json_out = args.per_run_json_dir / f"{label}.jsonl"
        t0 = time.monotonic()
        row = _run_eval(args.eval_script, args.prompts, args.db, weights, json_out, label)
        wall = time.monotonic() - t0
        if row is None:
            row = {"error": "no output"}
        row["wall_s"] = round(wall, 1)
        rows.append(row)
        prec = row.get("precision@5")
        prec_str = f"{prec:.3f}" if isinstance(prec, float) else "ERR"
        print(f"[grid] {i}/{len(combos)} {label} wall={wall:.1f}s p@5={prec_str}")

    # Rank by precision@5 (tie-break MAP@5 desc, then p95 asc).  Only rows
    # with a parsed precision@5 qualify; their other metrics may be None.
    valid = [r for r in rows if isinstance(r.get("precision@5"), float)]
    valid.sort(
        key=lambda r: (
            -r["precision@5"],
            -(r.get("MAP@5") or 0.0),
            r.get("p95_ms") or 1e9,
        )
    )

    # Write markdown report
    lines = [
        "# Recall weight grid — precision@5 tuning",
        "",
        f"- Eval prompts: `{args.prompts}` (100, 70 labeled)",
        f"- DB: `{args.db}` (post-L2-backfill snapshot)",
        f"- Grid: W_LEXICAL × W_FRESHNESS × W_GRAPH = "
        f"{len(W_LEXICAL_GRID)} × {len(W_FRESHNESS_GRID)} × {len(W_GRAPH_GRID)} = {len(combos)} cells",
        f"- Total wall: {round(time.monotonic()-t_total, 1)}s",
        "",
        "## Top 10 by precision@5",
        "",
        "| W_LEXICAL | W_FRESHNESS | W_GRAPH | precision@5 | MAP@5 | hit@5 | p95 ms | wall s |",
        "|---|---|---|---|---|---|---|---|",
    ]
    for r in valid[:10]:
        p95 = r.get("p95_ms")
        # MAP@5 / hit@5 / p95 can be None when the eval output omitted them;
        # formatting None with ":.3f" would raise TypeError.
        lines.append(
            f"| {r['W_LEXICAL']} | {r['W_FRESHNESS']} | {r['W_GRAPH']} "
            f"| {r['precision@5']:.3f} | {_fmt_metric(r.get('MAP@5'))} | {_fmt_metric(r.get('hit@5'))} "
            f"| {'n/a' if p95 is None else p95} | {r['wall_s']} |"
        )
    if not valid:
        lines.append("| — | — | — | NO VALID RUNS | | | | |")
    else:
        winner = valid[0]
        lines += [
            "",
            "## Winner",
            "",
            f"`MEMORYMASTER_RECALL_W_LEXICAL={winner['W_LEXICAL']}` "
            f"`MEMORYMASTER_RECALL_W_FRESHNESS={winner['W_FRESHNESS']}` "
            f"`MEMORYMASTER_RECALL_W_GRAPH={winner['W_GRAPH']}`",
            "",
            f"precision@5 = **{winner['precision@5']:.3f}** "
            f"(baseline {args.baseline}, delta = {(winner['precision@5'] - args.baseline):+.3f})",
        ]

    args.output.write_text("\n".join(lines) + "\n", encoding="utf-8")

    # Also dump rows as JSON for downstream automation
    json_path = args.output.with_suffix(".json")
    json_path.write_text(json.dumps(rows, indent=2), encoding="utf-8")

    print(f"[grid] wrote {args.output}")
    print(f"[grid] wrote {json_path}")
    return 0
188
+
189
+
190
+ if __name__ == "__main__":
191
+ sys.exit(main())
@@ -0,0 +1,211 @@
1
+ """LLM-judge: label which retrieved claims actually answer each synthetic prompt.
2
+
3
+ For each prompt in the input JSONL:
4
+ 1. Run the production recall hook to get the top-K (default 20) candidate claims.
5
+ 2. Send (prompt + candidate snippets) to a haiku judge.
6
+ 3. Judge returns the subset of claim IDs that genuinely answer the prompt.
7
+ 4. Write {sha1_16(prompt): [claim_ids]} into the labels JSON.
8
+
9
+ Usage:
10
+ python scripts/label_prompts_with_judge.py \
11
+ --prompts artifacts/real-prompts-1000.jsonl \
12
+ --db memorymaster.db \
13
+ --labels-out artifacts/real-prompts-1000-labels.json \
14
+ --top-k 20 \
15
+ --max-prompts 1000
16
+
17
+ The output is consumed by scripts/eval_recall_precision_at_5.py via the
18
+ ``<prompts>-labels.json`` convention.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import hashlib
24
+ import json
25
+ import os
26
+ import sys
27
+ import time
28
+ from pathlib import Path
29
+
30
+
31
+ def _sha1_16(text: str) -> str:
32
+ return hashlib.sha1(text.encode("utf-8")).hexdigest()[:16]
33
+
34
+
35
+ def _judge_prompt(prompt: str, candidates: list[dict]) -> str:
36
+ candidate_lines = "\n".join(
37
+ f"[{c['id']}] {c['text'][:300]}" for c in candidates
38
+ )
39
+ return f"""You are a relevance judge. Given a USER QUERY and a list of CANDIDATE memory claims, return the subset of claim IDs that genuinely answer the query.
40
+
41
+ USER QUERY: {prompt}
42
+
43
+ CANDIDATES (id and snippet):
44
+ {candidate_lines}
45
+
46
+ Rules:
47
+ - Return ONLY claim IDs that DIRECTLY answer the query (not tangentially related).
48
+ - An empty list is a valid answer if no candidate genuinely answers.
49
+ - Return JSON ARRAY ONLY of integer IDs, no prose, no fence. Example: [123, 456]
50
+ - Be strict — pick at most 5, prefer 0-3 high-quality matches over many weak ones."""
51
+
52
+
53
def _get_candidates(db_path: str, prompt: str, top_k: int) -> list[dict]:
    """Return up to *top_k* recalled claims as ``{"id", "text"}`` dicts.

    Claim IDs come from ``context_hook.recall(..., return_ids=True)``; the
    text of each is then read from the ``claims`` table of the SQLite
    database at *db_path*.  IDs with no matching row are skipped, and a
    NULL text is returned as an empty string.  Any exception raised by the
    recall call is printed and treated as "no candidates".
    """
    from memorymaster import context_hook

    # recall() hands back rendered text; with return_ids=True a tuple whose
    # second element is the list of claim IDs is expected.
    claim_ids = []
    try:
        outcome = context_hook.recall(
            prompt,
            db_path=db_path,
            return_ids=True,
        )
        if isinstance(outcome, tuple):
            _rendered, claim_ids = outcome
    except Exception as exc:
        print(f"[label] recall() raised: {exc}", flush=True)
        claim_ids = []

    if not claim_ids:
        return []

    import sqlite3

    conn = sqlite3.connect(db_path)
    try:
        # One lookup per ID keeps the candidates in recall order.
        fetched = (
            conn.execute(
                "SELECT id, text FROM claims WHERE id = ?", (cid,)
            ).fetchone()
            for cid in claim_ids[:top_k]
        )
        return [{"id": rec[0], "text": rec[1] or ""} for rec in fetched if rec]
    finally:
        conn.close()
93
+
94
+
95
def _as_claim_id(value: object) -> int | None:
    """Convert one judge-returned value to a claim ID, or ``None`` if unusable."""
    # bool is a subclass of int; JSON true/false must not become IDs 1/0.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        text = value.strip()
        digits = text[1:] if text.startswith("-") else text
        # isdecimal() only admits characters int() can parse, so malformed
        # input such as "--5" is rejected here instead of raising later.
        if digits.isdecimal():
            return int(text)
    return None


def _call_judge(prompt: str, candidates: list[dict]) -> list[int]:
    """Ask the judge model which candidates answer *prompt*.

    Makes a single LLM call and normalises the reply.  Items may be bare
    integers, digit strings, or dicts carrying the ID under ``id`` or
    ``claim_id``.  When nothing usable is parsed, every standalone 2-8 digit
    number in the raw reply is taken as a last resort.

    Args:
        prompt: The user query.
        candidates: Candidate dicts with ``id`` and ``text`` keys.

    Returns:
        Claim IDs in the order the judge gave them, without duplicates.
        Empty when the model returned nothing.  IDs are not checked
        against the candidate set here.
    """
    from memorymaster.llm_provider import call_llm, parse_json_response

    judge_text = _judge_prompt(prompt, candidates)
    raw = call_llm(judge_text, "")
    if not raw:
        return []

    ids: list[int] = []
    for item in parse_json_response(raw):
        if isinstance(item, dict):
            # Take the first key that holds a usable ID; an "or" chain would
            # conflate a falsy id with a missing one.
            cid = None
            for key in ("id", "claim_id"):
                cid = _as_claim_id(item.get(key))
                if cid is not None:
                    break
        else:
            cid = _as_claim_id(item)
        if cid is not None:
            ids.append(cid)

    # Fallback: regex-extract integers from raw if the parser found nothing.
    if not ids:
        import re

        ids = [int(m) for m in re.findall(r"\b\d{2,8}\b", raw)]

    # Drop repeats while keeping the judge's order, so duplicates cannot
    # crowd out distinct IDs when the caller truncates the list.
    return list(dict.fromkeys(ids))
124
+
125
+
126
def _write_labels(path: Path, labels: dict[str, list[int]]) -> None:
    """Write the labels JSON atomically so an interrupted run can still resume."""
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(json.dumps({"labels": labels}, indent=2), encoding="utf-8")
    # os.replace swaps the file in one step; a crash mid-write leaves the
    # previous checkpoint intact instead of a truncated file.
    os.replace(tmp_path, path)


def main() -> int:
    """Label each prompt with the claim IDs the judge says answer it.

    Reads prompts from a JSONL file (one object with a ``text`` key per
    line), resumes from an existing labels file when present, and for each
    unlabeled prompt fetches candidates, asks the judge, and keeps at most
    five returned IDs that were actually among the candidates.  A prompt
    whose processing raises is recorded with an empty label list.  Labels
    are checkpointed periodically and written once more at the end.

    Returns:
        Process exit code; always 0.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("--prompts", type=Path, required=True)
    p.add_argument("--db", type=str, required=True)
    p.add_argument("--labels-out", type=Path, required=True)
    p.add_argument("--top-k", type=int, default=20)
    p.add_argument("--max-prompts", type=int, default=1000)
    p.add_argument(
        "--checkpoint-every",
        type=int,
        default=25,
        help="Flush labels JSON every N prompts (resume-safe).",
    )
    args = p.parse_args()

    # A value of 0 or less would make the modulo below raise or never fire.
    checkpoint_every = max(1, args.checkpoint_every)

    # Force claude_cli for the judge — Gemini API is rate-limited and slow.
    # Direct assignment (NOT setdefault) — avoid the v3.5.0 hook bug where
    # an inherited shell env left the provider stale.
    os.environ["MEMORYMASTER_LLM_PROVIDER"] = "claude_cli"
    os.environ["MEMORYMASTER_LLM_MODEL"] = "claude-haiku-4-5-20251001"

    prompts: list[dict] = []
    with args.prompts.open(encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            prompts.append(json.loads(line))
    prompts = prompts[: args.max_prompts]

    # Resume from existing labels file if present
    labels: dict[str, list[int]] = {}
    if args.labels_out.exists():
        labels = json.loads(args.labels_out.read_text(encoding="utf-8")).get(
            "labels", {}
        )
        print(f"[label] resuming from {len(labels)} existing labels", flush=True)

    # Create the output directory before spending any LLM calls.
    args.labels_out.parent.mkdir(parents=True, exist_ok=True)

    t_start = time.monotonic()
    processed = 0  # prompts actually judged in this run (excludes resumed ones)
    for i, p_obj in enumerate(prompts, 1):
        text = p_obj["text"]
        sha = _sha1_16(text)
        if sha in labels:
            continue
        try:
            cands = _get_candidates(args.db, text, args.top_k)
            if not cands:
                labels[sha] = []
            else:
                ids = _call_judge(text, cands)
                # Filter to only IDs that were actually in the candidate set
                cand_ids = {c["id"] for c in cands}
                labels[sha] = [cid for cid in ids if cid in cand_ids][:5]
        except Exception as exc:
            print(f"[label] {i}: ERROR {exc}", flush=True)
            labels[sha] = []
        processed += 1

        if i % 5 == 0:
            elapsed = time.monotonic() - t_start
            # Average over prompts judged in this run; dividing by the loop
            # index would count prompts skipped on resume as instant work.
            avg = elapsed / processed
            eta = avg * (len(prompts) - i)
            print(
                f"[label] {i}/{len(prompts)} avg={avg:.1f}s eta={eta/60:.1f}min "
                f"last={labels[sha]}",
                flush=True,
            )

        if i % checkpoint_every == 0:
            _write_labels(args.labels_out, labels)

    _write_labels(args.labels_out, labels)
    n_labeled = sum(1 for v in labels.values() if v)
    print(
        f"[label] DONE wrote {len(labels)} labels "
        f"({n_labeled} non-empty) to {args.labels_out}",
        flush=True,
    )
    return 0
208
+
209
+
210
+ if __name__ == "__main__":
211
+ sys.exit(main())