@simbimbo/memory-ocmemog 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/CHANGELOG.md +30 -0
  2. package/README.md +85 -18
  3. package/brain/runtime/__init__.py +2 -12
  4. package/brain/runtime/config.py +1 -24
  5. package/brain/runtime/inference.py +1 -151
  6. package/brain/runtime/instrumentation.py +1 -15
  7. package/brain/runtime/memory/__init__.py +3 -13
  8. package/brain/runtime/memory/api.py +1 -1219
  9. package/brain/runtime/memory/candidate.py +1 -185
  10. package/brain/runtime/memory/conversation_state.py +1 -1823
  11. package/brain/runtime/memory/distill.py +1 -344
  12. package/brain/runtime/memory/embedding_engine.py +1 -92
  13. package/brain/runtime/memory/freshness.py +1 -112
  14. package/brain/runtime/memory/health.py +1 -40
  15. package/brain/runtime/memory/integrity.py +1 -186
  16. package/brain/runtime/memory/memory_consolidation.py +1 -58
  17. package/brain/runtime/memory/memory_links.py +1 -107
  18. package/brain/runtime/memory/memory_salience.py +1 -233
  19. package/brain/runtime/memory/memory_synthesis.py +1 -31
  20. package/brain/runtime/memory/memory_taxonomy.py +1 -33
  21. package/brain/runtime/memory/pondering_engine.py +1 -654
  22. package/brain/runtime/memory/promote.py +1 -277
  23. package/brain/runtime/memory/provenance.py +1 -406
  24. package/brain/runtime/memory/reinforcement.py +1 -71
  25. package/brain/runtime/memory/retrieval.py +1 -210
  26. package/brain/runtime/memory/semantic_search.py +1 -64
  27. package/brain/runtime/memory/store.py +1 -429
  28. package/brain/runtime/memory/unresolved_state.py +1 -91
  29. package/brain/runtime/memory/vector_index.py +1 -323
  30. package/brain/runtime/model_roles.py +1 -9
  31. package/brain/runtime/model_router.py +1 -22
  32. package/brain/runtime/providers.py +1 -66
  33. package/brain/runtime/security/redaction.py +1 -12
  34. package/brain/runtime/state_store.py +1 -23
  35. package/brain/runtime/storage_paths.py +1 -39
  36. package/docs/architecture/memory.md +20 -24
  37. package/docs/release-checklist.md +19 -6
  38. package/docs/usage.md +33 -17
  39. package/index.ts +8 -1
  40. package/ocmemog/__init__.py +11 -0
  41. package/ocmemog/doctor.py +1255 -0
  42. package/ocmemog/runtime/__init__.py +18 -0
  43. package/ocmemog/runtime/_compat_bridge.py +28 -0
  44. package/ocmemog/runtime/config.py +35 -0
  45. package/ocmemog/runtime/identity.py +115 -0
  46. package/ocmemog/runtime/inference.py +164 -0
  47. package/ocmemog/runtime/instrumentation.py +20 -0
  48. package/ocmemog/runtime/memory/__init__.py +91 -0
  49. package/ocmemog/runtime/memory/api.py +1431 -0
  50. package/ocmemog/runtime/memory/candidate.py +192 -0
  51. package/ocmemog/runtime/memory/conversation_state.py +1831 -0
  52. package/ocmemog/runtime/memory/distill.py +282 -0
  53. package/ocmemog/runtime/memory/embedding_engine.py +151 -0
  54. package/ocmemog/runtime/memory/freshness.py +114 -0
  55. package/ocmemog/runtime/memory/health.py +57 -0
  56. package/ocmemog/runtime/memory/integrity.py +208 -0
  57. package/ocmemog/runtime/memory/memory_consolidation.py +60 -0
  58. package/ocmemog/runtime/memory/memory_links.py +109 -0
  59. package/ocmemog/runtime/memory/memory_salience.py +235 -0
  60. package/ocmemog/runtime/memory/memory_synthesis.py +33 -0
  61. package/ocmemog/runtime/memory/memory_taxonomy.py +35 -0
  62. package/ocmemog/runtime/memory/pondering_engine.py +681 -0
  63. package/ocmemog/runtime/memory/promote.py +279 -0
  64. package/ocmemog/runtime/memory/provenance.py +408 -0
  65. package/ocmemog/runtime/memory/reinforcement.py +73 -0
  66. package/ocmemog/runtime/memory/retrieval.py +224 -0
  67. package/ocmemog/runtime/memory/semantic_search.py +66 -0
  68. package/ocmemog/runtime/memory/store.py +433 -0
  69. package/ocmemog/runtime/memory/unresolved_state.py +93 -0
  70. package/ocmemog/runtime/memory/vector_index.py +411 -0
  71. package/ocmemog/runtime/model_roles.py +16 -0
  72. package/ocmemog/runtime/model_router.py +29 -0
  73. package/ocmemog/runtime/providers.py +79 -0
  74. package/ocmemog/runtime/roles.py +92 -0
  75. package/ocmemog/runtime/security/__init__.py +8 -0
  76. package/ocmemog/runtime/security/redaction.py +17 -0
  77. package/ocmemog/runtime/state_store.py +34 -0
  78. package/ocmemog/runtime/storage_paths.py +70 -0
  79. package/ocmemog/sidecar/app.py +311 -23
  80. package/ocmemog/sidecar/compat.py +50 -13
  81. package/ocmemog/sidecar/transcript_watcher.py +391 -190
  82. package/openclaw.plugin.json +4 -0
  83. package/package.json +1 -1
  84. package/scripts/ocmemog-backfill-vectors.py +5 -3
  85. package/scripts/ocmemog-continuity-benchmark.py +1 -1
  86. package/scripts/ocmemog-demo.py +1 -1
  87. package/scripts/ocmemog-doctor.py +15 -0
  88. package/scripts/ocmemog-install.sh +29 -7
  89. package/scripts/ocmemog-integrated-proof.py +373 -0
  90. package/scripts/ocmemog-reindex-vectors.py +5 -3
  91. package/scripts/ocmemog-release-check.sh +330 -0
  92. package/scripts/ocmemog-sidecar.sh +4 -2
  93. package/scripts/ocmemog-test-rig.py +5 -3
  94. package/brain/runtime/memory/artifacts.py +0 -33
  95. package/brain/runtime/memory/context_builder.py +0 -112
  96. package/brain/runtime/memory/interaction_memory.py +0 -57
  97. package/brain/runtime/memory/memory_gate.py +0 -38
  98. package/brain/runtime/memory/memory_graph.py +0 -54
  99. package/brain/runtime/memory/person_identity.py +0 -83
  100. package/brain/runtime/memory/person_memory.py +0 -138
  101. package/brain/runtime/memory/sentiment_memory.py +0 -67
  102. package/brain/runtime/memory/tool_catalog.py +0 -68
@@ -0,0 +1,192 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ import uuid
6
+ from difflib import SequenceMatcher
7
+ from typing import Any, Dict
8
+
9
+ from ocmemog.runtime.security import redaction
10
+ from ocmemog.runtime.memory import provenance, store
11
+ from ocmemog.runtime.instrumentation import emit_event
12
+ from ocmemog.runtime import state_store
13
+
14
# Log destination obtained from state_store.report_log_path(); it is passed as
# the first argument to every emit_event call made by create_candidate.
+ LOGFILE = state_store.report_log_path()
15
# Similarity cut-off used by _find_near_duplicate_candidate: existing rows whose
# summary similarity is below this value are skipped without further checks.
+ _NEAR_DUPLICATE_SIMILARITY = 0.85
16
+
17
+
18
+ def _normalize_summary(text: str) -> str:
19
+ return re.sub(r"\s+", " ", str(text or "").strip().lower())
20
+
21
+
22
def _tokenize(text: str) -> set[str]:
    """Return the distinct lowercase alphanumeric tokens found in *text*.

    The text is normalized with ``_normalize_summary`` first, so matching is
    case-insensitive; any character outside ``a-z`` / ``0-9`` separates tokens.
    """
    normalized = _normalize_summary(text)
    return set(re.findall(r"[a-z0-9]+", normalized))
24
+
25
+
26
def _summary_similarity(left: str, right: str) -> float:
    """Score how alike two summaries are, as a float in ``[0.0, 1.0]``.

    Two measures are computed and the larger one is returned:

    * token overlap: shared tokens divided by all distinct tokens across both
      summaries (0.0 when either summary has no tokens);
    * ``difflib.SequenceMatcher`` ratio over the normalized summary strings.
    """
    tokens_a, tokens_b = _tokenize(left), _tokenize(right)
    if tokens_a and tokens_b:
        shared = len(tokens_a & tokens_b)
        combined = len(tokens_a | tokens_b)
        token_score = shared / max(1, combined)
    else:
        token_score = 0.0

    matcher = SequenceMatcher(None, _normalize_summary(left), _normalize_summary(right))
    return max(token_score, matcher.ratio())
36
+
37
+
38
+ def _ranges_overlap(left: Dict[str, Any], right: Dict[str, Any]) -> bool:
39
+ if str(left.get("path") or "") != str(right.get("path") or ""):
40
+ return False
41
+
42
+ def _as_int(value: Any) -> int | None:
43
+ try:
44
+ return int(value) if value is not None else None
45
+ except Exception:
46
+ return None
47
+
48
+ left_start = _as_int(left.get("start_line"))
49
+ left_end = _as_int(left.get("end_line")) or left_start
50
+ right_start = _as_int(right.get("start_line"))
51
+ right_end = _as_int(right.get("end_line")) or right_start
52
+
53
+ if left_start is None and right_start is None:
54
+ return True
55
+ if left_start is None or right_start is None:
56
+ return False
57
+ return max(left_start, right_start) <= min(left_end or left_start, right_end or right_start)
58
+
59
+
60
def _shares_provenance_anchor(left: Dict[str, Any], right: Dict[str, Any]) -> bool:
    """Tell whether two metadata dicts point at the same source material.

    Both inputs are passed through ``provenance.normalize_metadata`` and their
    ``"provenance"`` sections are compared. They share an anchor when any of
    the following holds:

    * both carry the same non-empty ``conversation.message_id``;
    * both have a ``transcript_anchor`` with a path and ``_ranges_overlap``
      reports the anchors as overlapping;
    * their ``source_references`` have at least one entry in common.

    Sections that are missing or not dicts are treated as empty.
    """

    def _section(container: Dict[str, Any], key: str) -> Dict[str, Any]:
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    def _reference_set(prov: Dict[str, Any]) -> set[str]:
        # Blank references are ignored; the rest are compared as strings.
        return {str(item) for item in prov.get("source_references") or [] if str(item).strip()}

    meta_a = provenance.normalize_metadata(left)
    meta_b = provenance.normalize_metadata(right)
    prov_a = _section(meta_a, "provenance")
    prov_b = _section(meta_b, "provenance")

    conv_a = _section(prov_a, "conversation")
    conv_b = _section(prov_b, "conversation")
    message_id = conv_a.get("message_id")
    if message_id and message_id == conv_b.get("message_id"):
        return True

    anchor_a = _section(prov_a, "transcript_anchor")
    anchor_b = _section(prov_b, "transcript_anchor")
    both_have_paths = anchor_a.get("path") and anchor_b.get("path")
    if both_have_paths and _ranges_overlap(anchor_a, anchor_b):
        return True

    refs_a = _reference_set(prov_a)
    refs_b = _reference_set(prov_b)
    return not refs_a.isdisjoint(refs_b)
79
+
80
+
81
def _find_near_duplicate_candidate(conn, source_event_id: int, summary: str, metadata: Dict[str, Any]) -> str | None:
    """Return the id of a recent candidate that near-duplicates *summary*.

    Looks at the 250 most recently created candidates that belong to a
    different source event. A row is a near-duplicate when its summary scores
    at least ``_NEAR_DUPLICATE_SIMILARITY`` against *summary* and its stored
    metadata shares a provenance anchor with *metadata*.

    Args:
        conn: Open database connection exposing ``execute(...).fetchall()``.
        source_event_id: Event id of the new candidate; rows with this id are
            excluded from the scan.
        summary: Summary text of the new candidate.
        metadata: Metadata of the new candidate.

    Returns:
        The matching ``candidate_id`` as a string, or ``None`` if no row
        qualifies.
    """

    def _column(row, key: str, index: int):
        # Rows are read by name when they are dicts and by position otherwise.
        return row[key] if isinstance(row, dict) else row[index]

    rows = conn.execute(
        """
        SELECT candidate_id, distilled_summary, metadata_json
        FROM candidates
        WHERE source_event_id != ?
        ORDER BY created_at DESC, candidate_id DESC
        LIMIT 250
        """,
        (source_event_id,),
    ).fetchall()
    normalized_summary = _normalize_summary(summary)
    for row in rows:
        # The "or" fallback is applied after the column lookup so a NULL value
        # becomes "" for either row shape (previously a dict row yielded "None").
        existing_summary = str(_column(row, "distilled_summary", 1) or "")
        similarity = _summary_similarity(normalized_summary, existing_summary)
        if similarity < _NEAR_DUPLICATE_SIMILARITY:
            continue
        try:
            existing_metadata = json.loads(_column(row, "metadata_json", 2) or "{}")
        except (TypeError, ValueError):
            # Best effort: unreadable stored metadata is treated as empty.
            existing_metadata = {}
        if _shares_provenance_anchor(metadata, existing_metadata):
            return str(_column(row, "candidate_id", 0))
    return None
105
+
106
+
107
def create_candidate(
    source_event_id: int,
    distilled_summary: str,
    verification_points: list[str],
    confidence_score: float,
    metadata: Dict[str, Any] | None = None,
) -> Dict[str, Any]:
    """Store a new memory candidate unless an equivalent one already exists.

    The summary and each verification point are passed through
    ``redaction.redact_text`` and the metadata through
    ``provenance.normalize_metadata(..., source="candidate")`` before anything
    is written. An existing candidate is reused when either

    * a row with the same ``source_event_id`` and redacted summary exists, or
    * ``_find_near_duplicate_candidate`` reports a near-duplicate.

    Otherwise a ``candidates`` row (status ``"pending"``) and a
    ``candidate_created`` row in ``memory_events`` are inserted and committed.
    An instrumentation event is emitted in every case.

    Args:
        source_event_id: Id of the event the candidate was distilled from.
        distilled_summary: Candidate summary text.
        verification_points: Supporting statements; ``None`` or an empty list
            marks the candidate ``"unverified"``.
        confidence_score: Score stored with the candidate.
        metadata: Optional metadata to normalize and store as JSON.

    Returns:
        ``{"candidate_id": <id>, "duplicate": <bool>}`` where ``duplicate`` is
        ``True`` when an existing candidate was reused.
    """
    summary, redacted = redaction.redact_text(distilled_summary)
    verification_lines = []
    for point in verification_points or []:
        clean, _ = redaction.redact_text(str(point))
        verification_lines.append(clean)

    normalized_metadata = provenance.normalize_metadata(metadata, source="candidate")

    candidate_id = None
    duplicate_kind = None  # None -> newly created, otherwise "exact" or "near"
    conn = store.connect()
    try:
        exact_row = conn.execute(
            "SELECT candidate_id FROM candidates WHERE source_event_id=? AND distilled_summary=?",
            (source_event_id, summary),
        ).fetchone()
        if exact_row:
            candidate_id = exact_row[0]
            duplicate_kind = "exact"
        else:
            near_duplicate_id = _find_near_duplicate_candidate(conn, source_event_id, summary, normalized_metadata)
            if near_duplicate_id:
                candidate_id = near_duplicate_id
                duplicate_kind = "near"

        if duplicate_kind is None:
            candidate_id = str(uuid.uuid4())
            verification_status = "verified" if verification_lines else "unverified"
            conn.execute(
                """
                INSERT INTO candidates (
                    candidate_id, source_event_id, distilled_summary, verification_points,
                    confidence_score, status, verification_status, metadata_json, schema_version
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    candidate_id,
                    source_event_id,
                    summary,
                    "\n".join(verification_lines),
                    confidence_score,
                    "pending",
                    verification_status,
                    json.dumps(normalized_metadata, ensure_ascii=False),
                    store.SCHEMA_VERSION,
                ),
            )
            conn.execute(
                "INSERT INTO memory_events (event_type, source, details_json, schema_version) VALUES (?, ?, ?, ?)",
                (
                    "candidate_created",
                    str(source_event_id),
                    json.dumps({"candidate_id": candidate_id, "redacted": redacted, "verification_status": verification_status}),
                    store.SCHEMA_VERSION,
                ),
            )
            conn.commit()
    finally:
        # Always release the connection, including when a query or insert raises.
        conn.close()

    # Events are emitted only after the connection has been closed.
    if duplicate_kind == "exact":
        emit_event(LOGFILE, "brain_memory_candidate_duplicate", status="ok", source_event_id=source_event_id)
        return {"candidate_id": candidate_id, "duplicate": True}
    if duplicate_kind == "near":
        emit_event(
            LOGFILE,
            "brain_memory_candidate_duplicate",
            status="ok",
            source_event_id=source_event_id,
            duplicate_kind="near",
        )
        return {"candidate_id": candidate_id, "duplicate": True}

    emit_event(LOGFILE, "brain_memory_candidate_created", status="ok", source_event_id=source_event_id, redacted=redacted)
    return {"candidate_id": candidate_id, "duplicate": False}
178
+
179
+
180
def get_candidate(candidate_id: str) -> Dict[str, Any] | None:
    """Fetch one candidate row by id.

    Args:
        candidate_id: Identifier of the candidate to look up.

    Returns:
        A dict built from the selected columns (``candidate_id``,
        ``source_event_id``, ``distilled_summary``, ``verification_points``,
        ``confidence_score``, ``status``, ``verification_status``,
        ``metadata_json``), or ``None`` when no row matches.
    """
    conn = store.connect()
    try:
        row = conn.execute(
            """
            SELECT candidate_id, source_event_id, distilled_summary, verification_points,
                   confidence_score, status, verification_status, metadata_json
            FROM candidates
            WHERE candidate_id=?
            """,
            (candidate_id,),
        ).fetchone()
    finally:
        # Close the connection even if the query fails.
        conn.close()
    return dict(row) if row else None