opencode-semantic-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,448 @@
1
+ """OpenCode database observer for automatic conversation tracking."""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import re
7
+ import sqlite3
8
+ from collections import Counter
9
+ from collections.abc import Callable
10
+ from datetime import UTC, datetime
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from opencode_memory.ingestion.extractors import extract_all_insights
15
+ from opencode_memory.models import Memory, MemoryCategory
16
+ from opencode_memory.project import detect_project_from_path, detect_project_from_git
17
+
18
logger = logging.getLogger(__name__)

# (regex, entity type) pairs scanned over conversation text. Each regex matches
# a one-character sigil followed by the reference itself; callers keep the full
# match (sigil included) as the entity reference.
CONVERSATION_ENTITY_PATTERNS = [
    (r"!(\d+)", "mr"),  # e.g. "!123"
    (r"#(\d+)", "issue"),  # e.g. "#456"
    (r"&(\d+)", "epic"),  # e.g. "&7"
    (r"@([\w\.-]+)", "person"),  # e.g. "@some.user"
]
26
+
27
+
28
+ class OpenCodeDBObserver:
29
+ """Observe the OpenCode SQLite database for new sessions and messages."""
30
+
31
+ def __init__(self, db_path: Path):
32
+ self.db_path = db_path
33
+ self.last_processed_time: datetime | None = None
34
+ self.last_message_id: str | None = None
35
+ self.tool_usage_counts: Counter[str] = Counter()
36
+ self._polling_task: asyncio.Task | None = None
37
+ self._stop_polling = False
38
+ self._on_memories_callback: Callable[[list[Memory]], None] | None = None
39
+
40
+ def db_exists(self) -> bool:
41
+ """Check if the OpenCode database file exists."""
42
+ return self.db_path.exists()
43
+
44
+ def _get_conn(self) -> sqlite3.Connection:
45
+ """Get a read-only connection to the OpenCode database."""
46
+ if not self.db_exists():
47
+ raise FileNotFoundError(f"OpenCode database not found: {self.db_path}")
48
+ conn = sqlite3.connect(f"file:{self.db_path}?mode=ro", uri=True)
49
+ conn.row_factory = sqlite3.Row
50
+ return conn
51
+
52
+ def get_new_sessions(self, since: datetime | None = None) -> list[dict[str, Any]]:
53
+ """Get sessions created since the given time."""
54
+ with self._get_conn() as conn:
55
+ if since:
56
+ since_ts = int(since.timestamp() * 1000)
57
+ cursor = conn.execute(
58
+ """
59
+ SELECT * FROM session
60
+ WHERE time_created > ?
61
+ ORDER BY time_created ASC
62
+ """,
63
+ (since_ts,),
64
+ )
65
+ else:
66
+ cursor = conn.execute("SELECT * FROM session ORDER BY time_created ASC")
67
+
68
+ sessions = []
69
+ for row in cursor.fetchall():
70
+ sessions.append(dict(row))
71
+ return sessions
72
+
73
+ def get_session_messages(
74
+ self, session_id: str, since_message_id: str | None = None
75
+ ) -> list[dict[str, Any]]:
76
+ """Get messages for a session."""
77
+ with self._get_conn() as conn:
78
+ if since_message_id:
79
+ cursor = conn.execute(
80
+ """
81
+ SELECT * FROM message
82
+ WHERE session_id = ? AND id > ?
83
+ ORDER BY time_created ASC
84
+ """,
85
+ (session_id, since_message_id),
86
+ )
87
+ else:
88
+ cursor = conn.execute(
89
+ """
90
+ SELECT * FROM message
91
+ WHERE session_id = ?
92
+ ORDER BY time_created ASC
93
+ """,
94
+ (session_id,),
95
+ )
96
+
97
+ messages = []
98
+ for row in cursor.fetchall():
99
+ msg = dict(row)
100
+ if msg.get("data"):
101
+ try:
102
+ msg["data"] = json.loads(msg["data"])
103
+ except json.JSONDecodeError:
104
+ pass
105
+ messages.append(msg)
106
+ return messages
107
+
108
+ def get_message_parts(self, message_id: str) -> list[dict[str, Any]]:
109
+ """Get parts (content) for a message."""
110
+ with self._get_conn() as conn:
111
+ cursor = conn.execute(
112
+ "SELECT * FROM part WHERE message_id = ? ORDER BY time_created ASC",
113
+ (message_id,),
114
+ )
115
+
116
+ parts = []
117
+ for row in cursor.fetchall():
118
+ part = dict(row)
119
+ if part.get("data"):
120
+ try:
121
+ part["data"] = json.loads(part["data"])
122
+ except json.JSONDecodeError:
123
+ pass
124
+ parts.append(part)
125
+ return parts
126
+
127
+ def extract_session_memories(
128
+ self, session: dict[str, Any]
129
+ ) -> tuple[Memory | None, Memory | None]:
130
+ """Extract full conversation and summary memories from a session.
131
+
132
+ Returns:
133
+ Tuple of (full_conversation, summary) memories. Either may be None.
134
+ """
135
+ title = session.get("title", "Untitled session")
136
+ session_id = session["id"]
137
+
138
+ messages = self.get_session_messages(session_id)
139
+ if not messages:
140
+ return None, None
141
+
142
+ user_messages = []
143
+ assistant_messages = []
144
+ tool_calls = []
145
+ all_text = []
146
+ conversation_parts = [] # Full conversation with roles
147
+
148
+ for msg in messages:
149
+ data = msg.get("data", {})
150
+ role = data.get("role")
151
+
152
+ parts = self.get_message_parts(msg["id"])
153
+ for part in parts:
154
+ part_data = part.get("data", {})
155
+ if part_data.get("type") == "text":
156
+ text = part_data.get("text", "")
157
+ all_text.append(text)
158
+ if role == "user":
159
+ user_messages.append(text)
160
+ conversation_parts.append(f"USER: {text}")
161
+ elif role == "assistant":
162
+ assistant_messages.append(text)
163
+ conversation_parts.append(f"ASSISTANT: {text}")
164
+ elif part_data.get("type") == "tool":
165
+ tool_name = part_data.get("tool", "unknown")
166
+ tool_calls.append(tool_name)
167
+ self.tool_usage_counts[tool_name] += 1
168
+
169
+ if not user_messages:
170
+ return None, None
171
+
172
+ combined_text = "\n".join(all_text)
173
+ entities = self.extract_entities_from_text(combined_text)
174
+
175
+ entity_summary = self._summarize_entities(entities)
176
+ topics = self._extract_topics(user_messages)
177
+ outcome = self._detect_outcome(assistant_messages, tool_calls)
178
+
179
+ # Build summary
180
+ summary_parts = [f"Session: {title}"]
181
+ summary_parts.append(
182
+ f"Messages: {len(user_messages)} user, {len(assistant_messages)} assistant"
183
+ )
184
+
185
+ if tool_calls:
186
+ tool_counts: dict[str, int] = {}
187
+ for tool in tool_calls:
188
+ tool_counts[tool] = tool_counts.get(tool, 0) + 1
189
+ top_tools = sorted(tool_counts.items(), key=lambda x: -x[1])[:5]
190
+ summary_parts.append(f"Tools: {', '.join(f'{t[0]}({t[1]})' for t in top_tools)}")
191
+
192
+ if entity_summary:
193
+ summary_parts.append(f"Entities: {entity_summary}")
194
+
195
+ if topics:
196
+ summary_parts.append(f"Topics: {', '.join(topics[:5])}")
197
+
198
+ if outcome:
199
+ summary_parts.append(f"Outcome: {outcome}")
200
+
201
+ first_query = user_messages[0][:150] if user_messages else ""
202
+ summary_parts.append(f"Started with: {first_query}")
203
+
204
+ directory = session.get("directory")
205
+ project = None
206
+ if directory:
207
+ project = detect_project_from_path(directory) or detect_project_from_git(directory)
208
+
209
+ created_at = datetime.fromtimestamp(session["time_created"] / 1000)
210
+ entity_refs = [ref for _, ref in entities[:10]]
211
+
212
+ # Full conversation memory
213
+ full_content = f"Session: {title}\n\n" + "\n\n".join(conversation_parts)
214
+ full_memory = Memory(
215
+ source_file=f"opencode:session:{session_id}",
216
+ project=project,
217
+ category=MemoryCategory.CONVERSATION,
218
+ content=full_content,
219
+ what=title,
220
+ entities=entity_refs,
221
+ created_at=created_at,
222
+ )
223
+
224
+ # Summary memory
225
+ summary_memory = Memory(
226
+ source_file=f"opencode:session:{session_id}:summary",
227
+ project=project,
228
+ category=MemoryCategory.CONVERSATION_SUMMARY,
229
+ content="\n".join(summary_parts),
230
+ what=title,
231
+ entities=entity_refs,
232
+ created_at=created_at,
233
+ )
234
+
235
+ return full_memory, summary_memory
236
+
237
+ def extract_session_summary(self, session: dict[str, Any]) -> Memory | None:
238
+ """Extract a summary memory from a session (legacy compatibility)."""
239
+ _, summary = self.extract_session_memories(session)
240
+ return summary
241
+
242
+ def _summarize_entities(self, entities: list[tuple[str, str]]) -> str:
243
+ """Summarize entities by type."""
244
+ by_type: dict[str, list[str]] = {}
245
+ for entity_type, ref in entities:
246
+ by_type.setdefault(entity_type, []).append(ref)
247
+
248
+ parts = []
249
+ for etype in ["mr", "issue", "epic", "person"]:
250
+ if etype in by_type:
251
+ refs = by_type[etype][:3]
252
+ if len(by_type[etype]) > 3:
253
+ parts.append(f"{', '.join(refs)} +{len(by_type[etype]) - 3} more {etype}s")
254
+ else:
255
+ parts.append(", ".join(refs))
256
+
257
+ return "; ".join(parts)
258
+
259
+ def _extract_topics(self, user_messages: list[str]) -> list[str]:
260
+ """Extract key topics from user messages."""
261
+ topic_keywords = [
262
+ "review",
263
+ "fix",
264
+ "bug",
265
+ "feature",
266
+ "test",
267
+ "deploy",
268
+ "merge",
269
+ "pipeline",
270
+ "migration",
271
+ "database",
272
+ "api",
273
+ "security",
274
+ "performance",
275
+ "refactor",
276
+ "documentation",
277
+ "config",
278
+ "error",
279
+ "issue",
280
+ "MR",
281
+ ]
282
+
283
+ combined = " ".join(user_messages).lower()
284
+ found_topics = []
285
+ for keyword in topic_keywords:
286
+ if keyword.lower() in combined:
287
+ found_topics.append(keyword)
288
+
289
+ return found_topics
290
+
291
+ def _detect_outcome(self, assistant_messages: list[str], tool_calls: list[str]) -> str:
292
+ """Detect likely session outcome from assistant responses and tools used."""
293
+ if not assistant_messages:
294
+ return ""
295
+
296
+ last_messages = " ".join(assistant_messages[-3:]).lower()
297
+
298
+ if any(word in last_messages for word in ["committed", "pushed", "merged"]):
299
+ return "Code changes committed"
300
+ if any(
301
+ word in last_messages for word in ["created mr", "opened mr", "merge request created"]
302
+ ):
303
+ return "MR created"
304
+ if any(word in last_messages for word in ["test", "passed", "all tests"]):
305
+ return "Tests run"
306
+ if any(word in last_messages for word in ["fixed", "resolved", "solved"]):
307
+ return "Issue resolved"
308
+ if any(word in last_messages for word in ["reviewed", "lgtm", "approved"]):
309
+ return "Review completed"
310
+
311
+ if "gitlab_create_merge_request" in tool_calls:
312
+ return "MR created"
313
+ if "gitlab_approve_merge_request" in tool_calls:
314
+ return "MR approved"
315
+ if any("commit" in t for t in tool_calls):
316
+ return "Code committed"
317
+
318
+ return ""
319
+
320
+ def extract_session_insights(self, session: dict[str, Any]) -> list[Memory]:
321
+ """Extract decisions, blockers, and learnings from session messages."""
322
+ session_id = session["id"]
323
+ created_at = datetime.fromtimestamp(session["time_created"] / 1000)
324
+
325
+ directory = session.get("directory")
326
+ project = None
327
+ if directory:
328
+ project = detect_project_from_path(directory) or detect_project_from_git(directory)
329
+
330
+ messages = self.get_session_messages(session_id)
331
+ if not messages:
332
+ return []
333
+
334
+ all_text = []
335
+ for msg in messages:
336
+ parts = self.get_message_parts(msg["id"])
337
+ for part in parts:
338
+ part_data = part.get("data", {})
339
+ if part_data.get("type") == "text":
340
+ all_text.append(part_data.get("text", ""))
341
+
342
+ combined_text = "\n".join(all_text)
343
+ insights = extract_all_insights(combined_text)
344
+
345
+ memories = []
346
+ seen_contents: set[str] = set()
347
+
348
+ for insight in insights:
349
+ content_key = insight.content[:100].lower()
350
+ if content_key in seen_contents:
351
+ continue
352
+ seen_contents.add(content_key)
353
+
354
+ category = {
355
+ "decision": MemoryCategory.DECISION,
356
+ "blocker": MemoryCategory.BLOCKER,
357
+ "fact": MemoryCategory.FACT,
358
+ }.get(insight.category, MemoryCategory.FACT)
359
+
360
+ memories.append(
361
+ Memory(
362
+ source_file=f"opencode:session:{session_id}",
363
+ project=project,
364
+ category=category,
365
+ content=insight.content,
366
+ what=f"Extracted from session: {insight.category}",
367
+ created_at=created_at,
368
+ )
369
+ )
370
+
371
+ return memories
372
+
373
+ def poll_new_content(self, since: datetime | None = None) -> list[Memory]:
374
+ """Poll for new content and return memories."""
375
+ if not self.db_exists():
376
+ logger.debug(f"OpenCode database not found at {self.db_path}, skipping poll")
377
+ return []
378
+
379
+ memories = []
380
+
381
+ sessions = self.get_new_sessions(since)
382
+ for session in sessions:
383
+ memory = self.extract_session_summary(session)
384
+ if memory:
385
+ memories.append(memory)
386
+
387
+ return memories
388
+
389
+ def extract_entities_from_text(self, text: str) -> list[tuple[str, str]]:
390
+ """Extract entity references (!123, #456, @user) from text."""
391
+ entities: list[tuple[str, str]] = []
392
+ seen: set[str] = set()
393
+
394
+ for pattern, entity_type in CONVERSATION_ENTITY_PATTERNS:
395
+ for match in re.finditer(pattern, text):
396
+ ref = match.group(0)
397
+ if ref not in seen:
398
+ seen.add(ref)
399
+ entities.append((entity_type, ref))
400
+
401
+ return entities
402
+
403
+ def get_tool_usage_stats(self) -> dict[str, int]:
404
+ """Return tool usage counts from tracked sessions."""
405
+ return dict(self.tool_usage_counts.most_common())
406
+
407
+ async def start_polling(
408
+ self,
409
+ interval_seconds: float = 30.0,
410
+ on_memories: Callable[[list[Memory]], None] | None = None,
411
+ ) -> None:
412
+ """Start background polling for new content.
413
+
414
+ Args:
415
+ interval_seconds: How often to poll (default 30s)
416
+ on_memories: Optional callback invoked with new memories
417
+ """
418
+ self._on_memories_callback = on_memories
419
+ self._stop_polling = False
420
+
421
+ logger.info(f"Starting OpenCode DB polling every {interval_seconds}s")
422
+
423
+ while not self._stop_polling:
424
+ try:
425
+ memories = self.poll_new_content(self.last_processed_time)
426
+
427
+ for memory in memories:
428
+ entities = self.extract_entities_from_text(memory.content)
429
+ for _, ref in entities:
430
+ if ref not in memory.entities:
431
+ memory.entities.append(ref)
432
+
433
+ if memories:
434
+ self.last_processed_time = datetime.now(UTC)
435
+ logger.debug(f"Polled {len(memories)} new memories")
436
+
437
+ if self._on_memories_callback:
438
+ self._on_memories_callback(memories)
439
+
440
+ except Exception as e:
441
+ logger.error(f"Error polling OpenCode DB: {e}")
442
+
443
+ await asyncio.sleep(interval_seconds)
444
+
445
+ def stop_polling(self) -> None:
446
+ """Stop the background polling."""
447
+ self._stop_polling = True
448
+ logger.info("Stopping OpenCode DB polling")