opencode-semantic-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,171 @@
1
+ """Data models for opencode-memory."""
2
+
3
+ from datetime import UTC, datetime
4
+ from enum import Enum
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
+ def _utc_now() -> datetime:
11
+ """Return timezone-aware UTC datetime."""
12
+ return datetime.now(UTC)
13
+
14
+
15
class EntityType(str, Enum):
    """Kinds of entity a memory can refer to.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string value.
    """

    MR = "mr"
    ISSUE = "issue"
    EPIC = "epic"
    PERSON = "person"
    CONCEPT = "concept"
    FILE = "file"
    SESSION = "session"
25
+
26
+
27
class MemoryCategory(str, Enum):
    """Classification assigned to each stored memory.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string value.
    """

    DECISION = "decision"
    BLOCKER = "blocker"
    PROCEDURE = "procedure"
    FACT = "fact"
    EVENT = "event"
    # The complete content of a conversation.
    CONVERSATION = "conversation"
    # A compact summary of a conversation.
    CONVERSATION_SUMMARY = "conversation_summary"
    DIRECTIVE = "directive"
    # Long-term goals and the strategies to achieve them.
    PLAN = "plan"
    # Future possibilities, deferred considerations, things to try later.
    IDEA = "idea"
40
+
41
+
42
class LinkType(str, Enum):
    """Kinds of relationship that can connect two memories.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string value.
    """

    # Semantically similar content.
    RELATED = "related"
    # Builds on or elaborates another memory.
    EXTENDS = "extends"
    # Replaces or updates an older memory.
    SUPERSEDES = "supersedes"
    # Conflicts with another memory.
    CONTRADICTS = "contradicts"
    # Both memories are about the same MR/issue/epic/person.
    SAME_ENTITY = "same_entity"
    # Sequential chunks from the same source (a strong link).
    SEQUENCE = "sequence"
51
+
52
+
53
class Entity(BaseModel):
    """A trackable entity such as a merge request, issue, epic or person."""

    id: int | None = None
    type: EntityType
    # Short reference including its sigil, e.g. "!123", "#456", "&789", "@user".
    ref: str
    # Project path taken from a cross-project reference; None for simple refs.
    project: str | None = None
    title: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)

    @classmethod
    def from_ref(cls, ref: str) -> "Entity | None":
        """Build an entity from a textual reference.

        Two shapes are recognised:

        * cross-project references such as ``gitlab-org/gitlab!123`` or
          ``group/project#456`` -- the part before the sigil becomes
          ``project`` and ``ref`` is reduced to the sigil plus number;
        * simple references that merely start with ``!``, ``#``, ``&`` or
          ``@`` -- the whole string is kept as ``ref``.

        Returns:
            The parsed entity, or ``None`` when ``ref`` matches neither shape.
        """
        import re

        # Sigil -> entity type. "@" is only valid for simple references.
        numbered_kinds = {
            "!": EntityType.MR,
            "#": EntityType.ISSUE,
            "&": EntityType.EPIC,
        }

        # Cross-project form: <project path><sigil><digits>.
        scoped = re.match(r"^([\w\-./]+)([!#&])(\d+)$", ref)
        if scoped is not None:
            project, sigil, number = scoped.groups()
            return cls(type=numbered_kinds[sigil], ref=f"{sigil}{number}", project=project)

        # Simple form: dispatch on the first character only.
        simple_kinds = {**numbered_kinds, "@": EntityType.PERSON}
        kind = simple_kinds.get(ref[:1])
        if kind is None:
            return None
        return cls(type=kind, ref=ref)
96
+
97
+
98
class Memory(BaseModel):
    """One stored memory or fact, plus its provenance and lifecycle stamps."""

    id: int | None = None
    source_file: str | None = None
    source_line: int | None = None
    # Project identifier, e.g. "gitlab-org/gitlab", "personal/financial_planner".
    project: str | None = None
    category: MemoryCategory
    content: str
    what: str | None = None
    why: str | None = None
    learned: str | None = None
    created_at: datetime = Field(default_factory=_utc_now)
    expires_at: datetime | None = None
    resolved_at: datetime | None = None
    embedding_id: str | None = None
    # Entity reference strings associated with this memory.
    entities: list[str] = Field(default_factory=list)

    def embedding_content(self) -> str:
        """Compose the text that should be embedded for this memory.

        The project (in square brackets) and the entity references are
        placed in front of the content, each only when present. This keeps
        memories from different projects apart in vector space and makes
        memories about specific MRs/issues/epics easier to find.

        Returns:
            The space-joined pieces, always ending with ``content``.
        """
        pieces: list[str] = [f"[{self.project}]"] if self.project else []
        if self.entities:
            pieces.append(" ".join(self.entities))
        return " ".join([*pieces, self.content])
130
+
131
+
132
class Session(BaseModel):
    """State recorded for an active OpenCode session."""

    id: str
    # Both timestamps default to the moment the model is created.
    started_at: datetime = Field(default_factory=_utc_now)
    last_heartbeat: datetime = Field(default_factory=_utc_now)
    working_on: str | None = None
    claimed_items: list[str] = Field(default_factory=list)
140
+
141
+
142
class SearchResult(BaseModel):
    """A memory returned by a search, together with its relevance data."""

    memory: Memory
    # Relevance score; larger means a better match.
    score: float
    # Label for how the memory was found (free-form string).
    match_type: str
    entities: list[Entity] = Field(default_factory=list)
149
+
150
+
151
class MemoryLink(BaseModel):
    """A typed connection from one memory to another."""

    id: int | None = None
    source_memory_id: int
    target_memory_id: int
    link_type: LinkType
    # Confidence score intended to lie in 0-1 (not validated here).
    strength: float = 0.5
    # Why the memories were linked; kept for debugging and transparency.
    reason: str | None = None
    created_at: datetime = Field(default_factory=_utc_now)
161
+
162
+
163
class BootContext(BaseModel):
    """Bundle of context handed back when a session boots."""

    identity: dict[str, Any] | None = None
    active_sessions: list[Session] = Field(default_factory=list)
    hot_items: list[dict[str, Any]] = Field(default_factory=list)
    # The three memory lists below all default to empty.
    recent_decisions: list[Memory] = Field(default_factory=list)
    unresolved_blockers: list[Memory] = Field(default_factory=list)
    directives: list[Memory] = Field(default_factory=list)
@@ -0,0 +1,86 @@
1
+ """Project detection utilities."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+
7
+ def detect_project_from_path(path: str | Path | None) -> str | None:
8
+ """Detect project identifier from a file path.
9
+
10
+ Returns project identifiers like:
11
+ - "gitlab-org/gitlab" for GitLab monorepo
12
+ - "personal/financial_planner" for personal projects
13
+ - "ghavenga/opencode-memory" for this project
14
+ """
15
+ if not path:
16
+ return None
17
+
18
+ path_str = str(path)
19
+
20
+ # Handle opencode session sources
21
+ if path_str.startswith("opencode:session:"):
22
+ return None # Will be derived from session's working dir
23
+
24
+ # Common project roots to check
25
+ project_patterns = [
26
+ # GitLab projects under gdk
27
+ (r"/gdk/gitlab(?:/|$)", "gitlab-org/gitlab"),
28
+ (r"/gdk/gitaly(?:/|$)", "gitlab-org/gitaly"),
29
+ (r"/gdk/gitlab-runner(?:/|$)", "gitlab-org/gitlab-runner"),
30
+ # Projects under gitlab_projects
31
+ (r"/gitlab_projects/opencode-memory(?:/|$)", "ghavenga/opencode-memory"),
32
+ (r"/gitlab_projects/gdk/gitlab(?:/|$)", "gitlab-org/gitlab"),
33
+ # Personal projects
34
+ (r"/financial_planner(?:/|$)", "personal/financial_planner"),
35
+ # Generic .opencode notes - try to extract from path
36
+ (r"/\.opencode/gitlab-org/", "gitlab-org/gitlab"),
37
+ ]
38
+
39
+ for pattern, project in project_patterns:
40
+ if re.search(pattern, path_str):
41
+ return project
42
+
43
+ # Try to extract from git remote or path structure
44
+ # Look for patterns like /home/user/projects/org/repo/
45
+ match = re.search(r"/([^/]+)/([^/]+)(?:/\.opencode)?(?:/|$)", path_str)
46
+ if match:
47
+ # Could be org/repo structure
48
+ pass
49
+
50
+ return None
51
+
52
+
53
+ def detect_project_from_cwd(cwd: str | Path | None) -> str | None:
54
+ """Detect project from current working directory."""
55
+ if not cwd:
56
+ return None
57
+ return detect_project_from_path(cwd)
58
+
59
+
60
+ def detect_project_from_git(path: str | Path | None) -> str | None:
61
+ """Detect project from git remote in the given path."""
62
+ if not path:
63
+ return None
64
+
65
+ import subprocess
66
+
67
+ try:
68
+ result = subprocess.run(
69
+ ["git", "remote", "get-url", "origin"],
70
+ cwd=path,
71
+ capture_output=True,
72
+ text=True,
73
+ timeout=5,
74
+ )
75
+ if result.returncode == 0:
76
+ remote_url = result.stdout.strip()
77
+ # Extract org/repo from various URL formats
78
+ # git@gitlab.com:gitlab-org/gitlab.git
79
+ # https://gitlab.com/gitlab-org/gitlab.git
80
+ match = re.search(r"[:/]([^/]+)/([^/]+?)(?:\.git)?$", remote_url)
81
+ if match:
82
+ return f"{match.group(1)}/{match.group(2)}"
83
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
84
+ pass
85
+
86
+ return None
@@ -0,0 +1,5 @@
1
+ """Query modules for opencode-memory."""
2
+
3
+ from opencode_memory.query.hybrid import HybridSearchEngine
4
+
5
# Names exported by `from opencode_memory.query import *`.
__all__ = ["HybridSearchEngine"]
@@ -0,0 +1,196 @@
1
+ """Hybrid search combining FTS and vector similarity."""
2
+
3
+ from datetime import UTC, datetime, timedelta
4
+
5
+ from opencode_memory.ingestion.embeddings import EmbeddingEngine
6
+ from opencode_memory.models import MemoryCategory, SearchResult
7
+ from opencode_memory.storage.sqlite import SQLiteStorage
8
+ from opencode_memory.storage.vectors import VectorStorage
9
+
10
# Category importance weights (higher = more important). The final score of a
# search hit is multiplied by the weight of its memory's category; categories
# missing from this table are treated as 1.0 by the lookup in
# `_apply_scoring_adjustments`.
CATEGORY_WEIGHTS = {
    MemoryCategory.DIRECTIVE: 1.5,  # Standing instructions - highest
    MemoryCategory.PLAN: 1.4,  # Long-term goals guide priorities
    MemoryCategory.BLOCKER: 1.3,  # Active blockers are critical
    MemoryCategory.DECISION: 1.2,  # Past decisions inform current work
    MemoryCategory.PROCEDURE: 1.1,  # How-to knowledge
    MemoryCategory.FACT: 1.0,  # Baseline
    MemoryCategory.IDEA: 1.0,  # Same as the implicit default; listed so every category is explicit
    MemoryCategory.EVENT: 0.9,  # Historical events
    MemoryCategory.CONVERSATION_SUMMARY: 0.8,  # Compact session summaries
    MemoryCategory.CONVERSATION: 0.6,  # Full session logs - lowest (large, noisy)
}

# Recency decay: memories lose relevance over time.
RECENCY_HALF_LIFE_DAYS = 30  # Score halves every 30 days
25
+
26
+
27
class HybridSearchEngine:
    """Combine FTS and vector search for best results.

    Full-text hits and vector-similarity hits are scored independently,
    blended with caller-supplied weights, and finally re-weighted by the
    memory's category and age before ranking.
    """

    def __init__(
        self,
        sqlite: SQLiteStorage,
        vectors: VectorStorage,
        embeddings: EmbeddingEngine,
    ):
        """Store the three collaborators used by every search.

        Args:
            sqlite: Storage providing ``search_fts`` and ``get_memories_by_ids``.
            vectors: Storage providing ``search`` over embeddings.
            embeddings: Engine providing ``embed`` and ``embed_async``.
        """
        self.sqlite = sqlite
        self.vectors = vectors
        self.embeddings = embeddings

    def search(
        self,
        query: str,
        limit: int = 20,
        fts_weight: float = 0.3,
        vector_weight: float = 0.7,
        project: str | None = None,
    ) -> list[SearchResult]:
        """Search using both FTS and vector similarity.

        Args:
            query: Free-text query.
            limit: Maximum number of results; non-positive values yield ``[]``.
            fts_weight: Multiplier applied to full-text scores.
            vector_weight: Multiplier applied to vector scores.
            project: When given, restrict results to this project (same
                meaning as in ``search_async``).

        Returns:
            At most ``limit`` results, best score first.
        """
        if limit <= 0:
            return []

        # Over-fetch from each source so the merged list can still fill `limit`.
        fts_results = self._search_fts(query, limit * 2, project=project)
        vector_results = self._search_vectors(query, limit * 2, project=project)
        return self._rank_merged(fts_results, vector_results, limit, fts_weight, vector_weight)

    async def search_async(
        self,
        query: str,
        limit: int = 20,
        fts_weight: float = 0.3,
        vector_weight: float = 0.7,
        project: str | None = None,
    ) -> list[SearchResult]:
        """Search using both FTS and vector similarity (async-safe).

        Same contract as ``search``; only the query embedding is awaited.
        """
        if limit <= 0:
            return []

        fts_results = self._search_fts(query, limit * 2, project=project)
        vector_results = await self._search_vectors_async(query, limit * 2, project=project)
        return self._rank_merged(fts_results, vector_results, limit, fts_weight, vector_weight)

    def _rank_merged(
        self,
        fts_results: list[SearchResult],
        vector_results: list[SearchResult],
        limit: int,
        fts_weight: float,
        vector_weight: float,
    ) -> list[SearchResult]:
        """Merge both result lists and return the top ``limit`` by score."""
        combined = self._merge_results(fts_results, vector_results, fts_weight, vector_weight)
        # `reverse=True` keeps the sort stable, so ties stay in merge order.
        return sorted(combined, key=lambda hit: hit.score, reverse=True)[:limit]

    def _search_fts(self, query: str, limit: int, project: str | None = None) -> list[SearchResult]:
        """Perform FTS search.

        Scores are purely rank based: the first hit gets 1.0, the second
        1/2, the third 1/3, and so on.
        """
        memories = self.sqlite.search_fts(query, limit, project=project)
        return [
            SearchResult(memory=memory, score=1.0 / rank, match_type="fts")
            for rank, memory in enumerate(memories, start=1)
        ]

    def _search_vectors(
        self, query: str, limit: int, project: str | None = None
    ) -> list[SearchResult]:
        """Perform vector similarity search."""
        query_embedding = self.embeddings.embed(query)
        return self._process_vector_results(query_embedding, limit, project=project)

    async def _search_vectors_async(
        self, query: str, limit: int, project: str | None = None
    ) -> list[SearchResult]:
        """Perform vector similarity search (async-safe)."""
        query_embedding = await self.embeddings.embed_async(query)
        return self._process_vector_results(query_embedding, limit, project=project)

    def _process_vector_results(
        self, query_embedding: list[float], limit: int, project: str | None = None
    ) -> list[SearchResult]:
        """Process vector search results into SearchResult objects.

        Each raw hit is read through ``.get("memory_id")`` and
        ``.get("_distance")``; the distance is turned into a score with
        ``1 / (1 + distance)``. Hits without a memory id, hits whose memory
        cannot be loaded, and (when ``project`` is given) hits from other
        projects are skipped.
        """
        # The project filter is applied after the vector lookup, so fetch
        # twice as many candidates when filtering to compensate.
        results = self.vectors.search(query_embedding, limit * 2 if project else limit)

        # Batch fetch all memories in a single query (avoid N+1).
        memory_ids = [mid for r in results if (mid := r.get("memory_id"))]
        memories_map = self.sqlite.get_memories_by_ids(memory_ids)

        search_results: list[SearchResult] = []
        for r in results:
            memory_id = r.get("memory_id")
            if not memory_id or memory_id not in memories_map:
                continue
            memory = memories_map[memory_id]
            if project and memory.project != project:
                continue
            distance = r.get("_distance", 1.0)
            search_results.append(
                SearchResult(
                    memory=memory,
                    score=1.0 / (1.0 + distance),
                    match_type="vector",
                )
            )
            if len(search_results) >= limit:
                break
        return search_results

    def _merge_results(
        self,
        fts_results: list[SearchResult],
        vector_results: list[SearchResult],
        fts_weight: float,
        vector_weight: float,
    ) -> list[SearchResult]:
        """Merge and deduplicate results.

        Results are keyed by memory id (results without an id are dropped).
        A memory found by both searches gets the sum of its weighted scores
        and the match type ``"hybrid"``. Category and recency adjustments
        are applied to the blended score.
        """
        # memory id -> result carrying the blended (not yet adjusted) score.
        # Dict keys already guarantee one entry per memory.
        blended: dict[int, SearchResult] = {}

        for hit in fts_results:
            if hit.memory.id is None:
                continue
            blended[hit.memory.id] = SearchResult(
                memory=hit.memory,
                score=hit.score * fts_weight,
                match_type="fts",
            )

        for hit in vector_results:
            memory_id = hit.memory.id
            if memory_id is None:
                continue
            earlier = blended.get(memory_id)
            earlier_score = earlier.score if earlier is not None else 0.0
            blended[memory_id] = SearchResult(
                memory=hit.memory,
                score=earlier_score + hit.score * vector_weight,
                match_type="hybrid" if earlier is not None else "vector",
            )

        return [
            SearchResult(
                memory=entry.memory,
                score=self._apply_scoring_adjustments(entry.memory, entry.score),
                match_type=entry.match_type,
            )
            for entry in blended.values()
        ]

    def _apply_scoring_adjustments(self, memory, base_score: float) -> float:
        """Apply category importance and recency boosts to score.

        Args:
            memory: Object exposing ``category`` and ``created_at``.
            base_score: Blended relevance score before adjustments.

        Returns:
            ``base_score * category_weight * recency_factor`` where the
            recency factor lies in ``[0.3, 1.0]``.
        """
        # Category importance; unknown categories are neutral.
        category_weight = CATEGORY_WEIGHTS.get(memory.category, 1.0)

        now = datetime.now(UTC)
        created_at = memory.created_at
        # Naive timestamps are interpreted as UTC so the subtraction is valid.
        if created_at.tzinfo is None:
            created_at = created_at.replace(tzinfo=UTC)

        # A timestamp slightly in the future (clock skew) would give a
        # negative age and a factor above 1.0; treat it as "just created".
        age_days = max(0, (now - created_at).days)

        # Exponential decay: 1.0 at 0 days, 0.5 at one half-life, floored at
        # 0.3 so old memories aren't completely buried.
        recency_factor = max(0.3, 2 ** (-age_days / RECENCY_HALF_LIFE_DAYS))

        return base_score * category_weight * recency_factor