PyPI - opencode-semantic-memory - Versions diffs - 0.1.0__py3-none-any.whl - Mend

opencode-semantic-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

opencode_memory/__init__.py +3 -0
opencode_memory/cache.py +261 -0
opencode_memory/cli.py +794 -0
opencode_memory/config.py +89 -0
opencode_memory/daemon.py +879 -0
opencode_memory/enrichment/__init__.py +0 -0
opencode_memory/enrichment/gitlab.py +237 -0
opencode_memory/extraction.py +225 -0
opencode_memory/historical_ingest.py +142 -0
opencode_memory/http_server.py +464 -0
opencode_memory/ingestion/__init__.py +7 -0
opencode_memory/ingestion/embeddings.py +211 -0
opencode_memory/ingestion/extractors.py +287 -0
opencode_memory/ingestion/opencode_db.py +448 -0
opencode_memory/ingestion/parser.py +344 -0
opencode_memory/ingestion/watcher.py +88 -0
opencode_memory/linking/__init__.py +5 -0
opencode_memory/linking/linker.py +323 -0
opencode_memory/metrics.py +273 -0
opencode_memory/models.py +171 -0
opencode_memory/project.py +86 -0
opencode_memory/query/__init__.py +5 -0
opencode_memory/query/hybrid.py +196 -0
opencode_memory/server.py +2795 -0
opencode_memory/session/__init__.py +5 -0
opencode_memory/session/registry.py +57 -0
opencode_memory/storage/__init__.py +6 -0
opencode_memory/storage/sqlite.py +1608 -0
opencode_memory/storage/vectors.py +199 -0
opencode_semantic_memory-0.1.0.dist-info/METADATA +531 -0
opencode_semantic_memory-0.1.0.dist-info/RECORD +33 -0
opencode_semantic_memory-0.1.0.dist-info/WHEEL +4 -0
opencode_semantic_memory-0.1.0.dist-info/entry_points.txt +3 -0

opencode_memory/models.py ADDED Viewed

@@ -0,0 +1,171 @@
+"""Data models for opencode-memory."""
+from datetime import UTC, datetime
+from enum import Enum
+from typing import Any
+from pydantic import BaseModel, Field
+def _utc_now() -> datetime:
+    """Produce the current instant as an aware datetime pinned to UTC."""
+    current = datetime.now(tz=UTC)
+    return current
+class EntityType(str, Enum):
+    """Types of entities we track.
+    The ``str`` mixin makes every member compare equal to its string value.
+    """
+    MR = "mr"  # Entity.from_ref assigns this to "!" references (presumably merge requests)
+    ISSUE = "issue"  # Entity.from_ref assigns this to "#" references
+    EPIC = "epic"  # Entity.from_ref assigns this to "&" references
+    PERSON = "person"  # Entity.from_ref assigns this to "@" references
+    CONCEPT = "concept"  # Not produced by Entity.from_ref
+    FILE = "file"  # Not produced by Entity.from_ref
+    SESSION = "session"  # Not produced by Entity.from_ref
+class MemoryCategory(str, Enum):
+    """Categories of memories.
+    The ``str`` mixin makes every member compare equal to its string value.
+    """
+    DECISION = "decision"
+    BLOCKER = "blocker"
+    PROCEDURE = "procedure"
+    FACT = "fact"
+    EVENT = "event"
+    CONVERSATION = "conversation"  # Full conversation content
+    CONVERSATION_SUMMARY = "conversation_summary"  # Compact summary of a conversation
+    DIRECTIVE = "directive"
+    PLAN = "plan"  # Long-term goals and strategies to achieve them
+    IDEA = "idea"  # Future possibilities, deferred considerations, things to try later
+class LinkType(str, Enum):
+    """Types of relationships between memories.
+    The ``str`` mixin makes every member compare equal to its string value.
+    """
+    RELATED = "related"  # Semantically similar content
+    EXTENDS = "extends"  # Builds on or elaborates another memory
+    SUPERSEDES = "supersedes"  # Replaces/updates an older memory
+    CONTRADICTS = "contradicts"  # Conflicts with another memory
+    SAME_ENTITY = "same_entity"  # About the same MR/issue/epic/person
+    SEQUENCE = "sequence"  # Sequential chunks from same source (strong link)
+class Entity(BaseModel):
+    """A tracked entity such as an MR, issue, epic, or person.
+    ``ref`` carries the short reference (for example ``!123``); ``project`` is
+    filled in by ``from_ref`` only when the reference was project-qualified.
+    """
+    id: int | None = None
+    type: EntityType
+    ref: str
+    project: str | None = None
+    title: str | None = None
+    metadata: dict[str, Any] = Field(default_factory=dict)
+    created_at: datetime = Field(default_factory=_utc_now)
+    updated_at: datetime = Field(default_factory=_utc_now)
+    @classmethod
+    def from_ref(cls, ref: str) -> "Entity | None":
+        """Build an entity from a textual reference.
+        Two shapes are understood:
+        - project-qualified refs such as ``gitlab-org/gitlab!123`` or
+          ``group/project#456``, which populate ``project`` and keep only the
+          sigil plus number in ``ref``;
+        - bare refs starting with ``!``, ``#``, ``&`` or ``@``, which are stored
+          unchanged in ``ref``.
+        Returns ``None`` when the string matches neither shape.
+        """
+        import re
+        numbered_kinds = {
+            "!": EntityType.MR,
+            "#": EntityType.ISSUE,
+            "&": EntityType.EPIC,
+        }
+        # Project-qualified form: <project path><sigil><digits>
+        qualified = re.match(r"^([\w\-./]+)([!#&])(\d+)$", ref)
+        if qualified is not None:
+            namespace, sigil, number = qualified.groups()
+            return cls(
+                type=numbered_kinds[sigil],
+                ref=f"{sigil}{number}",
+                project=namespace,
+            )
+        # Bare form: the leading character alone decides the entity type.
+        bare_kinds = {**numbered_kinds, "@": EntityType.PERSON}
+        kind = bare_kinds.get(ref[:1])
+        if kind is None:
+            return None
+        return cls(type=kind, ref=ref)
+class Memory(BaseModel):
+    """One stored memory: categorized text plus provenance and lifecycle timestamps."""
+    id: int | None = None
+    source_file: str | None = None
+    source_line: int | None = None
+    project: str | None = None  # e.g. "gitlab-org/gitlab", "personal/financial_planner"
+    category: MemoryCategory
+    content: str
+    what: str | None = None
+    why: str | None = None
+    learned: str | None = None
+    created_at: datetime = Field(default_factory=_utc_now)
+    expires_at: datetime | None = None
+    resolved_at: datetime | None = None
+    embedding_id: str | None = None
+    entities: list[str] = Field(default_factory=list)
+    def embedding_content(self) -> str:
+        """Return the text to embed for this memory.
+        The result is, space-separated and in this order: the project wrapped
+        in square brackets (when set), the entity refs (when any), and the
+        memory content. Prefixing project and refs helps keep projects apart
+        in vector space and makes memories findable by MR/issue/epic.
+        """
+        project_tag = [f"[{self.project}]"] if self.project else []
+        entity_refs = [" ".join(self.entities)] if self.entities else []
+        return " ".join([*project_tag, *entity_refs, self.content])
+class Session(BaseModel):
+    """An active OpenCode session."""
+    id: str
+    # Both timestamps default to the UTC time at which the model is created.
+    started_at: datetime = Field(default_factory=_utc_now)
+    last_heartbeat: datetime = Field(default_factory=_utc_now)
+    working_on: str | None = None  # presumably a free-text description of current work; verify against callers
+    claimed_items: list[str] = Field(default_factory=list)  # presumably entity refs claimed by this session; TODO confirm
+class SearchResult(BaseModel):
+    """A search result combining memory and relevance."""
+    memory: Memory
+    score: float  # Relevance score assigned by the producer of this result
+    match_type: str  # The hybrid search engine sets "fts", "vector", or "hybrid"
+    entities: list[Entity] = Field(default_factory=list)
+class MemoryLink(BaseModel):
+    """A directed link from one memory (source) to another (target)."""
+    id: int | None = None
+    source_memory_id: int
+    target_memory_id: int
+    link_type: LinkType
+    strength: float = 0.5  # 0-1 confidence score (the range is not validated here)
+    reason: str | None = None  # Why linked (for debugging/transparency)
+    created_at: datetime = Field(default_factory=_utc_now)
+class BootContext(BaseModel):
+    """Context returned at session boot.
+    Every collection defaults to an empty list, so a bare ``BootContext()`` is valid.
+    """
+    identity: dict[str, Any] | None = None
+    active_sessions: list[Session] = Field(default_factory=list)
+    hot_items: list[dict[str, Any]] = Field(default_factory=list)
+    # NOTE(review): the three lists below presumably hold memories of category
+    # DECISION, BLOCKER (with no resolved_at) and DIRECTIVE respectively — confirm
+    # against the code that builds this context.
+    recent_decisions: list[Memory] = Field(default_factory=list)
+    unresolved_blockers: list[Memory] = Field(default_factory=list)
+    directives: list[Memory] = Field(default_factory=list)

opencode_memory/project.py ADDED Viewed

@@ -0,0 +1,86 @@
+"""Project detection utilities."""
+import re
+from pathlib import Path
+def detect_project_from_path(path: str | Path | None) -> str | None:
+    """Detect project identifier from a file path.
+    Returns project identifiers like:
+    - "gitlab-org/gitlab" for GitLab monorepo
+    - "personal/financial_planner" for personal projects
+    - "ghavenga/opencode-memory" for this project
+    Args:
+        path: A filesystem path (or path-like string). Falsy values and
+            ``opencode:session:`` sources yield ``None``.
+    Returns:
+        The identifier of the first known pattern found in the path, or
+        ``None`` when no known pattern matches.
+    """
+    if not path:
+        return None
+    path_str = str(path)
+    # Handle opencode session sources
+    if path_str.startswith("opencode:session:"):
+        return None  # Will be derived from session's working dir
+    # Known project roots, checked in order; the first hit wins.
+    project_patterns = [
+        # GitLab projects under gdk
+        (r"/gdk/gitlab(?:/|$)", "gitlab-org/gitlab"),
+        (r"/gdk/gitaly(?:/|$)", "gitlab-org/gitaly"),
+        (r"/gdk/gitlab-runner(?:/|$)", "gitlab-org/gitlab-runner"),
+        # Projects under gitlab_projects
+        (r"/gitlab_projects/opencode-memory(?:/|$)", "ghavenga/opencode-memory"),
+        (r"/gitlab_projects/gdk/gitlab(?:/|$)", "gitlab-org/gitlab"),
+        # Personal projects
+        (r"/financial_planner(?:/|$)", "personal/financial_planner"),
+        # Generic .opencode notes - try to extract from path
+        (r"/\.opencode/gitlab-org/", "gitlab-org/gitlab"),
+    ]
+    for pattern, project in project_patterns:
+        if re.search(pattern, path_str):
+            return project
+    # No generic org/repo inference is attempted for unknown paths: a path
+    # alone does not say which directory pair is the project.
+    return None
+def detect_project_from_cwd(cwd: str | Path | None) -> str | None:
+    """Resolve the project for a working directory via path-based detection.
+    A falsy ``cwd`` short-circuits to ``None``.
+    """
+    return detect_project_from_path(cwd) if cwd else None
+def detect_project_from_git(path: str | Path | None) -> str | None:
+    """Derive ``org/repo`` from the ``origin`` remote of the repository at ``path``.
+    Runs ``git remote get-url origin`` in ``path`` with a 5 second timeout.
+    Returns ``None`` when ``path`` is falsy, git cannot be run, the command
+    times out or exits non-zero, or the remote URL has no ``org/repo`` tail.
+    """
+    if not path:
+        return None
+    import subprocess
+    try:
+        completed = subprocess.run(
+            ["git", "remote", "get-url", "origin"],
+            cwd=path,
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+        # Best effort: a missing git binary or directory means "unknown".
+        return None
+    if completed.returncode != 0:
+        return None
+    # Take the last two path segments, dropping an optional ".git" suffix.
+    # Handles both forms:
+    #   git@gitlab.com:gitlab-org/gitlab.git
+    #   https://gitlab.com/gitlab-org/gitlab.git
+    found = re.search(r"[:/]([^/]+)/([^/]+?)(?:\.git)?$", completed.stdout.strip())
+    if found is None:
+        return None
+    org, repo = found.groups()
+    return f"{org}/{repo}"

opencode_memory/query/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Query modules for opencode-memory."""
+from opencode_memory.query.hybrid import HybridSearchEngine
+__all__ = ["HybridSearchEngine"]

opencode_memory/query/hybrid.py ADDED Viewed

@@ -0,0 +1,196 @@
+"""Hybrid search combining FTS and vector similarity."""
+from datetime import UTC, datetime, timedelta
+from opencode_memory.ingestion.embeddings import EmbeddingEngine
+from opencode_memory.models import MemoryCategory, SearchResult
+from opencode_memory.storage.sqlite import SQLiteStorage
+from opencode_memory.storage.vectors import VectorStorage
+# Category importance weights (higher = more important).
+# Applied as a score multiplier. Categories without an entry here (currently
+# MemoryCategory.IDEA) fall back to 1.0 where the table is looked up.
+CATEGORY_WEIGHTS = {
+    MemoryCategory.DIRECTIVE: 1.5,  # Standing instructions - highest
+    MemoryCategory.PLAN: 1.4,  # Long-term goals guide priorities
+    MemoryCategory.BLOCKER: 1.3,  # Active blockers are critical
+    MemoryCategory.DECISION: 1.2,  # Past decisions inform current work
+    MemoryCategory.PROCEDURE: 1.1,  # How-to knowledge
+    MemoryCategory.FACT: 1.0,  # Baseline
+    MemoryCategory.EVENT: 0.9,  # Historical events
+    MemoryCategory.CONVERSATION_SUMMARY: 0.8,  # Compact session summaries
+    MemoryCategory.CONVERSATION: 0.6,  # Full session logs - lowest (large, noisy)
+}
+# Recency decay: memories lose relevance over time
+RECENCY_HALF_LIFE_DAYS = 30  # Score halves every 30 days
+class HybridSearchEngine:
+    """Combine FTS and vector search for best results.
+    Hits from both backends are merged per memory id, weighted, and then
+    adjusted by category importance and recency before being ranked.
+    """
+    def __init__(
+        self,
+        sqlite: SQLiteStorage,
+        vectors: VectorStorage,
+        embeddings: EmbeddingEngine,
+    ):
+        """Keep references to the storage backends and the embedding engine."""
+        self.sqlite = sqlite
+        self.vectors = vectors
+        self.embeddings = embeddings
+    def search(
+        self,
+        query: str,
+        limit: int = 20,
+        fts_weight: float = 0.3,
+        vector_weight: float = 0.7,
+        project: str | None = None,
+    ) -> list[SearchResult]:
+        """Search using both FTS and vector similarity.
+        Args:
+            query: Free-text query.
+            limit: Maximum number of results returned.
+            fts_weight: Multiplier applied to FTS scores.
+            vector_weight: Multiplier applied to vector scores.
+            project: When given, restrict results to this project (same
+                meaning as in ``search_async``).
+        Returns:
+            Up to ``limit`` results, highest score first.
+        """
+        # Over-fetch from each backend so merging still leaves enough candidates.
+        fts_results = self._search_fts(query, limit * 2, project=project)
+        vector_results = self._search_vectors(query, limit * 2, project=project)
+        combined = self._merge_results(fts_results, vector_results, fts_weight, vector_weight)
+        return sorted(combined, key=lambda x: -x.score)[:limit]
+    async def search_async(
+        self,
+        query: str,
+        limit: int = 20,
+        fts_weight: float = 0.3,
+        vector_weight: float = 0.7,
+        project: str | None = None,
+    ) -> list[SearchResult]:
+        """Search using both FTS and vector similarity (async-safe).
+        Only the query embedding is awaited; the FTS query and the vector
+        store lookup are called synchronously.
+        """
+        fts_results = self._search_fts(query, limit * 2, project=project)
+        vector_results = await self._search_vectors_async(query, limit * 2, project=project)
+        combined = self._merge_results(fts_results, vector_results, fts_weight, vector_weight)
+        return sorted(combined, key=lambda x: -x.score)[:limit]
+    def _search_fts(self, query: str, limit: int, project: str | None = None) -> list[SearchResult]:
+        """Perform FTS search.
+        The score is rank-based: the i-th hit (0-based) scores ``1 / (i + 1)``.
+        """
+        memories = self.sqlite.search_fts(query, limit, project=project)
+        return [
+            SearchResult(
+                memory=m,
+                score=1.0 / (i + 1),
+                match_type="fts",
+            )
+            for i, m in enumerate(memories)
+        ]
+    def _search_vectors(
+        self, query: str, limit: int, project: str | None = None
+    ) -> list[SearchResult]:
+        """Perform vector similarity search, optionally restricted to ``project``."""
+        query_embedding = self.embeddings.embed(query)
+        return self._process_vector_results(query_embedding, limit, project=project)
+    async def _search_vectors_async(
+        self, query: str, limit: int, project: str | None = None
+    ) -> list[SearchResult]:
+        """Perform vector similarity search (async-safe)."""
+        query_embedding = await self.embeddings.embed_async(query)
+        return self._process_vector_results(query_embedding, limit, project=project)
+    def _process_vector_results(
+        self, query_embedding: list[float], limit: int, project: str | None = None
+    ) -> list[SearchResult]:
+        """Process vector search results into SearchResult objects.
+        Each hit's ``_distance`` is converted to a score of ``1 / (1 + distance)``.
+        Hits whose memory cannot be loaded, or whose project differs from
+        ``project`` when one is given, are dropped.
+        """
+        # The project filter is applied after the lookup, so fetch twice as
+        # many candidates when filtering to leave room for rejected hits.
+        results = self.vectors.search(query_embedding, limit * 2 if project else limit)
+        # Batch fetch all memories in a single query (avoid N+1)
+        memory_ids = [r.get("memory_id") for r in results if r.get("memory_id")]
+        memories_map = self.sqlite.get_memories_by_ids(memory_ids)
+        search_results = []
+        for r in results:
+            memory_id = r.get("memory_id")
+            if memory_id and memory_id in memories_map:
+                memory = memories_map[memory_id]
+                if project and memory.project != project:
+                    continue
+                distance = r.get("_distance", 1.0)
+                score = 1.0 / (1.0 + distance)
+                search_results.append(
+                    SearchResult(
+                        memory=memory,
+                        score=score,
+                        match_type="vector",
+                    )
+                )
+                if len(search_results) >= limit:
+                    break
+        return search_results
+    def _merge_results(
+        self,
+        fts_results: list[SearchResult],
+        vector_results: list[SearchResult],
+        fts_weight: float,
+        vector_weight: float,
+    ) -> list[SearchResult]:
+        """Merge and deduplicate results.
+        Scores are accumulated per memory id: the weighted FTS score first,
+        then the weighted vector score on top. A memory seen by both
+        backends is labelled ``"hybrid"``. Results without an id are skipped.
+        The importance/recency adjustment is applied to the combined score.
+        """
+        scores_by_id: dict[int, tuple[float, SearchResult]] = {}
+        for r in fts_results:
+            if r.memory.id is not None:
+                scores_by_id[r.memory.id] = (
+                    r.score * fts_weight,
+                    SearchResult(
+                        memory=r.memory,
+                        score=r.score * fts_weight,
+                        match_type="fts",
+                    ),
+                )
+        for r in vector_results:
+            if r.memory.id is not None:
+                existing_score, existing_result = scores_by_id.get(r.memory.id, (0, None))
+                new_score = existing_score + (r.score * vector_weight)
+                scores_by_id[r.memory.id] = (
+                    new_score,
+                    SearchResult(
+                        memory=r.memory,
+                        score=new_score,
+                        match_type="hybrid" if existing_result else "vector",
+                    ),
+                )
+        # Dict keys are already unique per memory id, so no extra dedup pass is needed.
+        merged: list[SearchResult] = []
+        for score, result in scores_by_id.values():
+            # Apply importance and recency adjustments
+            adjusted_score = self._apply_scoring_adjustments(result.memory, score)
+            merged.append(
+                SearchResult(
+                    memory=result.memory,
+                    score=adjusted_score,
+                    match_type=result.match_type,
+                )
+            )
+        return merged
+    def _apply_scoring_adjustments(self, memory, base_score: float) -> float:
+        """Apply category importance and recency boosts to score.
+        Args:
+            memory: Object exposing ``category`` and ``created_at``.
+            base_score: Combined, weighted search score.
+        Returns:
+            ``base_score * category_weight * recency_factor`` where the
+            recency factor lies between 0.3 and 1.0.
+        """
+        # Category importance
+        category_weight = CATEGORY_WEIGHTS.get(memory.category, 1.0)
+        # Recency boost: exponential decay based on age
+        now = datetime.now(UTC)
+        # Handle naive datetimes
+        created_at = memory.created_at
+        if created_at.tzinfo is None:
+            created_at = created_at.replace(tzinfo=UTC)
+        # A negative timedelta normalises to days == -1 even for a tiny offset,
+        # so timestamps slightly in the future (clock skew) would otherwise
+        # yield a factor above 1.0. Treat them as age zero.
+        age_days = max(0, (now - created_at).days)
+        # Exponential decay: score * 2^(-age/half_life)
+        # At half_life days, multiplier is 0.5; at 0 days, multiplier is 1.0
+        recency_factor = 2 ** (-age_days / RECENCY_HALF_LIFE_DAYS)
+        # Clamp to minimum 0.3 so old memories aren't completely buried
+        recency_factor = max(0.3, recency_factor)
+        return base_score * category_weight * recency_factor