PyPI - opencode-semantic-memory - Versions diffs - 0.1.0__py3-none-any.whl - Mend

opencode-semantic-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

opencode_memory/__init__.py +3 -0
opencode_memory/cache.py +261 -0
opencode_memory/cli.py +794 -0
opencode_memory/config.py +89 -0
opencode_memory/daemon.py +879 -0
opencode_memory/enrichment/__init__.py +0 -0
opencode_memory/enrichment/gitlab.py +237 -0
opencode_memory/extraction.py +225 -0
opencode_memory/historical_ingest.py +142 -0
opencode_memory/http_server.py +464 -0
opencode_memory/ingestion/__init__.py +7 -0
opencode_memory/ingestion/embeddings.py +211 -0
opencode_memory/ingestion/extractors.py +287 -0
opencode_memory/ingestion/opencode_db.py +448 -0
opencode_memory/ingestion/parser.py +344 -0
opencode_memory/ingestion/watcher.py +88 -0
opencode_memory/linking/__init__.py +5 -0
opencode_memory/linking/linker.py +323 -0
opencode_memory/metrics.py +273 -0
opencode_memory/models.py +171 -0
opencode_memory/project.py +86 -0
opencode_memory/query/__init__.py +5 -0
opencode_memory/query/hybrid.py +196 -0
opencode_memory/server.py +2795 -0
opencode_memory/session/__init__.py +5 -0
opencode_memory/session/registry.py +57 -0
opencode_memory/storage/__init__.py +6 -0
opencode_memory/storage/sqlite.py +1608 -0
opencode_memory/storage/vectors.py +199 -0
opencode_semantic_memory-0.1.0.dist-info/METADATA +531 -0
opencode_semantic_memory-0.1.0.dist-info/RECORD +33 -0
opencode_semantic_memory-0.1.0.dist-info/WHEEL +4 -0
opencode_semantic_memory-0.1.0.dist-info/entry_points.txt +3 -0

opencode_memory/linking/linker.py ADDED Viewed

@@ -0,0 +1,323 @@
+"""Memory linker that discovers relationships between memories."""
+import logging
+import re
+from datetime import datetime, timedelta
+from opencode_memory.ingestion.embeddings import EmbeddingEngine
+from opencode_memory.models import LinkType, Memory, MemoryCategory, MemoryLink
+from opencode_memory.storage.sqlite import SQLiteStorage
+from opencode_memory.storage.vectors import VectorStorage
+logger = logging.getLogger(__name__)
+# Similarity thresholds - lower threshold allows more links, strength reflects actual similarity
+# "Similarity" is the 1 / (1 + distance) score computed in MemoryLinker._link_by_similarity.
+SIMILARITY_THRESHOLD = 0.50  # Default minimum when a category has no entry in CATEGORY_THRESHOLDS
+STRONG_LINK_THRESHOLD = 0.70  # Links above this are "strong"
+SUPERSEDES_THRESHOLD = 0.85  # Very high similarity + same category = supersedes
+# Largest age gap, in days, at which a newer memory may supersede an older one.
+SUPERSEDES_TIME_WINDOW_DAYS = 30
+# Matches one of "!", "#", "&", "@" followed by digits, or "@" followed by word characters.
+# NOTE(review): presumably GitLab-style references (merge request, issue, epic, user) - confirm.
+ENTITY_PATTERN = re.compile(r"[!#&@]\d+|@\w+")
+# Category-specific thresholds (some categories should link more readily)
+# Looked up by the category of the memory being linked, not by the category of the match.
+CATEGORY_THRESHOLDS = {
+    # High-value categories should link at lower similarity (more recall)
+    MemoryCategory.DIRECTIVE: 0.45,
+    MemoryCategory.PROCEDURE: 0.48,
+    MemoryCategory.PLAN: 0.45,  # Plans should link readily to related content
+    MemoryCategory.FACT: 0.50,
+    # Lower-value categories need higher similarity (more precision)
+    MemoryCategory.CONVERSATION_SUMMARY: 0.50,  # Summaries link at baseline
+    MemoryCategory.CONVERSATION: 0.60,  # Full convos need higher similarity (noisy)
+    MemoryCategory.EVENT: 0.50,
+    MemoryCategory.DECISION: 0.50,
+    MemoryCategory.BLOCKER: 0.50,
+}
+class MemoryLinker:
+    """Discovers and creates links between memories based on various signals."""
+    # Stores the three collaborators as attributes; nothing else happens here.
+    def __init__(
+        self,
+        sqlite: SQLiteStorage,
+        vectors: VectorStorage,
+        embeddings: EmbeddingEngine,
+    ):
+        # Used for memory, entity and link lookups and for inserting links.
+        self.sqlite = sqlite
+        # Used for nearest-neighbour search in _link_by_similarity.
+        self.vectors = vectors
+        # Used by link_memory to embed a memory when no embedding is passed in.
+        self.embeddings = embeddings
+    async def link_memory(
+        self, memory: Memory, embedding: list[float] | None = None
+    ) -> list[MemoryLink]:
+        """Run every linking strategy for one memory and collect the results.
+        A memory without an id is skipped and yields an empty list. When no
+        embedding is supplied, one is computed from memory.embedding_content().
+        Returns the links created by the similarity, entity-overlap and
+        temporal/category strategies, in that order.
+        """
+        if memory.id is None:
+            return []
+        vector = embedding
+        if vector is None:
+            vector = await self.embeddings.embed_async(memory.embedding_content())
+        # List displays evaluate left to right, so the strategies run in the
+        # same order as their results appear.
+        return [
+            *self._link_by_similarity(memory, vector),
+            *self._link_by_entity_overlap(memory),
+            *self._link_by_temporal_category(memory),
+        ]
+    def _link_by_similarity(self, memory: Memory, embedding: list[float]) -> list[MemoryLink]:
+        """Link the memory to its nearest neighbours in the vector store.
+        Up to 10 neighbours are fetched; each distance is mapped to a similarity
+        of 1 / (1 + distance) and that score is stored as the link strength.
+        The cut-off comes from CATEGORY_THRESHOLDS for the memory's category,
+        or SIMILARITY_THRESHOLD when the category has no entry.
+        Returns the links that were actually inserted.
+        """
+        neighbours = self.vectors.search(embedding, limit=10)
+        min_similarity = CATEGORY_THRESHOLDS.get(memory.category, SIMILARITY_THRESHOLD)
+        created: list[MemoryLink] = []
+        for hit in neighbours:
+            neighbour_id = hit.get("memory_id")
+            # Ignore hits without an id and the memory matching itself.
+            if not neighbour_id or neighbour_id == memory.id:
+                continue
+            score = 1.0 / (1.0 + hit.get("_distance", 1.0))
+            if score < min_similarity:
+                continue
+            # A pair that already has a link of any type is left alone.
+            if self.sqlite.any_link_exists(memory.id, neighbour_id):
+                continue
+            neighbour = self.sqlite.get_memory_by_id(neighbour_id)
+            if not neighbour:
+                continue
+            kind, why = self._classify_similarity_link(memory, neighbour, score)
+            candidate = MemoryLink(
+                source_memory_id=memory.id,
+                target_memory_id=neighbour_id,
+                link_type=kind,
+                strength=score,
+                reason=why,
+            )
+            new_id = self.sqlite.insert_link(candidate)
+            if not new_id:
+                continue
+            candidate.id = new_id
+            created.append(candidate)
+            logger.debug(
+                f"Created {kind.value} link: {memory.id} -> {neighbour_id} "
+                f"(strength={score:.3f})"
+            )
+        return created
+    def _classify_similarity_link(
+        self, source: Memory, target: Memory, similarity: float
+    ) -> tuple[LinkType, str]:
+        """Pick the link type and a human-readable reason for a similar pair.
+        Checks are applied in order:
+        1. SUPERSEDES - both memories share a category that is FACT or
+           PROCEDURE, similarity exceeds SUPERSEDES_THRESHOLD, and source is
+           newer than target by at most SUPERSEDES_TIME_WINDOW_DAYS days.
+        2. EXTENDS - source has more than half as many words of its own
+           (4+ characters, lower-cased) as it shares with target.
+        3. RELATED - everything else; the reason labels the similarity as
+           strong, moderate or weak.
+        Returns a (link_type, reason) tuple.
+        """
+        # Drop tzinfo so naive and aware timestamps can be compared; replace()
+        # leaves an already-naive value unchanged, so no tzinfo check is needed.
+        source_time = source.created_at.replace(tzinfo=None)
+        target_time = target.created_at.replace(tzinfo=None)
+        if (
+            source.category == target.category
+            and target.category in (MemoryCategory.FACT, MemoryCategory.PROCEDURE)
+            and similarity > SUPERSEDES_THRESHOLD
+            and source_time > target_time
+        ):
+            age_diff = (source_time - target_time).days
+            if age_diff <= SUPERSEDES_TIME_WINDOW_DAYS:
+                return (
+                    LinkType.SUPERSEDES,
+                    f"Same category, high similarity ({similarity:.2f}), newer by {age_diff}d",
+                )
+        source_words = set(re.findall(r"\w{4,}", source.content.lower()))
+        target_words = set(re.findall(r"\w{4,}", target.content.lower()))
+        if source_words and target_words:
+            source_only = source_words - target_words
+            common = source_words & target_words
+            if len(source_only) > len(common) * 0.5:
+                return (
+                    LinkType.EXTENDS,
+                    f"Extends with additional content ({len(source_only)} new concepts)",
+                )
+        # Classify link strength for the reason string
+        if similarity >= STRONG_LINK_THRESHOLD:
+            strength_desc = "strong"
+        elif similarity >= 0.60:
+            strength_desc = "moderate"
+        else:
+            strength_desc = "weak"
+        return LinkType.RELATED, f"Semantic similarity ({similarity:.2f}, {strength_desc})"
+    def _link_by_entity_overlap(self, memory: Memory) -> list[MemoryLink]:
+        """Link the memory to other memories that reference the same entity.
+        References come from ENTITY_PATTERN matches in the content plus
+        memory.entities. Each reference that parses and resolves to a stored
+        entity yields SAME_ENTITY links (strength 0.8) to that entity's other
+        memories, skipping pairs that already have such a link.
+        Returns the links that were actually inserted.
+        """
+        mentioned = set(ENTITY_PATTERN.findall(memory.content))
+        declared = set(memory.entities) if memory.entities else set()
+        created: list[MemoryLink] = []
+        for ref in mentioned | declared:
+            parsed = self._parse_entity_ref(ref)
+            if not parsed:
+                continue
+            stored = self.sqlite.get_entity(parsed.ref, parsed.type)
+            if not stored or stored.id is None:
+                continue
+            for peer in self.sqlite.get_memories_for_entity(stored.id):
+                # Skip the memory itself and rows without an id.
+                if peer.id is None or peer.id == memory.id:
+                    continue
+                if self.sqlite.link_exists(memory.id, peer.id, LinkType.SAME_ENTITY):
+                    continue
+                candidate = MemoryLink(
+                    source_memory_id=memory.id,
+                    target_memory_id=peer.id,
+                    link_type=LinkType.SAME_ENTITY,
+                    strength=0.8,
+                    reason=f"Both reference {ref}",
+                )
+                new_id = self.sqlite.insert_link(candidate)
+                if new_id:
+                    candidate.id = new_id
+                    created.append(candidate)
+        return created
+    def _link_by_temporal_category(self, memory: Memory) -> list[MemoryLink]:
+        """Link DECISION/EVENT memories to same-category memories created nearby in time.
+        Other categories produce no links. Up to 20 memories of the same
+        category (resolved ones included) are fetched, and those created within
+        two hours before or after this memory receive a RELATED link. Strength
+        is 1 minus the fraction of the window that separates them, floored at 0.5.
+        Returns the links that were actually inserted.
+        """
+        if memory.category not in (MemoryCategory.DECISION, MemoryCategory.EVENT):
+            return []
+        def naive(moment):
+            # Compare on naive datetimes: drop tzinfo when one is attached.
+            return moment.replace(tzinfo=None) if moment.tzinfo else moment
+        window = timedelta(hours=2)
+        anchor = naive(memory.created_at)
+        earliest, latest = anchor - window, anchor + window
+        candidates = self.sqlite.get_memories_by_category(
+            memory.category, limit=20, include_resolved=True
+        )
+        created: list[MemoryLink] = []
+        for other in candidates:
+            if other.id is None or other.id == memory.id:
+                continue
+            other_time = naive(other.created_at)
+            if other_time < earliest or other_time > latest:
+                continue
+            if self.sqlite.link_exists(memory.id, other.id, LinkType.RELATED):
+                continue
+            gap_seconds = abs((anchor - other_time).total_seconds())
+            proximity = 1.0 - (gap_seconds / window.total_seconds())
+            candidate = MemoryLink(
+                source_memory_id=memory.id,
+                target_memory_id=other.id,
+                link_type=LinkType.RELATED,
+                strength=max(0.5, proximity),
+                reason=f"Same session (within {int(gap_seconds / 60)}min)",
+            )
+            new_id = self.sqlite.insert_link(candidate)
+            if new_id:
+                candidate.id = new_id
+                created.append(candidate)
+        return created
+    async def process_batch(self, memories: list[Memory], batch_size: int = 10) -> int:
+        """Link each memory in turn and report how many links were created.
+        Memories without an id are skipped. Processing is sequential;
+        batch_size is accepted but not used by this implementation.
+        """
+        link_count = 0
+        for candidate in (m for m in memories if m.id is not None):
+            link_count += len(await self.link_memory(candidate))
+        return link_count
+    async def run_linking_pass(self, limit: int = 50) -> dict:
+        """Select memories that need linking, link them, and return stats.
+        Half of limit (integer division) goes to memories with no links and
+        half to memories from the last 24 hours that need links; entries with
+        the same id are processed once. The result has the keys "processed",
+        "links_created" and "status" ("no_work" or "completed").
+        """
+        half = limit // 2
+        unlinked = self.sqlite.get_unlinked_memories(limit=half)
+        recent = self.sqlite.get_memories_needing_links(since_hours=24, limit=half)
+        # Key by id to drop duplicates; a later entry replaces an earlier one.
+        by_id = {}
+        for mem in unlinked + recent:
+            if mem.id is not None:
+                by_id[mem.id] = mem
+        if not by_id:
+            return {"processed": 0, "links_created": 0, "status": "no_work"}
+        pending = list(by_id.values())
+        created = await self.process_batch(pending)
+        return {
+            "processed": len(pending),
+            "links_created": created,
+            "status": "completed",
+        }
+    # Thin wrapper around Entity.from_ref. _link_by_entity_overlap treats a
+    # falsy result as "not an entity" and reads .ref and .type otherwise.
+    def _parse_entity_ref(self, ref: str):
+        """Parse an entity reference string."""
+        # NOTE(review): Entity is imported here although the other model classes
+        # are imported at module level; the reason is not visible in this file.
+        from opencode_memory.models import Entity
+        return Entity.from_ref(ref)

opencode_memory/metrics.py ADDED Viewed

@@ -0,0 +1,273 @@
+"""Prometheus-compatible metrics for opencode-memory.
+Exposes metrics in Prometheus text format at /metrics endpoint.
+No external dependencies required - uses simple text format.
+"""
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Any
+@dataclass
+class Counter:
+    """A simple counter metric.
+    Values are kept per label combination, keyed by a tuple of label values
+    ordered like `labels`; a label not passed to a call is recorded as "".
+    A counter declared without labels has a single series under the key ().
+    """
+    name: str
+    help: str
+    labels: list[str] = field(default_factory=list)
+    _values: dict[tuple, float] = field(default_factory=lambda: defaultdict(float))
+    def _key(self, label_values: dict[str, str]) -> tuple:
+        """Build the series key for the given label keyword arguments."""
+        return tuple(label_values.get(lbl, "") for lbl in self.labels)
+    def inc(self, value: float = 1.0, **label_values: str) -> None:
+        """Add `value` to the series selected by `label_values`."""
+        self._values[self._key(label_values)] += value
+    def get(self, **label_values: str) -> float:
+        """Return the current value, or 0.0 for a series never written.
+        Uses a non-inserting lookup: indexing the defaultdict would create an
+        empty series for every label combination that is merely read, and that
+        series would then be emitted when the registry is rendered.
+        """
+        return self._values.get(self._key(label_values), 0.0)
+@dataclass
+class Gauge:
+    """A simple gauge metric.
+    Values are kept per label combination, keyed by a tuple of label values
+    ordered like `labels`; a label not passed to a call is recorded as "".
+    A gauge declared without labels has a single series under the key ().
+    """
+    name: str
+    help: str
+    labels: list[str] = field(default_factory=list)
+    _values: dict[tuple, float] = field(default_factory=lambda: defaultdict(float))
+    def _key(self, label_values: dict[str, str]) -> tuple:
+        """Build the series key for the given label keyword arguments."""
+        return tuple(label_values.get(lbl, "") for lbl in self.labels)
+    def set(self, value: float, **label_values: str) -> None:
+        """Replace the value of the series selected by `label_values`."""
+        self._values[self._key(label_values)] = value
+    def inc(self, value: float = 1.0, **label_values: str) -> None:
+        """Add `value` to the selected series (starting from 0.0 if new)."""
+        self._values[self._key(label_values)] += value
+    def dec(self, value: float = 1.0, **label_values: str) -> None:
+        """Subtract `value` from the selected series (starting from 0.0 if new)."""
+        self._values[self._key(label_values)] -= value
+    def get(self, **label_values: str) -> float:
+        """Return the current value, or 0.0 for a series never written.
+        Uses a non-inserting lookup: indexing the defaultdict would create an
+        empty series for every label combination that is merely read, and that
+        series would then be emitted when the registry is rendered.
+        """
+        return self._values.get(self._key(label_values), 0.0)
+@dataclass
+class Histogram:
+    """A simple histogram metric with fixed buckets.
+    `buckets` are upper bounds, kept in ascending order. `_bucket_counts`
+    stores, per label combination, how many observations fell into each
+    bucket individually (not cumulatively); MetricsRegistry.render adds them
+    up to produce the cumulative `le` series. `_sums` and `_counts` hold the
+    total of the observed values and the number of observations.
+    """
+    name: str
+    help: str
+    buckets: list[float] = field(default_factory=lambda: [0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0])
+    labels: list[str] = field(default_factory=list)
+    _bucket_counts: dict[tuple, list[int]] = field(default_factory=dict)
+    _sums: dict[tuple, float] = field(default_factory=lambda: defaultdict(float))
+    _counts: dict[tuple, int] = field(default_factory=lambda: defaultdict(int))
+    def __post_init__(self) -> None:
+        # observe() stops at the first matching bound, which is the tightest
+        # one only when bounds ascend. sorted() also copies the list, so later
+        # changes to the caller's list do not affect this metric.
+        self.buckets = sorted(self.buckets)
+    def observe(self, value: float, **label_values: str) -> None:
+        """Record one observation in the series selected by `label_values`.
+        The observation is counted once, in the smallest bucket whose bound is
+        >= value. Counting it in every larger bucket as well would be summed
+        again by the renderer and inflate the cumulative `le` values beyond the
+        total count. Values above the largest bound only affect the sum and
+        count (the +Inf bucket).
+        """
+        key = tuple(label_values.get(lbl, "") for lbl in self.labels)
+        counts = self._bucket_counts.setdefault(key, [0] * len(self.buckets))
+        self._sums[key] += value
+        self._counts[key] += 1
+        for i, bound in enumerate(self.buckets):
+            if value <= bound:
+                counts[i] += 1
+                break
+class MetricsRegistry:
+    """Registry that owns metrics by name and renders them in Prometheus text format."""
+    def __init__(self):
+        # Metric name -> metric object; render() walks them in insertion order.
+        self.metrics: dict[str, Counter | Gauge | Histogram] = {}
+    def counter(self, name: str, help: str, labels: list[str] | None = None) -> Counter:
+        """Return the metric registered under `name`, creating a Counter if absent.
+        `help` and `labels` are only used when the metric is first created.
+        """
+        if name not in self.metrics:
+            self.metrics[name] = Counter(name, help, labels or [])
+        return self.metrics[name]
+    def gauge(self, name: str, help: str, labels: list[str] | None = None) -> Gauge:
+        """Return the metric registered under `name`, creating a Gauge if absent.
+        `help` and `labels` are only used when the metric is first created.
+        """
+        if name not in self.metrics:
+            self.metrics[name] = Gauge(name, help, labels or [])
+        return self.metrics[name]
+    def histogram(
+        self,
+        name: str,
+        help: str,
+        buckets: list[float] | None = None,
+        labels: list[str] | None = None,
+    ) -> Histogram:
+        """Return the metric registered under `name`, creating a Histogram if absent.
+        A missing or empty `buckets` falls back to the default bounds. `help`,
+        `buckets` and `labels` are only used when the metric is first created.
+        """
+        if name not in self.metrics:
+            self.metrics[name] = Histogram(
+                name, help, buckets or [0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0], labels or []
+            )
+        return self.metrics[name]
+    def render(self) -> str:
+        """Render all metrics in Prometheus text format.
+        Each metric gets a HELP line and a TYPE line followed by one sample per
+        label combination. Histograms emit one `_bucket` sample per bound plus
+        `+Inf`, then `_sum` and `_count`. The result always ends with a newline.
+        """
+        lines = []
+        for metric in self.metrics.values():
+            lines.append(f"# HELP {metric.name} {self._escape_help(metric.help)}")
+            if isinstance(metric, (Counter, Gauge)):
+                # Counters and gauges share a sample layout; only TYPE differs.
+                kind = "counter" if isinstance(metric, Counter) else "gauge"
+                lines.append(f"# TYPE {metric.name} {kind}")
+                for labels_tuple, value in metric._values.items():
+                    label_str = self._format_labels(metric.labels, labels_tuple)
+                    lines.append(f"{metric.name}{label_str} {value}")
+            elif isinstance(metric, Histogram):
+                lines.append(f"# TYPE {metric.name} histogram")
+                for labels_tuple, bucket_counts in metric._bucket_counts.items():
+                    label_str = self._format_labels(metric.labels, labels_tuple)
+                    # Running total of the stored bucket counts, one sample per bound.
+                    cumulative = 0
+                    for i, bucket in enumerate(metric.buckets):
+                        cumulative += bucket_counts[i]
+                        full_label = self._with_le(label_str, str(bucket))
+                        lines.append(f"{metric.name}_bucket{full_label} {cumulative}")
+                    # +Inf bucket
+                    full_label = self._with_le(label_str, "+Inf")
+                    lines.append(f"{metric.name}_bucket{full_label} {metric._counts[labels_tuple]}")
+                    lines.append(f"{metric.name}_sum{label_str} {metric._sums[labels_tuple]}")
+                    lines.append(f"{metric.name}_count{label_str} {metric._counts[labels_tuple]}")
+        return "\n".join(lines) + "\n"
+    def _format_labels(self, label_names: list[str], label_values: tuple) -> str:
+        """Format labels for Prometheus output.
+        Returns "" when the metric has no labels, otherwise `{name="value",...}`
+        with each value escaped for the text exposition format.
+        """
+        if not label_names:
+            return ""
+        pairs = [
+            f'{name}="{self._escape_label_value(value)}"'
+            for name, value in zip(label_names, label_values)
+        ]
+        return "{" + ",".join(pairs) + "}"
+    @staticmethod
+    def _with_le(label_str: str, bound: str) -> str:
+        """Return `label_str` with an `le` label for `bound` appended."""
+        le_label = f'le="{bound}"'
+        if label_str:
+            # Re-open the existing "{...}" block to add the extra label.
+            return label_str[:-1] + "," + le_label + "}"
+        return "{" + le_label + "}"
+    @staticmethod
+    def _escape_label_value(value: object) -> str:
+        """Escape backslash, double quote and newline in a label value."""
+        # Backslash goes first so the escapes added below are not doubled.
+        return str(value).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
+    @staticmethod
+    def _escape_help(text: str) -> str:
+        """Escape backslash and newline in HELP text."""
+        return str(text).replace("\\", "\\\\").replace("\n", "\\n")
+# Global registry
+# Shared module-level instance; every metric declared below registers into it.
+registry = MetricsRegistry()
+# Define metrics
+# Metrics declared without a label list keep one series under the empty-tuple key ().
+requests_total = registry.counter(
+    "opencode_memory_requests_total",
+    "Total number of MCP requests",
+    ["tool"],
+)
+# The third positional argument of registry.histogram is the bucket list (upper bounds).
+request_duration = registry.histogram(
+    "opencode_memory_request_duration_seconds",
+    "Request duration in seconds",
+    [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0],
+    ["tool"],
+)
+# Set per category by update_from_status.
+memories_total = registry.gauge(
+    "opencode_memory_memories_total",
+    "Total number of memories",
+    ["category"],
+)
+embedding_queue_size = registry.gauge(
+    "opencode_memory_embedding_queue_size",
+    "Number of pending embedding tasks",
+)
+# update_from_status writes the "sqlite" and "vectors" series of this gauge.
+storage_bytes = registry.gauge(
+    "opencode_memory_storage_bytes",
+    "Storage size in bytes",
+    ["type"],
+)
+rate_limit_rejections = registry.counter(
+    "opencode_memory_rate_limit_rejections_total",
+    "Total number of rate-limited requests",
+)
+auth_failures = registry.counter(
+    "opencode_memory_auth_failures_total",
+    "Total number of authentication failures",
+)
+# Cache metrics
+# All four are overwritten from the "cache" section of the status in update_from_status.
+cache_size = registry.gauge(
+    "opencode_memory_cache_size",
+    "Number of entries in memory cache",
+)
+cache_hits = registry.counter(
+    "opencode_memory_cache_hits_total",
+    "Total cache hits",
+)
+cache_misses = registry.counter(
+    "opencode_memory_cache_misses_total",
+    "Total cache misses",
+)
+cache_hit_rate = registry.gauge(
+    "opencode_memory_cache_hit_rate",
+    "Cache hit rate (0-1)",
+)
+# Link metrics
+# Set per link type from the "links" -> "by_type" section in update_from_status.
+links_total = registry.gauge(
+    "opencode_memory_links_total",
+    "Total number of memory links",
+    ["type"],
+)
+def update_from_status(status: dict[str, Any]) -> None:
+    """Copy values from a server status mapping into the module-level metrics.
+    Reads the "embedding_queue", "storage", "memories", "cache" and "links"
+    sections. Missing sections or fields count as 0 / empty; cache and link
+    metrics are only touched when their section is non-empty.
+    """
+    # Embedding queue depth.
+    queue_info = status.get("embedding_queue", {})
+    embedding_queue_size.set(queue_info.get("pending", 0))
+    # Storage sizes arrive in megabytes and are published in bytes.
+    storage_info = status.get("storage", {})
+    for size_key, storage_type in (("db_size_mb", "sqlite"), ("vectors_size_mb", "vectors")):
+        storage_bytes.set(storage_info.get(size_key, 0) * 1024 * 1024, type=storage_type)
+    # One gauge series per memory category.
+    memory_counts = status.get("memories", {})
+    for category in memory_counts:
+        memories_total.set(memory_counts[category], category=category)
+    # Cache statistics.
+    cache_info = status.get("cache", {})
+    if cache_info:
+        cache_size.set(cache_info.get("size", 0))
+        # The status carries absolute totals, so they are written straight into
+        # the unlabelled series of each counter rather than incremented.
+        for total, stat_key in ((cache_hits, "hits"), (cache_misses, "misses")):
+            total._values[()] = cache_info.get(stat_key, 0)
+        cache_hit_rate.set(cache_info.get("hit_rate", 0))
+    # One gauge series per link type.
+    link_info = status.get("links", {})
+    if link_info:
+        per_type = link_info.get("by_type", {})
+        for link_type in per_type:
+            links_total.set(per_type[link_type], type=link_type)