npm - loki-mode - Versions diffs - 7.66.1 → 7.68.0 - Mend

loki-mode 7.66.1 → 7.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/app-runner.sh +128 -7
package/autonomy/loki +202 -87
package/autonomy/run.sh +161 -87
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +25 -4
package/docs/INSTALLATION.md +2 -2
package/loki-ts/dist/loki.js +112 -110
package/mcp/__init__.py +1 -1
package/memory/consolidation.py +86 -12
package/memory/retrieval.py +18 -1
package/memory/storage.py +161 -1
package/package.json +1 -1
package/plugins/loki-mode/.claude-plugin/plugin.json +1 -1

package/mcp/__init__.py CHANGED

@@ -57,4 +57,4 @@ try:
 except ImportError:
     __all__ = ['mcp']
-__version__ = '7.66.1'
+__version__ = '7.68.0'

package/memory/consolidation.py CHANGED

@@ -223,7 +223,17 @@ class ConsolidationPipeline:
                     merged = False
                     for idx, existing in enumerate(existing_patterns):
                         if self._patterns_similar(new_pattern, existing):
-                            merged_pattern = self.merge_with_existing(new_pattern, [existing])
+                            # Re-read the target pattern fresh immediately before
+                            # merging (BUG-MEM C1, lost-update). The whole-run
+                            # snapshot at step 4 can be stale by now: a concurrent
+                            # engine.increment_pattern_usage() (load_pattern then
+                            # save_pattern) may have bumped usage_count/last_used
+                            # AFTER the snapshot. merge_with_existing() builds the
+                            # merged record from best_match.usage_count/last_used,
+                            # so merging from the stale snapshot clobbers that bump.
+                            # Re-reading narrows the window to this single write.
+                            merge_base = self._reload_pattern(existing)
+                            merged_pattern = self.merge_with_existing(new_pattern, [merge_base])
                             self.storage.update_pattern(merged_pattern)
                             # Refresh the in-memory copy so a later new pattern in
                             # this same run that also merges into this existing
@@ -262,7 +272,11 @@ class ConsolidationPipeline:
             for idx, existing in enumerate(existing_patterns):
                 if (existing.incorrect_approach and
                     self._patterns_similar(anti_pattern, existing, threshold=0.6)):
-                    merged_pattern = self.merge_with_existing(anti_pattern, [existing])
+                    # Re-read fresh before merge (same C1 lost-update guard as the
+                    # cluster merge loop above): merge from current on-disk state,
+                    # not the potentially-stale step-4 snapshot.
+                    merge_base = self._reload_pattern(existing)
+                    merged_pattern = self.merge_with_existing(anti_pattern, [merge_base])
                     self.storage.update_pattern(merged_pattern)
                     # Refresh in-memory copy (same data-loss guard as the cluster
                     # merge loop above): a later anti-pattern merging into this same
@@ -305,6 +319,35 @@ class ConsolidationPipeline:
         result.duration_seconds = time.time() - start_time
         return result
+    def _reload_pattern(self, fallback: SemanticPattern) -> SemanticPattern:
+        """Re-read a pattern fresh from storage immediately before merging.
+        Used by the merge branches to avoid the C1 lost-update: the step-4
+        snapshot may be stale (a concurrent usage bump can land after it), and
+        merge_with_existing() copies usage_count/last_used from the base. Reading
+        the current on-disk record makes the merge build on live state.
+        Mirrors the snapshot's dict/object handling. If load_pattern returns
+        nothing (e.g. the record vanished), fall back to the in-memory copy so the
+        merge still proceeds rather than crashing.
+        Residual limitation (honest): load_pattern and update_pattern are SEPARATE
+        lock acquisitions, so a bump landing between this re-read and the write is
+        still lost. This narrows the race window from the whole run to a single
+        write; it is a mitigation, not cross-process atomicity. A full fix needs a
+        compare-and-set or merge-callback update in storage, which is out of scope
+        for this file (storage.py is frozen this batch).
+        """
+        try:
+            fresh = self.storage.load_pattern(fallback.id)
+        except Exception:
+            return fallback
+        if not fresh:
+            return fallback
+        if isinstance(fresh, dict):
+            return SemanticPattern.from_dict(fresh)
+        return fresh
     # -------------------------------------------------------------------------
     # Clustering Methods
     # -------------------------------------------------------------------------
@@ -475,7 +518,10 @@ class ConsolidationPipeline:
     def _episode_to_text(self, episode: EpisodeTrace) -> str:
         """Convert episode to text for embedding."""
-        parts = [episode.goal]
+        # Guard explicit-null goal (C2): from_dict's .get(key, default) returns
+        # None on a JSON null, and the " ".join(parts) below crashes on a None
+        # member. Mirror the wave-6 (episode.goal or "") idiom.
+        parts = [episode.goal or ""]
         # Add action summaries (handle both ActionEntry objects and dicts)
         for action in episode.action_log[:5]:  # Limit to first 5 actions
@@ -505,7 +551,8 @@ class ConsolidationPipeline:
         # Find common words in goals
         all_words: Dict[str, int] = defaultdict(int)
         for episode in episodes:
-            for word in episode.goal.lower().split():
+            # Guard explicit-null goal (C2): None has no .lower().
+            for word in (episode.goal or "").lower().split():
                 if len(word) > 3:
                     all_words[word] += 1
@@ -562,7 +609,11 @@ class ConsolidationPipeline:
         tool_counts: Dict[str, int] = defaultdict(int)
         for episode in cluster:
             for action in episode.action_log:
-                tool_counts[action.tool] += 1
+                # Skip explicit-null tools (C2): an explicit JSON null tool would
+                # become a None dict key here, harmless until ", ".join(common_tools)
+                # at the end of _extract_correct_approach crashes on it.
+                if action.tool:
+                    tool_counts[action.tool] += 1
         # Filter to tools used in most episodes
         common_tools = [
@@ -641,8 +692,10 @@ class ConsolidationPipeline:
             for episode in episodes:
                 # Get actions before error
                 if episode.action_log:
+                    # Filter explicit-null tools (C2): pre_error_actions feeds
+                    # _summarize_actions, whose ", ".join crashes on a None member.
                     pre_error_actions.extend(
-                        [a.tool for a in episode.action_log[-3:]]  # Last 3 actions
+                        [a.tool for a in episode.action_log[-3:] if a.tool]  # Last 3 actions
                     )
                 # Collect resolutions
@@ -718,7 +771,9 @@ class ConsolidationPipeline:
             for i, tool in enumerate(common_seq[:5], 1):
                 steps.append(f"{i}. Use {tool}")
-        return "; ".join(steps) if steps else f"Use: {', '.join(common_tools)}"
+        # Defensively drop any None tool before join (C2): callers filter Nones,
+        # but guard here too since this join crashes on a None member.
+        return "; ".join(steps) if steps else f"Use: {', '.join(t for t in common_tools if t)}"
     def _summarize_actions(self, actions: List[str]) -> str:
         """Summarize a list of actions into a description."""
@@ -784,6 +839,20 @@ class ConsolidationPipeline:
         if best_match is None or best_similarity < 0.5:
             return new_pattern
+        # Idempotency guard (consolidation-C4): only boost confidence when the
+        # merge actually introduces NEW evidence. consolidate() reloads every
+        # episode in the since-window on each run (storage.list_episodes has no
+        # consolidated-state filter), so re-running over an unchanged episode set
+        # re-extracts identical patterns that re-match this existing pattern. A
+        # flat +0.05 every time would ratchet confidence up artificially with no
+        # new data. Comparing source_episodes (which round-trips through storage)
+        # makes the merge a no-op for confidence when no new source episode is
+        # present, while still rewarding a genuinely new similar episode.
+        new_source_episodes = (
+            set(new_pattern.source_episodes) - set(best_match.source_episodes)
+        )
+        confidence_boost = 0.05 if new_source_episodes else 0.0
         # Merge patterns
         merged = SemanticPattern(
             id=best_match.id,
@@ -792,7 +861,7 @@ class ConsolidationPipeline:
             conditions=list(set(best_match.conditions + new_pattern.conditions)),
             correct_approach=best_match.correct_approach or new_pattern.correct_approach,
             incorrect_approach=best_match.incorrect_approach or new_pattern.incorrect_approach,
-            confidence=min(best_match.confidence + 0.05, 0.99),
+            confidence=min(best_match.confidence + confidence_boost, 0.99),
             source_episodes=list(set(best_match.source_episodes + new_pattern.source_episodes)),
             usage_count=best_match.usage_count,
             last_used=best_match.last_used,
@@ -981,7 +1050,8 @@ def compress_episode_to_summary(episode: EpisodeTrace) -> str:
     action_count = len(episode.action_log)
     error_count = len(episode.errors_encountered)
-    summary = f"Task '{episode.goal[:50]}' {outcome}"
+    # Guard explicit-null goal (C2): None is not subscriptable.
+    summary = f"Task '{(episode.goal or '')[:50]}' {outcome}"
     if action_count > 0:
         summary += f" after {action_count} actions"
@@ -1012,10 +1082,13 @@ def compress_episodes_to_pattern_desc(episodes: List[EpisodeTrace]) -> str:
         return "Unknown pattern"
     if len(episodes) == 1:
-        return f"Pattern from: {episodes[0].goal}"
+        # No slice here, so a None goal would f-string as "None" without crashing;
+        # normalize to "" for cleaner output (C2).
+        return f"Pattern from: {episodes[0].goal or ''}"
     # Find common goal elements
-    goals = [ep.goal.lower() for ep in episodes]
+    # Guard explicit-null goal (C2): None has no .lower().
+    goals = [(ep.goal or "").lower() for ep in episodes]
     # Find common words
     word_counts: Dict[str, int] = defaultdict(int)
@@ -1035,4 +1108,5 @@ def compress_episodes_to_pattern_desc(episodes: List[EpisodeTrace]) -> str:
         return f"Pattern for {theme} tasks ({len(episodes)} instances)"
     # Fallback to first episode's goal
-    return f"Pattern: {episodes[0].goal[:100]} (and {len(episodes)-1} similar)"
+    # Guard explicit-null goal (C2): None is not subscriptable.
+    return f"Pattern: {(episodes[0].goal or '')[:100]} (and {len(episodes)-1} similar)"

package/memory/retrieval.py CHANGED

@@ -414,6 +414,7 @@ class MemoryRetrieval:
         context: Dict[str, Any],
         top_k: int = 5,
         token_budget: Optional[int] = None,
+        persist_boost: bool = False,
     ) -> List[Dict[str, Any]]:
         """
         Retrieve memories with task-type-aware weighting.
@@ -427,6 +428,12 @@ class MemoryRetrieval:
             token_budget: Optional maximum token budget for returned memories.
                          If specified, results will be optimized to fit within
                          this budget using importance/recency/relevance scoring.
+            persist_boost: When True, persist the retrieval-time importance boost
+                         to disk ("use it or lose it" reinforcement). Default
+                         False so manual/on-demand retrievals (dashboard, MCP)
+                         do NOT silently reinforce importance; only the autonomous
+                         RARV loop opts in. The in-memory boost that shapes the
+                         returned ranking is applied either way.
         Returns:
             List of memory items with source field indicating origin
@@ -476,10 +483,20 @@ class MemoryRetrieval:
         # Apply recency boost
         merged = self._apply_recency_boost(merged, boost_factor=0.1)
-        # Boost importance for retrieved memories (use it or lose it)
+        # Boost importance for retrieved memories (use it or lose it). The
+        # in-memory boost shapes the returned ranking; persist_boost writes the
+        # reinforcement to disk (retrieval-F1: boost_on_retrieval alone never
+        # persisted). Persistence is best-effort: a locked/missing record must
+        # never break retrieval, so failures are swallowed (mirrors other
+        # best-effort writes).
         if hasattr(self.storage, 'boost_on_retrieval'):
             for memory in merged[:top_k]:
                 self.storage.boost_on_retrieval(memory, boost=0.05)
+                if persist_boost and hasattr(self.storage, 'persist_boost'):
+                    try:
+                        self.storage.persist_boost(memory, boost=0.05)
+                    except Exception:
+                        pass
         # Apply token budget optimization if specified
         if token_budget is not None and token_budget > 0:

package/memory/storage.py CHANGED

@@ -595,8 +595,32 @@ class MemoryStorage:
                             lock_path.unlink()
                     except OSError:
                         pass
-                    # Clean up any remaining lock files before checking if dir is empty
+                    # Clean up any remaining lock files before checking if dir
+                    # is empty. A blanket unlink of every *.lock here is the same
+                    # flock+unlink inode-replacement race fixed in _file_lock and
+                    # _cleanup_stale_locks: a lock held by a concurrent writer of
+                    # a DIFFERENT episode in this same date dir would have its
+                    # inode unlinked, letting a third writer create a new inode
+                    # and enter the critical section concurrently (data loss).
+                    # Only unlink a lock we can take ourselves (nobody holds it);
+                    # held locks are left in place (their writer is still active).
                     for stale_lock in date_dir.glob("*.lock"):
+                        probe_fd = None
+                        try:
+                            probe_fd = open(stale_lock, "a")
+                            fcntl.flock(probe_fd.fileno(),
+                                        fcntl.LOCK_EX | fcntl.LOCK_NB)
+                        except (OSError, BlockingIOError):
+                            # Held by a live writer -- leave it alone.
+                            continue
+                        finally:
+                            if probe_fd is not None:
+                                try:
+                                    fcntl.flock(probe_fd.fileno(),
+                                                fcntl.LOCK_UN)
+                                except OSError:
+                                    pass
+                                probe_fd.close()
                         try:
                             stale_lock.unlink()
                         except OSError:
@@ -1359,6 +1383,142 @@ class MemoryStorage:
         return memory
+    def persist_boost(
+        self,
+        memory: Dict[str, Any],
+        boost: float = 0.1,
+    ) -> bool:
+        """
+        Persist a retrieval-time boost to disk ("use it or lose it").
+        boost_on_retrieval mutates an in-memory dict only; without this the
+        stored importance/access_count never rises, so repeated retrieval can
+        never reinforce a memory against decay (retrieval-F1). This method
+        applies the SAME boost math to the record as it currently exists on
+        disk, under one exclusive _file_lock spanning a FRESH read -> mutate
+        -> _atomic_write (mirrors _decay_episodic / _decay_semantic).
+        Race-safety: the boost is applied to the freshly-read record, NOT to
+        the passed-in `memory` dict. So a concurrent content edit landed by
+        another writer is preserved (we only overwrite importance,
+        access_count, last_accessed), and no retrieval-only transient fields
+        (_score, _source, _collection) leak into the stored record. This is
+        the lost-update-safe pattern WAVE6 established for decay.
+        Keyed by memory["id"] and the collection marker retrieval attaches
+        (_source, falling back to _collection). Covers episodic (per-file) and
+        semantic patterns.json. Collections without an updater degrade
+        gracefully (return False, no crash):
+          - skills are keyed on disk by name, not id, so an id-keyed boost
+            cannot reliably target the file; skipped honestly.
+          - the legacy semantic/anti-patterns.json store has NO updater
+            anywhere in this module, so there is nothing to write back to;
+            skipped honestly rather than fabricating a writer.
+        Args:
+            memory: A retrieved memory dict (must carry "id" and a source
+                marker). The dict itself is not written to disk.
+            boost: Amount to boost importance (default 0.1).
+        Returns:
+            True if a record was found and persisted, False otherwise.
+        """
+        memory_id = memory.get("id")
+        if not memory_id:
+            return False
+        source = memory.get("_source") or memory.get("_collection") or ""
+        if source == "episodic":
+            return self._persist_boost_episodic(str(memory_id), boost)
+        if source == "semantic":
+            return self._persist_boost_semantic(str(memory_id), boost)
+        # skills (keyed by name on disk) and the legacy anti-patterns.json
+        # store (no updater exists in this module) cannot be safely targeted
+        # by an id-keyed boost; skip rather than fabricate a writer.
+        return False
+    def _persist_boost_episodic(self, memory_id: str, boost: float) -> bool:
+        """Apply and persist a boost to one episodic record, keyed by id.
+        Locates the per-file record (task-<id>.json across date dirs) then does
+        a lock-spanning fresh-read -> boost -> atomic-write, mirroring
+        _decay_episodic. The id is sanitized exactly as save_episode does so a
+        sanitized-on-write filename is still found.
+        """
+        episodic_dir = self.base_path / "episodic"
+        if not episodic_dir.exists():
+            return False
+        safe_id = "".join(
+            c if c.isalnum() or c in "-_" else "_"
+            for c in memory_id
+        )
+        for date_dir in episodic_dir.iterdir():
+            if not date_dir.is_dir():
+                continue
+            file_path = date_dir / f"task-{safe_id}.json"
+            if not file_path.exists():
+                continue
+            # One exclusive lock spanning read-mutate-write. boost_on_retrieval
+            # mutates the freshly-read record in place (importance/access_count/
+            # last_accessed only), so a concurrent content edit on disk is
+            # preserved. _atomic_write re-enters the same reentrant lock.
+            with self._file_lock(file_path, exclusive=True):
+                if not file_path.exists():
+                    return False
+                try:
+                    with open(file_path, "r", encoding="utf-8") as f:
+                        data = json.load(f)
+                except (json.JSONDecodeError, OSError, UnicodeDecodeError):
+                    return False
+                if not data:
+                    return False
+                self.boost_on_retrieval(data, boost=boost)
+                self._atomic_write(file_path, data)
+            return True
+        return False
+    def _persist_boost_semantic(self, memory_id: str, boost: float) -> bool:
+        """Apply and persist a boost to one semantic pattern, keyed by id.
+        Patterns live in a single semantic/patterns.json list. Lock-spanning
+        fresh read -> boost the matching entry -> atomic write, mirroring
+        _decay_semantic / save_pattern.
+        """
+        patterns_path = self.base_path / "semantic" / "patterns.json"
+        if not patterns_path.exists():
+            return False
+        with self._file_lock(patterns_path, exclusive=True):
+            if not patterns_path.exists():
+                return False
+            try:
+                with open(patterns_path, "r", encoding="utf-8") as f:
+                    patterns_file = json.load(f)
+            except (json.JSONDecodeError, OSError, UnicodeDecodeError):
+                return False
+            if not patterns_file:
+                return False
+            patterns = patterns_file.get("patterns", [])
+            for pattern in patterns:
+                if not isinstance(pattern, dict):
+                    continue
+                if pattern.get("id") == memory_id:
+                    self.boost_on_retrieval(pattern, boost=boost)
+                    patterns_file["last_updated"] = datetime.now(
+                        timezone.utc
+                    ).isoformat()
+                    self._atomic_write(patterns_path, patterns_file)
+                    return True
+        return False
     def batch_apply_decay(
         self,
         collection: str = "all",

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "loki-mode",
   "mcpName": "io.github.asklokesh/loki-mode",
-  "version": "7.66.1",
+  "version": "7.68.0",
   "description": "Loki Mode by Autonomi. Autonomous spec-to-product system: takes a PRD, GitHub issue, OpenAPI/JSON/YAML, or one-line brief to a deployed app via the RARV-C closure loop with 8 quality gates. Provider-agnostic (Claude Code, OpenAI Codex, Cline, Aider).",
   "keywords": [
     "agent",

package/plugins/loki-mode/.claude-plugin/plugin.json CHANGED

@@ -2,7 +2,7 @@
   "$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
   "name": "loki-mode",
   "displayName": "Loki Mode",
-  "version": "7.66.1",
+  "version": "7.68.0",
   "description": "Autonomous spec-to-product build system with a built-in trust layer (RARV-C closure loop, 8 quality gates, completion council). Ships Loki's spec-hardening, drift-detection, and deterministic PR verification commands plus the Loki MCP server.",
   "author": {
     "name": "Autonomi",