RubyGems - claude_memory - Versions diffs - 0.12.1 → 0.13.1 - Mend

claude_memory 0.12.1 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

checksums.yaml +4 -4
data/.claude/memory.sqlite3 +0 -0
data/.claude/rules/claude_memory.generated.md +6 -1
data/.claude/settings.local.json +2 -1
data/.claude-plugin/marketplace.json +2 -2
data/.claude-plugin/plugin.json +2 -2
data/CHANGELOG.md +38 -0
data/CLAUDE.md +11 -6
data/README.md +35 -0
data/db/migrations/019_add_observations.rb +43 -0
data/db/migrations/020_add_observation_promotion.rb +33 -0
data/docs/GETTING_STARTED.md +38 -0
data/docs/api_stability.md +16 -5
data/docs/architecture.md +18 -6
data/docs/audit_runbook.md +67 -0
data/docs/dashboard.md +28 -0
data/docs/improvements.md +173 -1
data/docs/influence/mastra-observational-memory.md +198 -0
data/docs/influence/strands-agent-sops.md +163 -0
data/docs/quality_review.md +45 -0
data/lib/claude_memory/audit/checks.rb +149 -0
data/lib/claude_memory/audit/runner.rb +4 -0
data/lib/claude_memory/commands/census_command.rb +1 -1
data/lib/claude_memory/commands/hook_command.rb +16 -3
data/lib/claude_memory/commands/initializers/hooks_configurator.rb +3 -1
data/lib/claude_memory/commands/install_skill_command.rb +4 -0
data/lib/claude_memory/commands/observations_command.rb +367 -0
data/lib/claude_memory/commands/registry.rb +1 -0
data/lib/claude_memory/commands/skills/reflect.md +68 -0
data/lib/claude_memory/commands/stats_command.rb +60 -1
data/lib/claude_memory/dashboard/api.rb +4 -0
data/lib/claude_memory/dashboard/index.html +154 -2
data/lib/claude_memory/dashboard/observations.rb +115 -0
data/lib/claude_memory/dashboard/server.rb +1 -0
data/lib/claude_memory/distill/extraction.rb +6 -4
data/lib/claude_memory/distill/null_distiller.rb +108 -3
data/lib/claude_memory/distill/reference_material_detector.rb +4 -1
data/lib/claude_memory/domain/observation.rb +118 -0
data/lib/claude_memory/embeddings/generator.rb +1 -1
data/lib/claude_memory/hook/context_injector.rb +125 -2
data/lib/claude_memory/mcp/handlers/management_handlers.rb +113 -2
data/lib/claude_memory/mcp/handlers/query_handlers.rb +48 -1
data/lib/claude_memory/mcp/instructions_builder.rb +1 -0
data/lib/claude_memory/mcp/query_guide.rb +28 -0
data/lib/claude_memory/mcp/tool_definitions.rb +58 -0
data/lib/claude_memory/mcp/tools.rb +3 -0
data/lib/claude_memory/observe/observations_renderer.rb +49 -0
data/lib/claude_memory/observe/reflector.rb +107 -0
data/lib/claude_memory/observe/token_overlap_matcher.rb +55 -0
data/lib/claude_memory/publish.rb +53 -1
data/lib/claude_memory/resolve/resolver.rb +45 -8
data/lib/claude_memory/store/schema_manager.rb +1 -1
data/lib/claude_memory/store/sqlite_store.rb +181 -0
data/lib/claude_memory/sweep/maintenance.rb +15 -1
data/lib/claude_memory/sweep/sweeper.rb +7 -1
data/lib/claude_memory/version.rb +1 -1
data/lib/claude_memory.rb +6 -0
metadata +12 -1

data/lib/claude_memory/dashboard/index.html CHANGED Viewed

@@ -163,6 +163,39 @@
   .delta.flat { color: var(--text-dim); }
   .moments-sub { font-size: 12px; color: var(--text-dim); margin-top: 6px; }
+  /* Observations sidebar panel */
+  .obs-headline {
+    display: flex;
+    gap: 14px;
+    align-items: baseline;
+    margin-bottom: 6px;
+  }
+  .obs-headline .n {
+    font-size: 32px;
+    font-weight: 600;
+    letter-spacing: -0.02em;
+    line-height: 1;
+  }
+  .obs-headline .obs-stat { display: flex; flex-direction: column; gap: 2px; }
+  .obs-headline .obs-stat-n { font-size: 18px; font-weight: 600; color: var(--text); line-height: 1; }
+  .obs-headline .obs-stat-label { font-size: 10px; color: var(--text-faint); text-transform: uppercase; letter-spacing: 0.06em; }
+  .obs-sub { font-size: 12px; color: var(--text-dim); margin: 8px 0; }
+  .obs-breakdowns { display: flex; flex-direction: column; gap: 8px; margin-top: 10px; }
+  .obs-breakdown-label { font-size: 10px; text-transform: uppercase; letter-spacing: 0.06em; color: var(--text-faint); margin-bottom: 4px; }
+  .obs-badges { display: flex; flex-wrap: wrap; gap: 6px; }
+  .obs-badge {
+    display: inline-flex;
+    align-items: center;
+    gap: 4px;
+    padding: 2px 8px;
+    border-radius: 999px;
+    border: 1px solid var(--border);
+    background: var(--surface2);
+    font-size: 11px;
+    color: var(--text-dim);
+  }
+  .obs-badge .obs-badge-n { font-weight: 600; color: var(--text); font-variant-numeric: tabular-nums; }
   /* Knowledge-base sidebar panel */
   .kb-totals {
     display: flex;
@@ -1157,6 +1190,13 @@
       <div class="moments-sub" id="moments-sub"></div>
     </div>
+    <div class="panel" id="panel-observations">
+      <div class="panel-label">Episodic observations
+        <span class="panel-hint" title="The 'what happened' log that complements facts ('what is true'). Observations accrue as the Observer runs, and become facts once corroborated ≥2×.">ⓘ</span>
+      </div>
+      <div id="obs-panel-body"></div>
+    </div>
     <div class="panel" id="panel-review">
       <div class="panel-label">Needs review</div>
       <div class="review-rows" id="review-rows"></div>
@@ -1234,6 +1274,7 @@
   </div>
   <div class="drawer-tabs">
     <div class="drawer-tab active" data-adv="knowledge">Knowledge</div>
+    <div class="drawer-tab" data-adv="observations">Observations</div>
     <div class="drawer-tab" data-adv="overview">Overview</div>
     <div class="drawer-tab" data-adv="facts">Facts</div>
     <div class="drawer-tab" data-adv="explore">Explore</div>
@@ -1253,6 +1294,13 @@
     <div class="knowledge-body" id="knowledge-body"></div>
   </div>
+  <div class="drawer-panel" id="adv-observations">
+    <div id="obs-summary" class="adv-card"></div>
+    <div class="adv-card" style="padding: 0;">
+      <div id="obs-recent"></div>
+    </div>
+  </div>
   <div class="drawer-panel" id="adv-overview">
     <div id="overview-rows"></div>
     <div class="adv-card">
@@ -1444,7 +1492,77 @@ async function api(path, params = {}) {
 // ==================== Load cycle ====================
 async function loadAll() {
-  await Promise.all([loadHealth(), loadTrust(), loadMoments(), loadKnowledgePanel(), loadReusePanel()]);
+  await Promise.all([loadHealth(), loadTrust(), loadMoments(), loadKnowledgePanel(), loadReusePanel(), loadObservationsPanel()]);
+}
+// Sidebar "Episodic observations" panel: headline counts plus priority/kind breakdowns.
+// The recent-observations timeline is rendered separately by loadObservations (Advanced drawer).
+async function loadObservationsPanel() {
+  const data = await api('observations');  // api() is defined elsewhere in this file
+  const t = data.totals || {}, c = data.compression || {}, corr = data.corroboration || {};  // missing sections fall back to {}
+  const body = document.getElementById('obs-panel-body');
+  if (!body) return;  // panel markup absent — nothing to render
+  const active = t.active || 0;
+  if (!active) {  // zero or missing active count: show the empty-state card instead
+    body.innerHTML = `
+      <div class="feed-empty" style="padding: 24px 16px; text-align: left;">
+        <h3>No observations yet</h3>
+        <p>The episodic log — "what happened" — accrues as the Observer runs over your sessions. Once an observation is corroborated ≥2×, it becomes a fact.</p>
+      </div>`;
+    return;
+  }
+  const promotable = corr.promotable || 0;  // shown as "ready to promote"
+  body.innerHTML = `
+    <div class="obs-headline">
+      <div class="obs-stat">
+        <span class="n">${active.toLocaleString()}</span>
+        <span class="obs-stat-label">active</span>
+      </div>
+      <div class="obs-stat">
+        <span class="obs-stat-n">${promotable.toLocaleString()}</span>
+        <span class="obs-stat-label">ready to promote</span>
+      </div>
+      <div class="obs-stat">
+        <span class="obs-stat-n">${c.ratio ? c.ratio + '×' : '—'}</span>
+        <span class="obs-stat-label">compression</span>
+      </div>
+    </div>
+    <div class="obs-sub">${promotable
+      ? `${promotable.toLocaleString()} ready to promote — observations corroborated ≥2× become facts (max seen ×${corr.max || 0}).`
+      : `None ready yet — observations corroborated ≥2× become facts (max seen ×${corr.max || 0}).`}</div>
+    <div class="obs-breakdowns">
+      ${renderObsBreakdown('By priority', priorityBadges(data.by_priority || {}))}
+      ${renderObsBreakdown('By kind', kindBadges(data.by_kind || {}))}
+    </div>`;
+}
+const PRIORITY_MARKERS = {1: '🔴', 2: '🟡', 3: '🟢'};  // emoji shown for each priority level
+const PRIORITY_LABELS = {1: 'important', 2: 'maybe', 3: 'info'};  // text label for each priority level
+function priorityBadges(byPriority) {  // byPriority: {priority: count}; returns concatenated badge HTML ('' when nothing to show)
+  return [1, 2, 3]  // fixed display order; priorities outside 1..3 are never rendered
+    .filter(p => byPriority[p] || byPriority[String(p)])  // NOTE(review): obj[1] and obj["1"] address the same property, so the String(p) fallback looks redundant
+    .map(p => {
+      const n = byPriority[p] || byPriority[String(p)] || 0;
+      return `<span class="obs-badge">${PRIORITY_MARKERS[p]} ${PRIORITY_LABELS[p]} <span class="obs-badge-n">${n}</span></span>`;
+    }).join('');
+}
+function kindBadges(byKind) {  // byKind: {kind: count}; returns concatenated badge HTML ('' for an empty object)
+  return Object.entries(byKind)
+    .sort((a, b) => b[1] - a[1])  // highest count first
+    .map(([kind, n]) => `<span class="obs-badge">${esc(kind)} <span class="obs-badge-n">${n}</span></span>`)
+    .join('');
+}
+function renderObsBreakdown(label, badgesHtml) {  // label is interpolated unescaped; the callers in view pass literal strings
+  if (!badgesHtml) return '';  // no badges: omit the whole labelled section
+  return `<div>
+    <div class="obs-breakdown-label">${label}</div>
+    <div class="obs-badges">${badgesHtml}</div>
+  </div>`;
 }
 async function loadKnowledgePanel() {
@@ -2180,7 +2298,41 @@ function switchAdvTab(name) {
 // ==================== Advanced drawer loaders ====================
 async function loadAdvanced() {
-  await Promise.all([loadKnowledge(), loadOverview(), loadFacts(), loadEfficacy(), loadConflicts(), loadActivityLog(), loadTelemetry()]);
+  await Promise.all([loadKnowledge(), loadObservations(), loadOverview(), loadFacts(), loadEfficacy(), loadConflicts(), loadActivityLog(), loadTelemetry()]);
+}
+async function loadObservations() {  // Advanced drawer "Observations" tab: fills #obs-summary and #obs-recent
+  const data = await api('observations');  // same endpoint the sidebar panel reads; fetched again here
+  const t = data.totals || {}, c = data.compression || {}, corr = data.corroboration || {};  // missing sections fall back to {}
+  const promotable = corr.promotable || 0;
+  const summary = document.getElementById('obs-summary');
+  if (summary) {  // unlike the sidebar panel, the summary renders even when every count is zero
+    summary.innerHTML = `
+      <h3>Episodic observations <span style="color: var(--text-faint); font-weight: normal;">— what happened</span></h3>
+      <div style="display: flex; flex-wrap: wrap; gap: 16px; color: var(--text-dim); font-size: 13px;">
+        <span><span class="big">${(t.active || 0).toLocaleString()}</span> active</span>
+        <span>${t.consolidated || 0} consolidated · ${t.expired || 0} expired · ${t.promoted || 0} promoted</span>
+        <span>${promotable} ready to promote (max ×${corr.max || 0}) — observations corroborated ≥2× become facts</span>
+        <span>compression ${c.ratio ? c.ratio + '×' : '—'} <span style="color: var(--text-faint);">(${(c.source_tokens || 0).toLocaleString()} → ${(c.observation_tokens || 0).toLocaleString()} tok)</span></span>
+      </div>
+      <div class="obs-breakdowns" style="margin-top: 12px;">
+        ${renderObsBreakdown('By priority', priorityBadges(data.by_priority || {}))}
+        ${renderObsBreakdown('By kind', kindBadges(data.by_kind || {}))}
+      </div>`;
+  }
+  const recentEl = document.getElementById('obs-recent');
+  const recent = data.recent || [];  // rendered in the order the API returns them
+  if (recentEl) {  // NOTE(review): o.id, o.priority and o.corroboration_count are interpolated without esc(); assumes the API returns numbers — confirm
+    recentEl.innerHTML = recent.length ? recent.map(o => `
+      <div style="padding: 8px 12px; border-bottom: 1px solid var(--border);">
+        <span style="color: var(--text-faint); font-size: 11px;">#${o.id} · ${esc(o.kind)} · p${o.priority}${o.corroboration_count > 1 ? ' · ×' + o.corroboration_count : ''} · ${esc(o.observed_ago || '')}</span>
+        <div style="color: var(--text); font-size: 13px;">${PRIORITY_MARKERS[o.priority] ? PRIORITY_MARKERS[o.priority] + ' ' : ''}${esc(o.body || '')}</div>
+      </div>`).join('') : `
+      <div class="feed-empty" style="text-align: left;">
+        <h3>No observations yet</h3>
+        <p>The episodic log — "what happened" — accrues from your sessions as the Observer runs. Once an observation is corroborated ≥2×, it gets promoted into a fact.</p>
+      </div>`;
+  }
 }
 async function loadTelemetry() {

data/lib/claude_memory/dashboard/observations.rb ADDED Viewed

@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+module ClaudeMemory
+  module Dashboard
+    # Observability for the episodic observation layer. Surfaces status totals,
+    # active counts by kind/priority, corroboration + promotion readiness, a
+    # Mastra-style compression ratio (source content tokens ÷ observation tokens),
+    # and a recent timeline. Aggregated across the project and global stores.
+    #
+    # Pulled out of Dashboard::API so the queries live next to the data.
+    class Observations
+      RECENT_LIMIT = 20 # max timeline rows fetched per store and kept after merging
+      def initialize(manager) # manager must respond to #project_store and #global_store
+        @manager = manager
+      end
+      def report # @return [Hash] keys: totals, by_kind, by_priority, corroboration, compression, recent
+        stores = observation_stores
+        return empty_report if stores.empty? # no store has an observations table: zeroed payload, same keys
+        {
+          totals: totals(stores),
+          by_kind: by_field(stores, :kind),
+          by_priority: by_field(stores, :priority),
+          corroboration: corroboration(stores),
+          compression: compression(stores),
+          recent: recent(stores)
+        }
+      end
+      private
+      def observation_stores # non-nil stores whose database has an observations table
+        [@manager.project_store, @manager.global_store].compact.select { |s| s.db.table_exists?(:observations) }
+      end
+      def empty_report # mirrors the shape of #report with every count at zero
+        {
+          totals: {active: 0, consolidated: 0, expired: 0, promoted: 0},
+          by_kind: {}, by_priority: {},
+          corroboration: {max: 0, promotable: 0},
+          compression: {observation_tokens: 0, source_tokens: 0, ratio: nil},
+          recent: []
+        }
+      end
+      def totals(stores)
+        {
+          active: count_where(stores, status: "active"),
+          consolidated: count_where(stores, status: "consolidated"),
+          expired: count_where(stores, status: "expired"),
+          promoted: stores.sum { |s| s.observations.exclude(promoted_at: nil).count } # promoted_at set, whatever the status
+        }
+      end
+      def count_where(stores, **filter) # rows matching the filter, summed over all stores
+        stores.sum { |s| s.observations.where(**filter).count }
+      end
+      def by_field(stores, field) # @return [Hash] field value => count of active rows across stores
+        merged = Hash.new(0)
+        stores.each do |store|
+          store.observations.where(status: "active").group_and_count(field).each do |row|
+            merged[row[field]] += row[:count]
+          end
+        end
+        merged
+      end
+      def corroboration(stores)
+        threshold = Domain::Observation::PROMOTION_THRESHOLD
+        {
+          max: stores.map { |s| s.observations.where(status: "active").max(:corroboration_count) || 0 }.max, # a store with no active rows contributes 0
+          promotable: stores.sum { |s| # active, not yet promoted, at or above the threshold
+            s.observations.where(status: "active", promoted_at: nil).where { corroboration_count >= threshold }.count
+          }
+        }
+      end
+      # Source content tokens vs the tokens the observations distilled them into.
+      # ratio > 1 means the episodic log is a compression of its source.
+      def compression(stores)
+        obs_tokens = stores.sum { |s| s.observations.where(status: "active").sum(:token_count) || 0 }
+        source_tokens = stores.sum { |s| source_tokens_for(s) }
+        ratio = obs_tokens.zero? ? nil : (source_tokens.to_f / obs_tokens).round(1) # nil instead of dividing by zero
+        {observation_tokens: obs_tokens, source_tokens: source_tokens, ratio: ratio}
+      end
+      def source_tokens_for(store) # token estimate for the distinct content items behind active observations
+        ids = store.observations
+          .where(status: "active").exclude(source_content_item_id: nil)
+          .distinct.select(:source_content_item_id)
+          .map { |r| r[:source_content_item_id] }
+        return 0 if ids.empty?
+        bytes = store.content_items.where(id: ids).sum(:byte_len) || 0
+        (bytes / 4.0).round # estimates tokens as bytes / 4
+      end
+      def recent(stores) # merged newest-first timeline, at most RECENT_LIMIT entries
+        stores
+          .flat_map { |s| s.recent_observations(limit: RECENT_LIMIT) }
+          .sort_by { |o| o[:observed_at].to_s }.reverse.first(RECENT_LIMIT) # NOTE(review): string sort; newest-first only if observed_at strings sort chronologically — confirm
+          .map do |o|
+            {
+              id: o[:id], kind: o[:kind], priority: o[:priority],
+              corroboration_count: o[:corroboration_count], body: o[:body],
+              observed_ago: Core::RelativeTime.format(o[:observed_at])
+            }
+          end
+      end
+    end
+  end
+end

data/lib/claude_memory/dashboard/server.rb CHANGED Viewed

@@ -62,6 +62,7 @@ module ClaudeMemory
           }
         }
         @server.mount_proc("/api/timeline") { |_req, res| with_fresh_connections { json_response(res, api.timeline) } }
+        @server.mount_proc("/api/observations") { |_req, res| with_fresh_connections { json_response(res, api.observations) } }
         @server.mount_proc("/api/recall") { |req, res| with_fresh_connections { json_response(res, api.recall(req.query)) } }
         @server.mount_proc("/api/conflicts") { |req, res| with_fresh_connections { handle_conflicts(api, req, res) } }
         @server.mount_proc("/api/moments") { |req, res| with_fresh_connections { handle_moments(api, req, res) } }

data/lib/claude_memory/distill/extraction.rb CHANGED Viewed

@@ -3,17 +3,18 @@
 module ClaudeMemory
   module Distill
     class Extraction
-      attr_reader :entities, :facts, :decisions, :signals
+      attr_reader :entities, :facts, :decisions, :signals, :observations
-      def initialize(entities: [], facts: [], decisions: [], signals: [])
+      def initialize(entities: [], facts: [], decisions: [], signals: [], observations: [])
         @entities = entities
         @facts = facts
         @decisions = decisions
         @signals = signals
+        @observations = observations
       end
       def empty?
-        entities.empty? && facts.empty? && decisions.empty? && signals.empty?
+        entities.empty? && facts.empty? && decisions.empty? && signals.empty? && observations.empty?
       end
       def to_h
@@ -21,7 +22,8 @@ module ClaudeMemory
           entities: entities,
           facts: facts,
           decisions: decisions,
-          signals: signals
+          signals: signals,
+          observations: observations
         }
       end
     end

data/lib/claude_memory/distill/null_distiller.rb CHANGED Viewed

@@ -21,7 +21,10 @@ module ClaudeMemory
       ENTITY_PATTERNS = {
         "database" => /\b(postgresql|postgres|mysql|sqlite|mongodb|redis)\b/i,
         "framework" => /\b(rails|sinatra|django|express|next\.?js|react|vue)\b/i,
-        "language" => /\b(ruby|python|javascript|typescript|go|rust)\b/i,
+        # `Go` is matched case-sensitively (via the inline (?-i:) flag) so the
+        # English verb "go" / "go-to" doesn't masquerade as the language; the
+        # other languages stay case-insensitive. `golang` normalizes to `go`.
+        "language" => /\b(ruby|python|javascript|typescript|rust|(?-i:Go)|golang)\b/i,
         "platform" => /\b(aws|gcp|azure|heroku|vercel|netlify|docker|kubernetes)\b/i
       }.freeze
@@ -35,17 +38,49 @@ module ClaudeMemory
         /\buniversally\b/i
       ].freeze
+      # Observation-specific convention patterns: stricter than the shared
+      # CONVENTION_PATTERNS. Bare `always (.+)` / `never (.+)` / `we use (.+)`
+      # match code, prose, and instruction text ("never answer from memory",
+      # "never nil. def …"), so observations require explicit convention
+      # framing or a first-person "we always/never".
+      OBSERVATION_CONVENTION_PATTERNS = [
+        /\bconvention[:\s]+(.+)/i,
+        /\bstandard[:\s]+(.+)/i,
+        /\bwe\s+(?:should\s+)?(?:always|never)\s+(.+)/i
+      ].freeze
+      # Bodies that look like code / JSON / shell / markup / transcript rather
+      # than a prose statement. High-precision gate: the Layer-1 observer scrapes
+      # raw transcript spans, which on a code-heavy project are dominated by
+      # source, specs, docs, and tool output — none of which are observations.
+      # (2026-06-23 audit, improvements #74: the prior signature let 38/117
+      # obvious-noise rows through — spec fixtures like `kind: "decision"`,
+      # CHANGELOG table rows, benchmark tree output, the distiller's own source
+      # comments — and they were being injected into SessionStart.)
+      NOISE_BODY_SIGNATURE = Regexp.union(
+        /\bdef\s|\bclass\s|\bmodule\s/,                      # Ruby definitions
+        /=>|::|","|":\s*"|[{}]|\$\(|&&|\|\|/,                # code / JSON / shell punctuation
+        /\w+:\s*["\[{\d]/,                                   # code/JSON key: "value" / key: 1 / key: [
+        /\w\(/,                                              # method/function call: expect(, insert_observation(
+        /\s\|\s/,                                            # spaced table pipe (doc / CHANGELOG rows)
+        /[\u{2500}-\u{257f}]/,                               # box-drawing glyphs (tree / benchmark output)
+        /\(vector\)|\(text\)/,                               # benchmark mode labels
+        /parentUuid|isSidechain|toolUseID|hookName|"type":/  # raw JSONL transcript fields
+      )
       def distill(text, content_item_id: nil)
         entities = extract_entities(text)
         facts = extract_facts(text, entities)
         decisions = extract_decisions(text)
         signals = extract_signals(text)
+        observations = extract_observations(text)
         Extraction.new(
           entities: entities,
           facts: facts,
           decisions: decisions,
-          signals: signals
+          signals: signals,
+          observations: observations
         )
       end
@@ -55,7 +90,9 @@ module ClaudeMemory
         found = []
         ENTITY_PATTERNS.each do |type, pattern|
           text.scan(pattern).flatten.uniq.each do |name|
-            found << {type: type, name: name.downcase, confidence: 0.7}
+            normalized = name.downcase
+            normalized = "go" if normalized == "golang"
+            found << {type: type, name: normalized, confidence: 0.7}
           end
         end
         found.uniq { |e| [e[:type], e[:name]] }
@@ -103,6 +140,74 @@ module ClaudeMemory
         signals
       end
+      # Layer-1 Observer: emit episodic observations for the same signals the
+      # distiller can detect by regex. A decision being made and a convention
+      # being stated are both "things that happened" worth logging in the
+      # episodic layer, independent of the semantic facts they also produce.
+      # Decisions are 🔴 (important), conventions 🟡 (maybe) — the priority is
+      # an internal Observer/Reflector signal. Richer observations come from
+      # the Layer-2 Claude-as-observer pass (a later phase).
+      def extract_observations(text) # returns at most 10 hashes shaped by build_observation
+        observations = []
+        scope_hint = global_scope_signal?(text) ? "global" : "project" # one hint for the whole text, shared by every observation
+        DECISION_PATTERNS.each do |pattern|
+          text.scan(pattern).flatten.each do |match|
+            observations << build_observation("decision", Domain::Observation::IMPORTANT, "decided to #{match.strip}", scope_hint)
+          end
+        end
+        OBSERVATION_CONVENTION_PATTERNS.each do |pattern|
+          text.scan(pattern).flatten.each do |match| # each captured convention is logged with kind "preference"
+            observations << build_observation("preference", Domain::Observation::MAYBE, match.strip, scope_hint)
+          end
+        end
+        observations.compact.uniq { |o| [o[:kind], o[:body]] }.first(10) # drop rejected (nil) entries, dedupe on kind+body, cap at 10
+      end
+      # Builds one observation hash ({kind:, priority:, body:, scope_hint:}), or
+      # returns nil when the cleaned body is empty, looks like noise, or has fewer than three words.
+      def build_observation(kind, priority, body, scope_hint)
+        cleaned = trim_to_statement(clean_observation_body(body)) # normalize artifacts first, then cut to the first sentence
+        return nil if cleaned.empty? || noise_body?(cleaned) || cleaned.split.size < 3
+        {kind: kind, priority: priority, body: cleaned.slice(0, 500), scope_hint: scope_hint} # NOTE(review): trim_to_statement already caps length near 240, so the 500-char slice looks redundant
+      end
+      # Cap a captured span to its first sentence (and a hard length limit) so a
+      # greedy `.+` match can't swallow a whole code block or JSON line.
+      def trim_to_statement(text) # always returns a String (empty for nil or blank input)
+        s = text.to_s.strip
+        (s[/\A.{0,240}?[.!?](?=\s|\z)/m] || s[0, 240]).to_s.strip # first sentence ending within ~240 chars, else a hard 240-char cut
+      end
+      # A usable observation reads as a prose sentence. Reject anything that
+      # doesn't begin like one (leading /, |, ·, or box-drawing glyphs from a
+      # code comment or tool output) or that carries a code/markup/transcript
+      # signature. Returns true when the body should be discarded.
+      def noise_body?(body)
+        return true unless body.match?(/\A[A-Za-z]/) # must start with an ASCII letter
+        body.match?(NOISE_BODY_SIGNATURE)
+      end
+      # The distiller scans raw transcript text, which is JSONL — so a captured
+      # body can carry JSON/escaping artifacts (`\n`, `\"`, a trailing `"}`,
+      # a leading `= `/`### ` from injected memory/markdown). Normalize them out:
+      # cleaner bodies read better in the injected log AND normalize more
+      # consistently, which is what the Reflector's dedup/corroboration keys off.
+      def clean_observation_body(body) # returns a String; may be empty once artifacts are stripped
+        body.to_s
+          .gsub(/\\+[ntr]/, " ")   # literal \n \t \r (even multiply-escaped) -> space
+          .gsub(/\\+"/, '"')       # escaped quote -> quote
+          .gsub(/\\+/, "")         # residual backslashes
+          .gsub(/\s+/, " ")        # collapse whitespace
+          .sub(/\A[\s"'`,:=#*>\-\]}]+/, "")  # leading JSON/markdown artifacts
+          .sub(/[\s"'`,:\-\]}]+\z/, "")       # trailing JSON artifacts
+          .strip
+      end
       def global_scope_signal?(text)
         GLOBAL_SCOPE_PATTERNS.any? { |pattern| text.match?(pattern) }
       end

data/lib/claude_memory/distill/reference_material_detector.rb CHANGED Viewed

@@ -76,11 +76,14 @@ module ClaudeMemory
           end
         end
+        # Only facts are transformed; every other field must pass through
+        # unchanged (an earlier version silently dropped observations).
         Distill::Extraction.new(
           entities: extraction.entities,
           facts: new_facts,
           decisions: extraction.decisions,
-          signals: extraction.signals
+          signals: extraction.signals,
+          observations: extraction.observations
         )
       end

data/lib/claude_memory/domain/observation.rb ADDED Viewed

@@ -0,0 +1,118 @@
+# frozen_string_literal: true
+module ClaudeMemory
+  module Domain
+    # Domain model representing an episodic observation — "what happened",
+    # as opposed to a Fact's "what is true". Instances are frozen on construction.
+    #
+    # Priority follows Mastra's traffic-light scheme and is an internal signal
+    # for the Observer/Reflector pipeline: 1 = important (🔴), 2 = maybe (🟡),
+    # 3 = info only (🟢). Only 🔴 is meant to survive into the actor's prompt.
+    class Observation
+      KINDS = %w[user_statement agent_action tool_result preference decision event].freeze # NOTE(review): not enforced by validate!; any kind value is accepted
+      IMPORTANT = 1
+      MAYBE = 2
+      INFO = 3
+      # Minimum corroboration (repeated sightings) before an observation may be
+      # promoted to a structured fact. The anti-hallucination gate: a one-off
+      # mention never becomes a committed fact.
+      PROMOTION_THRESHOLD = 2
+      attr_reader :id, :body, :kind, :priority, :scope, :project_path,
+        :source_content_item_id, :consolidated_into, :token_count,
+        :status, :session_id, :observed_at, :created_at, :reflected_at,
+        :corroboration_count, :promoted_at, :promoted_fact_id
+      # @param attributes [Hash] symbol-keyed attributes; keys match the attr_reader names above
+      # @raise [ArgumentError] if body is nil or empty, or priority is outside 1..3
+      def initialize(attributes)
+        @id = attributes[:id]
+        @body = attributes[:body]
+        @kind = attributes[:kind] || "event" # default kind
+        @priority = attributes[:priority] || INFO # default: lowest priority
+        @scope = attributes[:scope] || "project" # default scope
+        @project_path = attributes[:project_path]
+        @source_content_item_id = attributes[:source_content_item_id]
+        @consolidated_into = attributes[:consolidated_into]
+        @token_count = attributes[:token_count]
+        @status = attributes[:status] || "active" # default status
+        @session_id = attributes[:session_id]
+        @observed_at = attributes[:observed_at]
+        @created_at = attributes[:created_at]
+        @reflected_at = attributes[:reflected_at]
+        @corroboration_count = attributes[:corroboration_count] || 1 # a fresh observation counts as one sighting
+        @promoted_at = attributes[:promoted_at]
+        @promoted_fact_id = attributes[:promoted_fact_id]
+        validate!
+        freeze # shallow: the attribute values themselves are not frozen
+      end
+      # @return [Boolean] true when status is "active" (neither consolidated nor expired)
+      def active?
+        status == "active"
+      end
+      # @return [Boolean] true when the Reflector has merged this into another
+      def consolidated?
+        status == "consolidated"
+      end
+      # @return [Boolean] true when the Reflector retired this on TTL
+      def expired?
+        status == "expired"
+      end
+      # @return [Boolean] true once promoted into a structured fact (promoted_at is set)
+      def promoted?
+        !promoted_at.nil?
+      end
+      # @return [Boolean] true when corroboration_count >= threshold (e.g. PROMOTION_THRESHOLD)
+      def corroborated?(threshold)
+        corroboration_count >= threshold
+      end
+      # @return [Boolean] true for 🔴 — the only priority shown to the actor
+      def important?
+        priority == IMPORTANT
+      end
+      # @return [Boolean] true when scope is "global"
+      def global?
+        scope == "global"
+      end
+      # @return [Hash] all attributes as a plain hash
+      def to_h
+        {
+          id: id,
+          body: body,
+          kind: kind,
+          priority: priority,
+          scope: scope,
+          project_path: project_path,
+          source_content_item_id: source_content_item_id,
+          consolidated_into: consolidated_into,
+          token_count: token_count,
+          status: status,
+          session_id: session_id,
+          observed_at: observed_at,
+          created_at: created_at,
+          reflected_at: reflected_at,
+          corroboration_count: corroboration_count,
+          promoted_at: promoted_at,
+          promoted_fact_id: promoted_fact_id
+        }
+      end
+      private
+      def validate!
+        raise ArgumentError, "body required" if body.nil? || body.empty? # whitespace-only bodies pass this check
+        raise ArgumentError, "priority must be 1, 2, or 3" unless (IMPORTANT..INFO).cover?(priority) # NOTE(review): cover? also accepts non-integers such as 2.5 — confirm intended
+      end
+    end
+  end
+end

data/lib/claude_memory/embeddings/generator.rb CHANGED Viewed

@@ -65,7 +65,7 @@ module ClaudeMemory
         return zero_vector if tokens.empty?
         # Build term frequency map
-        tf_map = tokens.each_with_object(Hash.new(0)) { |token, h| h[token] += 1 }
+        tf_map = tokens.tally
         # Normalize term frequencies
         max_tf = tf_map.values.max.to_f