claude_memory 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude-plugin/marketplace.json +1 -1
- data/.claude-plugin/plugin.json +1 -1
- data/CHANGELOG.md +44 -0
- data/CLAUDE.md +11 -3
- data/README.md +35 -1
- data/docs/1_0_punchlist.md +269 -88
- data/docs/GETTING_STARTED.md +3 -1
- data/docs/architecture.md +3 -3
- data/docs/dashboard.md +23 -3
- data/docs/improvements.md +190 -5
- data/docs/quality_review.md +35 -0
- data/lib/claude_memory/commands/digest_command.rb +95 -3
- data/lib/claude_memory/commands/hook_command.rb +27 -2
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +7 -4
- data/lib/claude_memory/commands/registry.rb +2 -1
- data/lib/claude_memory/commands/show_command.rb +90 -0
- data/lib/claude_memory/commands/stats_command.rb +94 -2
- data/lib/claude_memory/dashboard/trust.rb +180 -11
- data/lib/claude_memory/distill/bare_conclusion_detector.rb +71 -0
- data/lib/claude_memory/hook/handler.rb +142 -1
- data/lib/claude_memory/templates/hooks.example.json +5 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +2 -0
- metadata +3 -1
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
|
|
5
|
+
module ClaudeMemory
|
|
6
|
+
module Commands
|
|
7
|
+
# Prints what memory would inject on the next SessionStart.
|
|
8
|
+
#
|
|
9
|
+
# The trust answer to "is this still worth it?" requires
|
|
10
|
+
# inspectability: a user who can't see what memory will inject can't
|
|
11
|
+
# develop confidence in it. The CLAUDE.md alternative is `cat
|
|
12
|
+
# CLAUDE.md` — instant, plain English, no tooling. This command is
|
|
13
|
+
# the same one-line inspect surface for the curated facts the
|
|
14
|
+
# injector picks each session.
|
|
15
|
+
#
|
|
16
|
+
# Runs the exact `Hook::ContextInjector` path real sessions use, so
|
|
17
|
+
# what you see here is what Claude actually receives — not a
|
|
18
|
+
# rebuilt approximation that could drift.
|
|
19
|
+
#
|
|
20
|
+
# The default suppresses the "Pending Knowledge Extraction" dump
|
|
21
|
+
# (which contains raw transcript JSON intended for LLM distillation)
|
|
22
|
+
# so the output stays human-readable. Pass `--pending` to see the
|
|
23
|
+
# full fresh-session payload, including those raw items.
|
|
24
|
+
class ShowCommand < BaseCommand
  VALID_SOURCES = %w[startup resume clear].freeze

  # Any string outside FRESH_SESSION_SOURCES skips the pending-knowledge
  # block. "preview" reads naturally in any debug log this surfaces in.
  NON_FRESH_SOURCE = "preview"

  # Generates the injection context through Hook::ContextInjector and
  # prints it between a header line and a size footer.
  #
  # @param args [Array<String>] command-line arguments (--source, --pending)
  # @return [Integer] 0 after printing; 1 when parse_options returns nil;
  #   otherwise whatever `failure` returns for a Sequel::DatabaseError
  def call(args)
    opts = parse_options(args, {source: nil, pending: false}) do |o|
      OptionParser.new do |parser|
        parser.banner = "Usage: claude-memory show [--source SOURCE] [--pending]"
        parser.on("--source SOURCE", VALID_SOURCES,
          "Simulate fresh-session source (#{VALID_SOURCES.join(", ")}). " \
          "Forces inclusion of pending-knowledge and auto-memory-mirror " \
          "sections regardless of --pending.") { |v| o[:source] = v }
        parser.on("--pending",
          "Include the pending-knowledge dump (raw transcript JSON " \
          "for LLM distillation). Default suppresses it for readability.") { o[:pending] = true }
      end
    end
    return 1 if opts.nil?

    # An explicit --source wins; --pending alone passes nil; with neither
    # flag the injector receives NON_FRESH_SOURCE.
    effective_source = opts[:source] || (opts[:pending] ? nil : NON_FRESH_SOURCE)

    manager = Store::StoreManager.new
    begin
      manager.ensure_both!
      injector = Hook::ContextInjector.new(manager, source: effective_source)
      context = injector.generate_context

      print_header(opts[:source])
      stdout.puts ""

      if context.nil? || context.strip.empty?
        stdout.puts "_Memory has no facts to inject yet._"
        stdout.puts ""
        stdout.puts "Run a few Claude Code sessions in this project, or use"
        stdout.puts "`memory.store_extraction` from a session to seed facts."
      else
        stdout.puts context
        stdout.puts ""
        print_footer(injector, context)
      end

      0
    ensure
      # Close the manager on every exit path. Previously an exception
      # raised after construction skipped the close entirely.
      manager.close
    end
  rescue Sequel::DatabaseError => e
    failure("Database error: #{e.message}")
  end

  private

  # Prints the snapshot heading, tagging it with the source only when the
  # user passed --source explicitly.
  #
  # @param source [String, nil] value of the --source option
  def print_header(source)
    label = source ? " (source=#{source})" : ""
    stdout.puts "## Memory snapshot — would be injected at next SessionStart#{label}"
  end

  # Prints a separator and a one-line summary: emitted fact count,
  # estimated token count, and character length of the context.
  #
  # @param injector [Object] injector that produced `context`; must
  #   respond to #emitted_fact_ids
  # @param context [String] the generated context text
  def print_footer(injector, context)
    tokens = Core::TokenEstimator.estimate(context)
    fact_count = injector.emitted_fact_ids.size
    stdout.puts "---"
    stdout.puts "#{fact_count} fact#{"s" unless fact_count == 1} • " \
      "~#{tokens} token#{"s" unless tokens == 1} • " \
      "#{context.length} chars"
  end
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
@@ -13,14 +13,15 @@ module ClaudeMemory
|
|
|
13
13
|
SCOPE_PROJECT = "project"
|
|
14
14
|
|
|
15
15
|
def call(args)
|
|
16
|
-
opts = parse_options(args, {scope: SCOPE_ALL, tools: false, stale: false, since_days: nil, stale_days: nil}) do |o|
|
|
16
|
+
opts = parse_options(args, {scope: SCOPE_ALL, tools: false, tokens: false, stale: false, since_days: nil, stale_days: nil}) do |o|
|
|
17
17
|
OptionParser.new do |parser|
|
|
18
18
|
parser.banner = "Usage: claude-memory stats [options]"
|
|
19
19
|
parser.on("--scope SCOPE", ["all", "global", "project"],
|
|
20
20
|
"Show stats for: all (default), global, or project") { |v| o[:scope] = v }
|
|
21
21
|
parser.on("--tools", "Show MCP tool-call usage stats") { o[:tools] = true }
|
|
22
|
+
parser.on("--tokens", "Show SessionStart context-injection token budget") { o[:tokens] = true }
|
|
22
23
|
parser.on("--stale", "Show facts not recalled in CLAUDE_MEMORY_STALE_DAYS (default 14)") { o[:stale] = true }
|
|
23
|
-
parser.on("--since DAYS", Integer, "Limit --tools to last N days") { |v| o[:since_days] = v }
|
|
24
|
+
parser.on("--since DAYS", Integer, "Limit --tools/--tokens to last N days") { |v| o[:since_days] = v }
|
|
24
25
|
parser.on("--stale-days N", Integer, "Override staleness threshold for --stale") { |v| o[:stale_days] = v }
|
|
25
26
|
end
|
|
26
27
|
end
|
|
@@ -30,6 +31,10 @@ module ClaudeMemory
|
|
|
30
31
|
return print_mcp_tool_call_stats(opts[:since_days])
|
|
31
32
|
end
|
|
32
33
|
|
|
34
|
+
if opts[:tokens]
|
|
35
|
+
return print_token_budget_stats(opts[:since_days])
|
|
36
|
+
end
|
|
37
|
+
|
|
33
38
|
if opts[:stale]
|
|
34
39
|
return print_stale_facts(opts[:stale_days])
|
|
35
40
|
end
|
|
@@ -349,6 +354,93 @@ module ClaudeMemory
|
|
|
349
354
|
1
|
|
350
355
|
end
|
|
351
356
|
|
|
357
|
+
# Histogram buckets for per-session context token counts. Each entry is
# [label, lower bound, upper bound]. Every entry is frozen as well as the
# list itself, so a bucket cannot be mutated in place through the constant.
TOKEN_BUCKETS = [
  ["<500", 0, 500],
  ["500-1000", 500, 1000],
  ["1000-2000", 1000, 2000],
  ["2000-5000", 2000, 5000],
  ["5000+", 5000, Float::INFINITY]
].each(&:freeze).freeze
|
|
364
|
+
|
|
365
|
+
# Prints p50/p95/avg/min/max and a bucketed distribution of the
# `context_tokens` values stored in the detail_json of successful
# `hook_context` rows in the project database's activity_events table.
#
# @param since_days [Integer, nil] only include events from the last N
#   days; nil reports over all time
# @return [Integer] 0 when a report or an explanatory "no data" message was
#   printed, 1 when a database or JSON parse error was rescued
def print_token_budget_stats(since_days)
  db = nil
  manager = ClaudeMemory::Store::StoreManager.new
  db_path = manager.project_db_path

  stdout.puts "SessionStart Context Token Budget"
  stdout.puts "=" * 50

  unless File.exist?(db_path)
    stdout.puts "Project database does not exist: #{db_path}"
    return 0
  end

  db = open_readonly(db_path)

  unless db.table_exists?(:activity_events)
    stdout.puts "No activity telemetry recorded yet."
    return 0
  end

  dataset = db[:activity_events]
    .where(event_type: "hook_context", status: "success")
  if since_days
    cutoff = (Time.now - since_days * 86400).utc.iso8601
    dataset = dataset.where { occurred_at >= cutoff }
    stdout.puts "Window: last #{since_days} day#{"s" unless since_days == 1}"
  else
    stdout.puts "Window: all time"
  end
  stdout.puts

  tokens = dataset.select_map(:detail_json).filter_map do |json|
    next unless json

    details = JSON.parse(json)
    # A valid JSON document that is not an object (array, string, number)
    # has no context_tokens field; skip it instead of indexing into it.
    next unless details.is_a?(Hash)

    value = details["context_tokens"]
    value if value.is_a?(Integer) && value > 0
  end

  if tokens.empty?
    stdout.puts "No context injections recorded in window."
    stdout.puts ""
    stdout.puts "Token telemetry is recorded automatically on SessionStart hooks."
    stdout.puts "Run a Claude Code session in this project to populate."
    return 0
  end

  sorted = tokens.sort
  total = sorted.size
  stdout.puts "Sessions: #{format_number(total)}"
  stdout.puts "p50: #{format_number(percentile(sorted, 0.50))} tokens"
  stdout.puts "p95: #{format_number(percentile(sorted, 0.95))} tokens"
  stdout.puts "Avg: #{format_number((sorted.sum.to_f / total).round)} tokens"
  stdout.puts "Min: #{format_number(sorted.first)} tokens"
  stdout.puts "Max: #{format_number(sorted.last)} tokens"
  stdout.puts ""
  print_token_distribution(sorted)

  0
rescue Sequel::DatabaseError, JSON::ParserError, Extralite::Error => e
  stderr.puts "Error reading token telemetry: #{e.message}"
  1
ensure
  # Release both handles on every exit path, including the rescue above,
  # which previously returned without disconnecting or closing.
  db&.disconnect
  manager&.close
end
|
|
432
|
+
|
|
433
|
+
# Prints a "Distribution:" heading followed by one row per TOKEN_BUCKETS
# entry: label, sample count, share of all samples, and a bar of one block
# character per 5 percentage points.
#
# A value belongs to a bucket when low <= value < high.
#
# @param sorted [Array<Integer>] token counts; may be empty
# @return [void]
def print_token_distribution(sorted)
  total = sorted.size
  stdout.puts "Distribution:"
  TOKEN_BUCKETS.each do |label, low, high|
    count = sorted.count { |t| t >= low && t < high }
    # With no samples the share would be 0/0 (NaN), which cannot be turned
    # into a bar length; report 0.0% for every bucket instead.
    pct = total.zero? ? 0.0 : (count * 100.0 / total).round(1)
    bar = "█" * (pct / 5).round
    stdout.puts " #{label.ljust(12)} #{count.to_s.rjust(5)} (#{pct.to_s.rjust(5)}%) #{bar}"
  end
end
|
|
443
|
+
|
|
352
444
|
def print_per_tool_breakdown(dataset)
|
|
353
445
|
stdout.puts "Per-tool breakdown:"
|
|
354
446
|
stdout.puts " #{"Tool".ljust(28)} #{"Calls".rjust(7)} #{"Avg ms".rjust(8)} #{"P95 ms".rjust(8)} #{"Err %".rjust(6)}"
|
|
@@ -2,20 +2,37 @@
|
|
|
2
2
|
|
|
3
3
|
module ClaudeMemory
|
|
4
4
|
module Dashboard
|
|
5
|
-
# Sidebar data for the feed-first dashboard.
|
|
5
|
+
# Sidebar data for the feed-first dashboard. Six surfaces, each
|
|
6
|
+
# answering a different "is memory helping/costing/clean?" question:
|
|
6
7
|
#
|
|
7
|
-
# 1. Moments this week + week-over-week delta — the headline value
|
|
8
|
-
# A moment is any meaningful activity event (recall hit,
|
|
9
|
-
# context injection, conflict detected). Ingest-only
|
|
10
|
-
# because they're not directly user-visible value.
|
|
8
|
+
# 1. Moments this week + week-over-week delta — the headline value
|
|
9
|
+
# number. A moment is any meaningful activity event (recall hit,
|
|
10
|
+
# extraction, context injection, conflict detected). Ingest-only
|
|
11
|
+
# events don't count because they're not directly user-visible value.
|
|
11
12
|
#
|
|
12
13
|
# 2. "What memory knows about you" — up to 5 global facts rendered as
|
|
13
|
-
# plain English.
|
|
14
|
-
#
|
|
14
|
+
# plain English. The trust panel's most compelling surface: users
|
|
15
|
+
# can sanity-check what's being injected into their sessions.
|
|
15
16
|
#
|
|
16
|
-
# 3. Needs review — open conflicts plus facts
|
|
17
|
-
#
|
|
18
|
-
# count; the feed surfaces
|
|
17
|
+
# 3. Needs review — open conflicts plus stale facts (active but never
|
|
18
|
+
# recalled in the last N days) plus empty recalls (queries that
|
|
19
|
+
# returned nothing). A single actionable count; the feed surfaces
|
|
20
|
+
# the individual items.
|
|
21
|
+
#
|
|
22
|
+
# 4. Utilization (30d) — of facts extracted in the last 30 days, how
|
|
23
|
+
# many has Claude actually surfaced via recall or context injection.
|
|
24
|
+
# Low ratios are a signal too: memory accumulating knowledge that
|
|
25
|
+
# Claude isn't reaching for.
|
|
26
|
+
#
|
|
27
|
+
# 5. Token budget (30d, 0.11.0+) — p50/p95/avg `context_tokens`
|
|
28
|
+
# injected per SessionStart. Answers "what does memory cost per
|
|
29
|
+
# session?" via numbers a skeptical user can read.
|
|
30
|
+
#
|
|
31
|
+
# 6. Quality score (live + historical, 0.11.0+) — hallucination-rate
|
|
32
|
+
# proxy: 100 - (suspect_pct + bare_pct), clamped 0..100. Live is
|
|
33
|
+
# over the last UTILIZATION_DAYS; historical mirrors the same
|
|
34
|
+
# calculation across all active facts as a supplementary baseline.
|
|
35
|
+
# See `quality_review.md` 2026-04-30 note for why the split exists.
|
|
19
36
|
class Trust
|
|
20
37
|
WEEK_SECONDS = 7 * 86_400
|
|
21
38
|
UTILIZATION_DAYS = 30
|
|
@@ -31,8 +48,160 @@ module ClaudeMemory
|
|
|
31
48
|
fingerprint: fingerprint,
|
|
32
49
|
needs_review: needs_review,
|
|
33
50
|
utilization: utilization,
|
|
34
|
-
feedback: feedback_summary
|
|
51
|
+
feedback: feedback_summary,
|
|
52
|
+
token_budget: token_budget,
|
|
53
|
+
quality_score: quality_score
|
|
54
|
+
}
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# The trust panel's hallucination-rate proxy. Counts two pollution
|
|
58
|
+
# signals:
|
|
59
|
+
#
|
|
60
|
+
# - suspect: facts that ReferenceMaterialDetector retagged from
|
|
61
|
+
# `convention` to `reference` predicate (descriptions of external
|
|
62
|
+
# projects mislabeled as user conventions).
|
|
63
|
+
# - bare_conclusion: `decision` / `convention` facts whose object
|
|
64
|
+
# skipped the prompt-mandated reason clause and so are dead
|
|
65
|
+
# weight once the originating context is gone.
|
|
66
|
+
#
|
|
67
|
+
# Reports two windows so users can distinguish historical noise from
|
|
68
|
+
# live extraction quality (per `quality_review.md` 2026-04-30
|
|
69
|
+
# investigation): the headline `score` is computed over facts
|
|
70
|
+
# created within the last UTILIZATION_DAYS — that's the actionable
|
|
71
|
+
# signal. The `historical` block reports the same counts over all
|
|
72
|
+
# active facts so legacy data is visible without dominating.
|
|
73
|
+
#
|
|
74
|
+
# Score = 100 - (suspect_pct + bare_pct), clamped 0..100. Lower is
|
|
75
|
+
# worse. Returns 100 (perfect) when there are no facts in the
|
|
76
|
+
# window so a quiet week isn't penalized.
|
|
77
|
+
# @return [Hash] the breakdown for facts created in the last
#   UTILIZATION_DAYS, merged with :window_days and a :historical breakdown
#   computed with no cutoff; the quality_score_zero payload when a
#   Sequel::DatabaseError is raised
def quality_score
  window_start = Time.now.utc - UTILIZATION_DAYS * 86_400
  live_window = compute_quality(cutoff: window_start.iso8601)
  all_time = compute_quality(cutoff: nil)

  live_window.merge(window_days: UTILIZATION_DAYS, historical: all_time)
rescue Sequel::DatabaseError => error
  ClaudeMemory.logger.debug("Trust#quality_score failed: #{error.message}")
  quality_score_zero
end
public :quality_score
|
|
91
|
+
|
|
92
|
+
# Fallback payload for quality_score: the all-zero breakdown for the live
# window plus an all-zero :historical block. Built from zero_breakdown so
# the two zero shapes cannot drift apart.
#
# @return [Hash] zero_breakdown plus :window_days and :historical
def quality_score_zero
  zero_breakdown.merge(
    window_days: UTILIZATION_DAYS,
    historical: zero_breakdown
  )
end
|
|
111
|
+
|
|
112
|
+
# Turns the raw counts for one window into percentages and a score.
#
# @param cutoff [String, nil] passed through to aggregate_quality_counts
# @return [Hash] the counts plus :suspect_pct, :bare_pct (one decimal) and
#   :score (integer, 100 minus both percentages, clamped to 0..100);
#   zero_breakdown when the window holds no facts
def compute_quality(cutoff:)
  counts = aggregate_quality_counts(cutoff: cutoff)
  fact_total = counts[:total_active]
  return zero_breakdown if fact_total.zero?

  share_of_total = ->(count) { (count * 100.0 / fact_total).round(1) }
  suspect_share = share_of_total.call(counts[:suspect_count])
  bare_share = share_of_total.call(counts[:bare_conclusion_count])
  penalty = suspect_share + bare_share

  counts.merge(
    suspect_pct: suspect_share,
    bare_pct: bare_share,
    score: (100 - penalty).clamp(0, 100).round
  )
end
|
|
128
|
+
|
|
129
|
+
# @return [Hash] breakdown for a window with no facts: all counts and
#   percentages zero, score 100
def zero_breakdown
  {
    total_active: 0,
    suspect_count: 0,
    bare_conclusion_count: 0,
    suspect_pct: 0.0,
    bare_pct: 0.0,
    score: 100
  }
end
|
|
133
|
+
|
|
134
|
+
# Counts active facts across the project and global stores, skipping any
# store that store_if_exists does not return.
#
# The bare-conclusion query takes its predicate list from
# BareConclusionDetector::GUARDED_PREDICATES instead of repeating the
# literal, so the query and the detector cannot disagree about which
# predicates are guarded.
#
# @param cutoff [String, nil] when given, only facts whose created_at is
#   >= cutoff are counted; nil counts every active fact
# @return [Hash] {total_active:, suspect_count:, bare_conclusion_count:}
def aggregate_quality_counts(cutoff: nil)
  detector = Distill::BareConclusionDetector.new
  guarded = Distill::BareConclusionDetector::GUARDED_PREDICATES
  totals = {total_active: 0, suspect_count: 0, bare_conclusion_count: 0}

  %w[project global].each do |scope|
    store = @manager.store_if_exists(scope)
    next unless store

    active = store.facts.where(status: "active")
    active = active.where { created_at >= cutoff } if cutoff

    totals[:total_active] += active.count
    # Facts carrying the "reference" predicate are the suspect signal.
    totals[:suspect_count] += active.where(predicate: "reference").count
    totals[:bare_conclusion_count] += active
      .where(predicate: guarded)
      .select(:predicate, :object_literal)
      .all
      .count { |row| detector.bare_conclusion?(row) }
  end

  totals
end
|
|
155
|
+
|
|
156
|
+
# What does memory cost? Aggregates `context_tokens` from successful
|
|
157
|
+
# `hook_context` activity events over the last UTILIZATION_DAYS so a
|
|
158
|
+
# skeptical user can see the per-session token cost in p50/p95.
|
|
159
|
+
#
|
|
160
|
+
# Shape: {p50:, p95:, avg:, sample_size:, window_days:}
|
|
161
|
+
# All ints. Returns zeros when there are no events in the window.
|
|
162
|
+
# Summarizes `context_tokens` from successful `hook_context` activity
# events recorded in the last UTILIZATION_DAYS.
#
# @return [Hash] {p50:, p95:, avg:, sample_size:, window_days:}; the
#   token_budget_zero payload when there is no store, no usable sample, or
#   a database / JSON parse error is rescued
def token_budget
  store = @manager.default_store(prefer: :project)
  return token_budget_zero unless store

  cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
  rows = store.activity_events
    .where(event_type: "hook_context", status: "success")
    .where { occurred_at >= cutoff }
    .select(:detail_json)
    .all

  tokens = rows.filter_map do |row|
    raw = row[:detail_json]
    next unless raw

    details = JSON.parse(raw)
    # Valid JSON that is not an object (e.g. an array) would raise a
    # TypeError when indexed with a string key, which the rescue below
    # does not cover; such rows carry no token count, so skip them.
    next unless details.is_a?(Hash)

    value = details["context_tokens"]
    value if value.is_a?(Integer) && value > 0
  end

  return token_budget_zero if tokens.empty?

  sorted = tokens.sort
  {
    p50: percentile(sorted, 0.50),
    p95: percentile(sorted, 0.95),
    avg: (sorted.sum.to_f / sorted.size).round,
    sample_size: sorted.size,
    window_days: UTILIZATION_DAYS
  }
rescue Sequel::DatabaseError, JSON::ParserError => e
  ClaudeMemory.logger.debug("Trust#token_budget failed: #{e.message}")
  token_budget_zero
end
public :token_budget
|
|
194
|
+
|
|
195
|
+
# @return [Hash] the token-budget shape with every statistic and the
#   sample size set to 0, plus :window_days
def token_budget_zero
  zeroed = %i[p50 p95 avg sample_size].to_h { |stat| [stat, 0] }
  zeroed.merge(window_days: UTILIZATION_DAYS)
end
|
|
198
|
+
|
|
199
|
+
# Picks the element at rank ceil(size * pct), with the resulting index
# clamped into the array's bounds.
#
# @param sorted [Array] values already in ascending order
# @param pct [Numeric] fraction such as 0.50 or 0.95
# @return [Object] the selected element, or 0 for an empty array
def percentile(sorted, pct)
  return 0 if sorted.empty?

  last_index = sorted.size - 1
  rank_index = (sorted.size * pct).ceil - 1
  sorted[rank_index.clamp(0, last_index)]
end
|
|
37
206
|
|
|
38
207
|
private
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Distill
|
|
5
|
+
# Catches facts that survived distillation without a reason clause.
|
|
6
|
+
# The SessionStart distillation prompt explicitly requires `decision`
|
|
7
|
+
# and `convention` facts to embed the reason ("— because …", "so that
|
|
8
|
+
# …", "to avoid …", "caused by …", "breaks when …"); facts that ship
|
|
9
|
+
# without one are dead weight once they go stale because nobody can
|
|
10
|
+
# recover the original justification by re-reading the row.
|
|
11
|
+
#
|
|
12
|
+
# This detector is the production-side mirror of that prompt
|
|
13
|
+
# constraint. It exists so the dashboard can quantify how many facts
|
|
14
|
+
# are slipping through the prompt's reason-clause requirement —
|
|
15
|
+
# higher bare-conclusion ratio means the LLM is producing low-quality
|
|
16
|
+
# extractions, which is a hallucination-rate proxy worth surfacing.
|
|
17
|
+
#
|
|
18
|
+
# Pure function, no side effects, safe to call in tight loops.
|
|
19
|
+
class BareConclusionDetector
  # Predicates the prompt requires reasons for. Other predicates
  # (uses_framework, uses_database, etc.) carry their meaning in the
  # subject-predicate-object shape itself, so a bare object is fine.
  GUARDED_PREDICATES = %w[decision convention].freeze

  # Reason-clause signals lifted from the distill-transcripts skill
  # prompt plus a small set of common natural-language variants. Each
  # pattern is case-insensitive and word-bounded (\b) and may match
  # anywhere in the object text; any one match marks the fact as
  # "explained".
  REASON_PATTERNS = [
    /\bbecause\b/i,
    /\bso\s+that\b/i,
    /\bso\s+the\b/i,
    /\bso\s+we\b/i,
    /\bin\s+order\s+to\b/i,
    /\bto\s+avoid\b/i,
    /\bto\s+prevent\b/i,
    /\bto\s+ensure\b/i,
    /\bto\s+support\b/i,
    /\bto\s+allow\b/i,
    /\bto\s+enable\b/i,
    /\bto\s+make\b/i,
    /\bto\s+fix\b/i,
    /\bto\s+handle\b/i,
    /\bcaused\s+by\b/i,
    /\bbreaks\s+when\b/i,
    /\bdue\s+to\b/i,
    /\botherwise\b/i,
    /\bwithout\s+(?:which|this|it)\b/i
  ].freeze

  # Returns true when the fact has a guarded predicate AND its object
  # text shows no reason-clause signal. Returns false for any fact
  # outside the guarded predicates, and for a fact whose object is
  # missing, empty, or whitespace-only (there is no conclusion to judge).
  #
  # @param fact [Hash, #[]] with :predicate and :object_literal (or
  #   :object) entries; for a Hash, the same names as string keys are
  #   accepted when the symbol key is absent
  # @return [Boolean]
  def bare_conclusion?(fact)
    predicate = field(fact, :predicate).to_s
    return false unless GUARDED_PREDICATES.include?(predicate)

    object = (field(fact, :object_literal) || field(fact, :object)).to_s
    # strip so "   " is treated like "" rather than as an unexplained fact
    return false if object.strip.empty?

    REASON_PATTERNS.none? { |re| object.match?(re) }
  end

  private

  # Reads `key` from the fact, falling back to the string form of the key
  # for plain Hashes only (other row objects are indexed by symbol alone).
  def field(fact, key)
    value = fact[key]
    value = fact[key.to_s] if value.nil? && fact.is_a?(Hash)
    value
  end
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -93,6 +93,59 @@ module ClaudeMemory
|
|
|
93
93
|
result
|
|
94
94
|
end
|
|
95
95
|
|
|
96
|
+
# First-week ROI nudge. Computes per-session metrics (facts
|
|
97
|
+
# contributed via Stop-hook ingest, percentage of those Claude
|
|
98
|
+
# actually used in recall/context-injection) and decides whether
|
|
99
|
+
# to print to the user. Quiets after MAX_NUDGES successful runs
|
|
100
|
+
# or when CLAUDE_MEMORY_NO_NUDGE=1.
|
|
101
|
+
#
|
|
102
|
+
# The "first ~10 sessions" gate is enforced by counting prior
|
|
103
|
+
# `roi_nudge` activity events with status=success across both
|
|
104
|
+
# stores. Once the user has seen the nudge enough times, memory
|
|
105
|
+
# gets out of the way; trust is established or it isn't.
|
|
106
|
+
MAX_NUDGES = 10
ENV_NUDGE_OPT_OUT = "CLAUDE_MEMORY_NO_NUDGE"

# Decides whether to emit the per-session ROI message and, if so, records
# a successful `roi_nudge` activity event.
#
# @param payload [Hash] hook payload; "session_id" is read from it, with
#   @config.session_id as the fallback
# @return [Hash] {status: :silent, reason:, ...} when suppressed, or
#   {status: :emitted, message:, n:, used:, pct:, remaining:}
def nudge(payload)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  session_id = payload["session_id"] || @config.session_id
  silent = ->(reason, **extra) { {status: :silent, reason: reason}.merge(extra) }

  # Opt-out returns before anything is logged: users who set the env var
  # get no activity event for the suppressed nudge.
  return silent.call("opt_out") if @env[ENV_NUDGE_OPT_OUT] == "1"
  return silent.call("no_session_id") if session_id.nil? || session_id.empty?

  prior = prior_nudge_count
  return silent.call("first_week_complete", prior_count: prior) if prior >= MAX_NUDGES

  fact_ids = session_contributed_facts(session_id)
  n = fact_ids.size
  # An empty session logs nothing, so it does not use up one of the
  # MAX_NUDGES slots counted by prior_nudge_count.
  return silent.call("no_contributions", prior_count: prior) if n.zero?

  used = session_used_facts(session_id, fact_ids)
  pct = (used * 100.0 / n).round
  message = "memory contributed #{n} fact#{"s" unless n == 1} this session, %used = #{pct}%"

  log_activity(
    "roi_nudge",
    status: "success",
    session_id: session_id,
    t0: started_at,
    details: {n: n, used: used, pct: pct, prior_count: prior}
  )

  {
    status: :emitted,
    message: message,
    n: n,
    used: used,
    pct: pct,
    remaining: MAX_NUDGES - prior - 1
  }
end
|
|
148
|
+
|
|
96
149
|
def context(payload)
|
|
97
150
|
t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
98
151
|
|
|
@@ -105,7 +158,11 @@ module ClaudeMemory
|
|
|
105
158
|
|
|
106
159
|
log_activity("hook_context",
|
|
107
160
|
status: context_text ? "success" : "skipped", t0: t0,
|
|
108
|
-
details: {
|
|
161
|
+
details: {
|
|
162
|
+
context_length: context_text&.length,
|
|
163
|
+
context_tokens: Core::TokenEstimator.estimate(context_text),
|
|
164
|
+
source: source
|
|
165
|
+
})
|
|
109
166
|
|
|
110
167
|
{status: :ok, context: context_text}
|
|
111
168
|
rescue => e
|
|
@@ -126,6 +183,90 @@ module ClaudeMemory
|
|
|
126
183
|
project_path = payload["project_path"] || @config.project_dir
|
|
127
184
|
Store::StoreManager.new(project_path: project_path, env: @env)
|
|
128
185
|
end
|
|
186
|
+
|
|
187
|
+
# Cross-scope nudge counter. Counts both stores so a user with
|
|
188
|
+
# global facts only doesn't bypass the first-week limit.
|
|
189
|
+
# @return [Integer] number of successful `roi_nudge` activity events summed
#   over the project and global stores; 0 when a Sequel::DatabaseError is
#   raised
def prior_nudge_count
  source = manager_or_self

  %w[project global].sum do |scope|
    store = source.store_if_exists(scope) if source.respond_to?(:store_if_exists)
    if store
      store.activity_events.where(event_type: "roi_nudge", status: "success").count
    else
      0
    end
  end
rescue Sequel::DatabaseError
  # An unreadable count is treated as "no nudges seen yet", so the user
  # keeps getting feedback while the database problem is investigated.
  0
end
|
|
203
|
+
|
|
204
|
+
# Facts whose provenance points to content_items captured in
|
|
205
|
+
# this session. Active facts only — superseded/rejected ones
|
|
206
|
+
# don't count as memory contributing.
|
|
207
|
+
# @param session_id [String] session whose content_items are matched
# @return [Array] distinct ids of active facts joined through provenance to
#   content_items of this session; [] without a store or on a
#   Sequel::DatabaseError
def session_contributed_facts(session_id)
  return [] unless @store

  session_facts = @store.facts
    .join(:provenance, fact_id: :id)
    .join(:content_items, id: Sequel[:provenance][:content_item_id])
    .where(
      Sequel[:content_items][:session_id] => session_id,
      Sequel[:facts][:status] => "active"
    )

  session_facts.select(Sequel[:facts][:id]).distinct.map { |row| row[:id] }
rescue Sequel::DatabaseError => error
  ClaudeMemory.logger.debug("session_contributed_facts failed: #{error.message}")
  []
end
|
|
221
|
+
|
|
222
|
+
# Of the given fact ids, how many appear in top_fact_ids of any
|
|
223
|
+
# recall or hook_context activity event tagged with this
|
|
224
|
+
# session_id?
|
|
225
|
+
# Counts how many of the given fact ids appear in the `top_fact_ids` list
# of any successful recall or hook_context activity event for the session.
#
# @param session_id [String] session the activity events must belong to
# @param fact_ids [Array] candidate fact ids
# @return [Integer] number of distinct candidate ids found; 0 when
#   fact_ids is empty, there is no store, or a database / JSON parse
#   error is rescued
def session_used_facts(session_id, fact_ids)
  return 0 if fact_ids.empty?
  return 0 unless @store
  target = fact_ids.to_set
  used = Set.new

  @store.activity_events
    .where(event_type: %w[recall hook_context], status: "success")
    .where(session_id: session_id)
    .select(:detail_json)
    .all
    .each do |row|
      raw = row[:detail_json]
      next unless raw

      details = JSON.parse(raw)
      # Rows whose JSON is not an object, or whose top_fact_ids is not a
      # list, cannot name any facts. Skipping them avoids a TypeError /
      # NoMethodError that the rescue below would not catch.
      next unless details.is_a?(Hash)

      surfaced = details["top_fact_ids"]
      next unless surfaced.is_a?(Array)

      surfaced.each { |id| used << id if target.include?(id) }
    end

  used.size
rescue Sequel::DatabaseError, JSON::ParserError => e
  ClaudeMemory.logger.debug("session_used_facts failed: #{e.message}")
  0
end
|
|
246
|
+
|
|
247
|
+
# @return [Object] @manager when set; otherwise a memoized
#   SingleStoreFacade wrapping @store, so callers can use store_if_exists
#   either way
def manager_or_self
  @manager || (@_handler_store_facade ||= SingleStoreFacade.new(@store))
end
|
|
254
|
+
|
|
255
|
+
# Wraps a single store behind the one manager method used for nudge
# counting, store_if_exists.
class SingleStoreFacade
  # @param store [Object] the only store available
  def initialize(store)
    @store = store
  end

  # The wrapped store's scope is unknown, so it is reported as the
  # "project" store and every other scope as absent. Global-only setups
  # are therefore undercounted, which is accepted because such users
  # would normally pass a manager.
  #
  # @param scope [String] scope name
  # @return [Object, nil] the wrapped store for "project", nil otherwise
  def store_if_exists(scope)
    return @store if scope == "project"

    nil
  end
end
|
|
129
270
|
end
|
|
130
271
|
end
|
|
131
272
|
end
|