smart_brain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.en.md +173 -0
  4. data/README.md +173 -0
  5. data/agents/brain_assistant.rb +11 -0
  6. data/config/brain.yml +32 -0
  7. data/conversation_demo.rb +438 -0
  8. data/db/migrate/001_init.sql +98 -0
  9. data/example.rb +91 -0
  10. data/lib/smart_brain/adapters/smart_rag/direct_client.rb +47 -0
  11. data/lib/smart_brain/adapters/smart_rag/http_client.rb +61 -0
  12. data/lib/smart_brain/adapters/smart_rag/null_client.rb +22 -0
  13. data/lib/smart_brain/configuration.rb +41 -0
  14. data/lib/smart_brain/consolidator/working_summary.rb +102 -0
  15. data/lib/smart_brain/context_composer/composer.rb +75 -0
  16. data/lib/smart_brain/contracts/context_package.rb +16 -0
  17. data/lib/smart_brain/contracts/evidence_pack.rb +16 -0
  18. data/lib/smart_brain/contracts/retrieval_plan.rb +17 -0
  19. data/lib/smart_brain/event_store/in_memory.rb +103 -0
  20. data/lib/smart_brain/fusion/merger.rb +137 -0
  21. data/lib/smart_brain/memory_extractor/extractor.rb +92 -0
  22. data/lib/smart_brain/memory_store/in_memory.rb +78 -0
  23. data/lib/smart_brain/observability/tracker.rb +60 -0
  24. data/lib/smart_brain/retrieval_planner/planner.rb +122 -0
  25. data/lib/smart_brain/retrievers/exact_retriever.rb +62 -0
  26. data/lib/smart_brain/retrievers/memory_retriever.rb +30 -0
  27. data/lib/smart_brain/retrievers/relational_retriever.rb +53 -0
  28. data/lib/smart_brain/runtime.rb +195 -0
  29. data/lib/smart_brain/version.rb +5 -0
  30. data/lib/smart_brain.rb +35 -0
  31. data/templates/brain_assistant.erb +5 -0
  32. data/workers/brain_assistant.rb +9 -0
  33. metadata +283 -0
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
require 'time'
require 'timeout'

module SmartBrain
  module Adapters
    module SmartRag
      # Adapter that asks a remote SmartRAG service for evidence through an
      # injected transport and normalizes the reply into an evidence-pack hash.
      class HttpClient
        # @param transport [#call] callable invoked as
        #   `transport.call(plan, timeout_seconds: Numeric)`
        # @param timeout_seconds [Numeric] value forwarded to the transport;
        #   this class does not enforce the timeout itself
        def initialize(transport:, timeout_seconds: 2)
          @transport = transport
          @timeout_seconds = timeout_seconds
        end

        # Runs the retrieval plan against the transport.
        #
        # @param plan [Hash] retrieval plan; only `:request_id` is read here
        # @return [Hash] evidence pack. When the transport raises
        #   `Timeout::Error` an empty pack carrying a warning is returned
        #   instead of propagating the error.
        def retrieve(plan)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          raw = transport.call(plan, timeout_seconds: timeout_seconds)
          build_pack(raw: raw, request_id: plan[:request_id], took_ms: elapsed_ms(started_at))
        rescue Timeout::Error
          timeout_pack(request_id: plan[:request_id], took_ms: elapsed_ms(started_at))
        end

        private

        attr_reader :transport, :timeout_seconds

        # Normalizes the transport reply into the evidence-pack shape.
        # A nil reply is treated as an empty payload (plus a warning) rather
        # than crashing on `nil[:evidences]`.
        def build_pack(raw:, request_id:, took_ms:)
          warnings = []
          if raw.nil?
            raw = {}
            warnings << 'smart_rag returned no payload; treated as empty evidences'
          end

          evidences = Array(raw[:evidences])
          ignored = []
          ignored << 'global_filters.language not supported' unless raw[:supports_language_filter]

          {
            version: '0.1',
            request_id: request_id,
            plan_id: raw[:plan_id] || "remote-#{request_id}",
            generated_at: Time.now.utc.iso8601,
            evidences: evidences,
            stats: {
              # Fall back to the number of returned evidences when the remote
              # side did not report a candidate count.
              candidates: raw.dig(:stats, :candidates) || evidences.size,
              returned: evidences.size,
              took_ms: took_ms
            },
            explain: {
              ignored_fields: ignored + Array(raw.dig(:explain, :ignored_fields))
            },
            warnings: warnings + Array(raw[:warnings])
          }
        end

        # Empty pack returned when the transport timed out.
        def timeout_pack(request_id:, took_ms:)
          {
            version: '0.1',
            request_id: request_id,
            plan_id: "timeout-#{request_id}",
            generated_at: Time.now.utc.iso8601,
            evidences: [],
            stats: { candidates: 0, returned: 0, took_ms: took_ms },
            explain: { ignored_fields: [] },
            warnings: ['smart_rag timeout; fallback to memory-only evidence']
          }
        end

        # Milliseconds elapsed since `start` (a monotonic clock reading), rounded to 2 decimals.
        def elapsed_ms(start)
          ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round(2)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartBrain
  module Adapters
    module SmartRag
      # Stand-in client that never contacts a backend: every retrieval yields
      # an empty evidence pack with a warning explaining why.
      class NullClient
        # @param plan [Hash] retrieval plan; only `:request_id` is read
        # @return [Hash] evidence pack with no evidences and zeroed stats
        def retrieve(plan)
          request_id = plan[:request_id]
          zero_stats = { candidates: 0, returned: 0, took_ms: 0 }
          notice = 'smart_rag client not configured; returned empty evidences'

          {
            version: '0.1',
            plan_id: "local-#{request_id}",
            request_id: request_id,
            generated_at: Time.now.utc.iso8601,
            evidences: [],
            stats: zero_stats,
            explain: { ignored_fields: [] },
            warnings: [notice]
          }
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+
5
module SmartBrain
  # Read-only view over the policy settings loaded from a YAML file.
  class Configuration
    DEFAULT_PATH = File.expand_path('../../config/brain.yml', __dir__)

    # Loads configuration from `path`, or from DEFAULT_PATH when `path` is nil.
    # A missing or empty file yields an empty configuration.
    #
    # @param path [String, nil]
    # @return [Configuration]
    def self.load(path = nil)
      file_path = path || DEFAULT_PATH
      data = File.exist?(file_path) ? YAML.safe_load(File.read(file_path), symbolize_names: true) : {}
      new(data || {})
    end

    # @return [Hash] the parsed configuration exactly as supplied
    attr_reader :raw

    # @param raw [Hash] symbol-keyed configuration data
    def initialize(raw)
      @raw = raw
    end

    # @return [Hash] the `:policies` section, or `{}` when absent or empty
    def policies
      # `||` rather than `fetch` with a default: a YAML key with no value
      # parses to nil, and `fetch` would hand that nil to the accessors below.
      raw[:policies] || {}
    end

    # @return [Hash] the `policies.retention` section, or `{}`
    def retention
      policy_section(:retention)
    end

    # @return [Hash] the `policies.retrieval` section, or `{}`
    def retrieval
      policy_section(:retrieval)
    end

    # @return [Hash] the `policies.composition` section, or `{}`
    def composition
      policy_section(:composition)
    end

    # @return [Hash] the `policies.observability` section, or `{}`
    def observability
      policy_section(:observability)
    end

    private

    # Looks up one sub-section of `policies`, treating a missing or nil
    # section as empty.
    def policy_section(name)
      policies[name] || {}
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartBrain
  module Consolidator
    # Keeps one in-memory working summary per session and decides when it
    # should be regenerated from the session's memory items.
    class WorkingSummary
      DEFAULT_SUMMARIZE_AFTER_TURNS = 12
      DEFAULT_TOKEN_LIMIT = 8192
      # Fraction of the token limit above which a summary is forced.
      TOKEN_PRESSURE_RATIO = 0.7
      # Maximum number of items listed under each heading.
      MAX_LINES_PER_SECTION = 5

      # @param config [#retention, #composition] settings source; both must return Hashes
      # @param clock [#call] returns an object responding to `iso8601`
      def initialize(config:, clock:)
        @config = config
        @clock = clock
        @summaries = {}
        @last_summary_turn = Hash.new(0)
      end

      # Regenerates the session summary when a trigger fires; otherwise returns
      # the latest stored (or default) summary marked as not triggered.
      #
      # @param session_id [Object] key identifying the session
      # @param turn_count [Integer] number of turns in the session so far
      # @param recent_turns [Array<Hash>] entries whose `:content` is used for the token estimate
      # @param memory_items [Array<Hash>] items with `:type` and `:key`
      # @param stage_event [Boolean] forces a summary when true
      # @return [Hash] summary with `:summary_version`, `:summary_source_turn_range`,
      #   `:summary_generated_at`, `:text`, `:triggered`, `:trigger_reason`
      def update(session_id:, turn_count:, recent_turns:, memory_items:, stage_event: false)
        reason = trigger_reason(session_id: session_id, turn_count: turn_count, recent_turns: recent_turns, stage_event: stage_event)
        return latest_summary(session_id).merge(triggered: false, trigger_reason: 'not_triggered') unless reason

        summary = {
          summary_version: next_version(session_id),
          summary_source_turn_range: source_turn_range(turn_count),
          summary_generated_at: clock.call.iso8601,
          text: build_text(memory_items),
          triggered: true,
          trigger_reason: reason
        }
        summaries[session_id] = summary
        last_summary_turn[session_id] = turn_count
        summary
      end

      # @return [Hash] the stored summary for the session, or a version-0 default
      def latest_summary(session_id)
        summaries[session_id] || default_summary
      end

      private

      attr_reader :config, :clock, :summaries, :last_summary_turn

      # Returns the first matching trigger name, or nil when none applies.
      # Checked in order: turn threshold, token pressure, explicit stage event.
      def trigger_reason(session_id:, turn_count:, recent_turns:, stage_event:)
        turns_since_last = turn_count - last_summary_turn[session_id]
        return 'turn_threshold' if turns_since_last >= summarize_after_turns

        token_limit = config.composition.fetch(:token_limit, DEFAULT_TOKEN_LIMIT)
        return 'token_pressure' if estimate_tokens(recent_turns) > (token_limit * TOKEN_PRESSURE_RATIO)
        return 'stage_event' if stage_event

        nil
      end

      def summarize_after_turns
        config.retention.fetch(:summarize_after_turns, DEFAULT_SUMMARIZE_AFTER_TURNS)
      end

      # Rough token estimate at ~4 characters per token. The division is done
      # once over the total length and rounded up; flooring per turn would make
      # any message shorter than 4 characters count as zero tokens.
      def estimate_tokens(recent_turns)
        total_chars = recent_turns.sum { |t| t[:content].to_s.length }
        (total_chars / 4.0).ceil
      end

      def next_version(session_id)
        previous = summaries[session_id]
        previous ? previous[:summary_version] + 1 : 1
      end

      # Window of `summarize_after_turns` turns ending at `turn_count`,
      # clamped so it never starts before turn 1.
      def source_turn_range(turn_count)
        { from: [turn_count - (summarize_after_turns - 1), 1].max, to: turn_count }
      end

      # Renders the fixed five-heading summary text from memory items.
      def build_text(memory_items)
        by_type = ->(type) { memory_items.select { |i| i[:type] == type } }

        [
          'Goals:',
          *to_lines(by_type.call('goals'), fallback: '- None'),
          'Decisions:',
          *to_lines(by_type.call('decisions'), fallback: '- None'),
          'Tasks:',
          *to_lines(by_type.call('tasks'), fallback: '- None'),
          'Key References:',
          *to_lines(by_type.call('entities'), fallback: '- None'),
          'Open Questions:',
          '- None'
        ].join("\n")
      end

      def to_lines(items, fallback:)
        return [fallback] if items.empty?

        items.first(MAX_LINES_PER_SECTION).map { |i| "- #{i[:key]}" }
      end

      # Version-0 placeholder used before any summary has been generated.
      def default_summary
        {
          summary_version: 0,
          summary_source_turn_range: { from: 0, to: 0 },
          summary_generated_at: clock.call.iso8601,
          text: build_text([]),
          triggered: false,
          trigger_reason: 'empty'
        }
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+
5
module SmartBrain
  module ContextComposer
    # Assembles the context package handed to the model: working summary,
    # recent turns, selected evidence, the user message, and debug metadata.
    class Composer
      DEFAULT_TOKEN_LIMIT = 8192
      DEFAULT_RECENT_TURNS_MAX = 8
      DEFAULT_MAX_SNIPPET_CHARS = 800

      # @param config [#composition] settings source; `composition` must return a Hash
      # @param clock [#call] returns an object responding to `iso8601`
      def initialize(config:, clock:)
        @config = config
        @clock = clock
      end

      # Builds a context package hash.
      #
      # @param session_id [Object]
      # @param user_message [String]
      # @param plan [Hash] retrieval plan; `:request_id`, `:purpose`, `:queries` are read
      # @param plan_id [Object] identifier echoed into the debug trace
      # @param summary [Hash] working summary; only `:text` is read
      # @param recent_turns [Array<Hash>] entries with `:content`; assumed to be
      #   ordered oldest-first, so the tail is what gets kept
      # @param evidence_bundle [Hash] with `:selected` and optionally
      #   `:ignored_fields` and `:dropped`
      # @return [Hash] context package (version '0.1')
      def compose(session_id:, user_message:, plan:, plan_id:, summary:, recent_turns:, evidence_bundle:)
        settings = config.composition
        context_id = SecureRandom.uuid
        evidence = evidence_bundle.fetch(:selected, [])
        recent_turns_max = settings.fetch(:recent_turns_max, DEFAULT_RECENT_TURNS_MAX)
        # Keep the newest entries: `first` would retain the oldest turns and
        # drop the ones closest to the current message.
        kept_turns = recent_turns.last(recent_turns_max)
        # Estimate only what is actually included in the package.
        used_estimate = estimate_tokens(summary: summary[:text], recent_turns: kept_turns, evidence: evidence, user_message: user_message)

        {
          version: '0.1',
          context_id: context_id,
          session_id: session_id,
          created_at: clock.call.iso8601,
          system_blocks: [],
          developer_blocks: [],
          working_summary: summary[:text],
          recent_turns: kept_turns,
          evidence: evidence,
          user_message: { role: 'user', content: user_message },
          constraints: {
            token_budget: {
              limit: settings.fetch(:token_limit, DEFAULT_TOKEN_LIMIT),
              used_estimate: used_estimate
            },
            diversity: {
              by_document: settings.dig(:diversity, :by_document) || 3,
              by_source: settings.dig(:diversity, :by_source_uri) || 2
            },
            truncation: {
              snippets_max_chars: settings.fetch(:max_snippet_chars, DEFAULT_MAX_SNIPPET_CHARS),
              recent_turns_max: recent_turns_max
            }
          },
          debug: build_debug(context_id: context_id, plan: plan, plan_id: plan_id, evidence: evidence, evidence_bundle: evidence_bundle)
        }
      end

      private

      attr_reader :config, :clock

      # Debug section: trace ids, planner echo, and why each evidence item
      # was selected, ignored, or dropped.
      def build_debug(context_id:, plan:, plan_id:, evidence:, evidence_bundle:)
        {
          trace: {
            context_id: context_id,
            request_id: plan[:request_id],
            plan_id: plan_id
          },
          planner: {
            request_id: plan[:request_id],
            purpose: plan[:purpose],
            # Array() tolerates a plan with no :queries key.
            queries: Array(plan[:queries]).map { |q| q[:text] }
          },
          why_selected: evidence.map { |e| "#{e[:id]} score=#{e[:score]} source=#{e[:source]}" },
          ignored: evidence_bundle[:ignored_fields] || [],
          dropped: (evidence_bundle[:dropped] || []).map { |e| { id: e[:id], reason: e[:drop_reason] } }
        }
      end

      # Rough token estimate at ~4 characters per token, rounded up.
      def estimate_tokens(summary:, recent_turns:, evidence:, user_message:)
        text_size = summary.to_s.length
        text_size += recent_turns.sum { |t| t[:content].to_s.length }
        text_size += evidence.sum { |e| e[:snippet].to_s.length }
        text_size += user_message.to_s.length
        (text_size / 4.0).ceil
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartBrain
  module Contracts
    # Structural check for context packages: verifies key presence only,
    # not the values stored under those keys.
    class ContextPackage
      REQUIRED_KEYS = %i[version context_id session_id created_at user_message evidence].freeze

      class << self
        # @param pkg [#key?] candidate context package
        # @return [true] when every required key is present
        # @raise [ArgumentError] listing the missing keys otherwise
        def validate!(pkg)
          absent = REQUIRED_KEYS.select { |name| !pkg.key?(name) }
          return true if absent.empty?

          raise ArgumentError, "invalid context package: missing #{absent.join(', ')}"
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartBrain
  module Contracts
    # Structural check for evidence packs: verifies key presence only,
    # not the values stored under those keys.
    class EvidencePack
      REQUIRED_KEYS = %i[version request_id plan_id generated_at evidences].freeze

      class << self
        # @param pack [#key?] candidate evidence pack
        # @return [true] when every required key is present
        # @raise [ArgumentError] listing the missing keys otherwise
        def validate!(pack)
          absent = REQUIRED_KEYS.select { |name| !pack.key?(name) }
          return true if absent.empty?

          raise ArgumentError, "invalid evidence pack: missing #{absent.join(', ')}"
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartBrain
  module Contracts
    # Structural check for retrieval plans: required keys must be present and
    # the plan must carry at least one query.
    class RetrievalPlan
      REQUIRED_KEYS = %i[version request_id purpose queries budget].freeze

      class << self
        # @param plan [#key?, #[]] candidate retrieval plan
        # @return [true] when the plan passes both checks
        # @raise [ArgumentError] when keys are missing or `:queries` is empty
        def validate!(plan)
          absent = REQUIRED_KEYS.select { |name| !plan.key?(name) }
          if absent.any?
            raise ArgumentError, "invalid retrieval plan: missing #{absent.join(', ')}"
          end

          queries = Array(plan[:queries])
          raise ArgumentError, 'invalid retrieval plan: queries must not be empty' if queries.empty?

          true
        end
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+
5
module SmartBrain
  module EventStore
    # Event store backed by a plain Hash held in the instance. Turns are kept
    # per session in append order, each with a per-session sequence number.
    class InMemory
      def initialize
        @sessions = {}
      end

      # Records one turn for the session, creating the session on first use.
      #
      # @param session_id [Object] session key
      # @param turn_events [Hash] may carry `:messages` and `:refs` (Arrays of Hashes)
      # @param created_at [#iso8601] timestamp applied to the turn and to any
      #   message/ref without its own `:created_at`
      # @return [Hash] the stored turn
      def append_turn(session_id:, turn_events:, created_at:)
        bucket = (sessions[session_id] ||= { seq: 0, turns: [] })
        bucket[:seq] += 1
        turn_id = SecureRandom.uuid

        stamped_messages = normalize_messages(
          turn_id: turn_id,
          messages: turn_events[:messages] || [],
          created_at: created_at
        )
        stamped_refs = normalize_refs(
          turn_id: turn_id,
          refs: turn_events[:refs] || [],
          created_at: created_at
        )

        record = {
          id: turn_id,
          session_id: session_id,
          seq: bucket[:seq],
          created_at: created_at.iso8601,
          turn_events: turn_events.merge(messages: stamped_messages, refs: stamped_refs)
        }
        bucket[:turns].push(record)
        record
      end

      # @return [Integer] number of turns stored for the session (0 when unknown)
      def turns_count(session_id:)
        stored_turns(session_id).size
      end

      # Messages of the last `limit` turns, flattened in stored order.
      #
      # @return [Array<Hash>] one entry per message with turn and message ids
      def recent_turns(session_id:, limit:)
        return [] unless sessions[session_id]

        tail(session_id, limit).each_with_object([]) do |turn, rows|
          (turn.dig(:turn_events, :messages) || []).each do |message|
            rows << {
              turn_id: turn[:id],
              message_id: message[:id],
              role: message[:role],
              content: message[:content],
              created_at: message[:created_at]
            }
          end
        end
      end

      # @return [Array<Hash>] refs of the last `limit` turns, flattened
      def recent_refs(session_id:, limit:)
        return [] unless sessions[session_id]

        tail(session_id, limit).flat_map { |turn| turn.dig(:turn_events, :refs) || [] }
      end

      # Counts entity mentions over the last `window_turns` turns, keyed by
      # the downcased `:canonical` (or `:name` when no canonical is set).
      #
      # @return [Hash{String=>Integer}] counts; missing keys read as 0
      def entity_frequencies(session_id:, window_turns:)
        counts = Hash.new(0)
        return counts unless sessions[session_id]

        tail(session_id, window_turns).each do |turn|
          Array(turn.dig(:turn_events, :entities)).each do |entity|
            label = entity[:canonical] || entity[:name]
            counts[label.to_s.downcase] += 1
          end
        end
        counts
      end

      # @param session_id [Object, nil] nil returns the turns of every session
      # @return [Array<Hash>] stored turns
      def all_turns(session_id:)
        if session_id.nil?
          sessions.values.flat_map { |bucket| bucket[:turns] }
        else
          stored_turns(session_id)
        end
      end

      private

      attr_reader :sessions

      # Turns array of a session, or a fresh empty array for unknown sessions.
      def stored_turns(session_id)
        bucket = sessions[session_id]
        bucket ? bucket[:turns] : []
      end

      # Last `count` turns of an existing session.
      def tail(session_id, count)
        sessions[session_id][:turns].last(count)
      end

      # Fills in `:id`, `:turn_id`, and `:created_at` on each message.
      def normalize_messages(turn_id:, messages:, created_at:)
        messages.map do |message|
          defaults = {
            id: message[:id] || SecureRandom.uuid,
            turn_id: turn_id,
            created_at: message[:created_at] || created_at.iso8601
          }
          message.merge(defaults)
        end
      end

      # Fills in `:id`, `:turn_id`, `:created_at`, and `:ref_meta_json` on each ref.
      def normalize_refs(turn_id:, refs:, created_at:)
        refs.map do |ref|
          defaults = {
            id: ref[:id] || SecureRandom.uuid,
            turn_id: turn_id,
            created_at: ref[:created_at] || created_at.iso8601,
            ref_meta_json: ref[:ref_meta_json] || {}
          }
          ref.merge(defaults)
        end
      end
    end
  end
end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
module SmartBrain
  module Fusion
    # Fuses memory and resource evidence into one bounded selection:
    # normalize -> dedupe -> rerank -> diversity caps -> item budget.
    class Merger
      # Memory/resource split used when the configured ratio is absent or malformed.
      DEFAULT_RATIO = { memory: 0.4, resource: 0.6 }.freeze

      # @param config [#composition] settings source; `composition` must return a Hash
      def initialize(config:)
        @config = config
      end

      # @param query [String] user query, used for the lexical rerank boost
      # @param memory_evidence [Array<Hash>] evidence items from memory
      # @param resource_evidence [Array<Hash>] raw resource items; normalized here
      # @return [Hash] `:selected` (budgeted items with truncated snippets),
      #   `:dropped` (items removed by diversity caps, tagged with
      #   `:drop_reason`), and `:ignored_fields` (always empty here)
      def merge(query:, memory_evidence:, resource_evidence:)
        combined = memory_evidence + normalize_resource(resource_evidence)
        ranked = dedupe(combined).sort_by { |item| -rerank_score(item, query) }
        diversified, dropped = apply_diversity(ranked)

        {
          selected: apply_budget(diversified),
          dropped: dropped,
          ignored_fields: []
        }
      end

      private

      attr_reader :config

      # Tags resource items with `source: 'resource'` and fills in a default
      # score and a `:ref` built from document/section/chunk identifiers.
      def normalize_resource(items)
        items.map do |item|
          item.merge(
            source: 'resource',
            score: item.fetch(:score, item.dig(:signals, :rerank_score) || item.dig(:signals, :rrf_score) || 0.4),
            ref: item[:ref] || {
              document_id: item[:document_id],
              section_id: item[:section_id],
              chunk_index: item.dig(:metadata, :chunk_index)
            }
          )
        end
      end

      # Keeps the highest-scoring item per dedupe key; the first one wins ties.
      def dedupe(items)
        best = {}
        items.each do |item|
          key = dedupe_key(item)
          existing = best[key]
          best[key] = item if existing.nil? || item.fetch(:score, 0.0) > existing.fetch(:score, 0.0)
        end
        best.values
      end

      # Caps how many items may share one document or one source.
      # Items without a document/source key are exempt from the respective
      # cap: bucketing them together under nil would make unrelated items
      # (e.g. memory items with no source_uri) compete for the same quota.
      #
      # @return [Array(Array<Hash>, Array<Hash>)] kept items and dropped items
      def apply_diversity(items)
        by_document = config.composition.dig(:diversity, :by_document) || 3
        by_source = config.composition.dig(:diversity, :by_source_uri) || 2

        doc_counter = Hash.new(0)
        source_counter = Hash.new(0)
        kept = []
        dropped = []

        items.each do |item|
          document_key = document_key_for(item)
          source_key = source_key_for(item)

          if capped?(doc_counter, document_key, by_document) || capped?(source_counter, source_key, by_source)
            dropped << item.merge(drop_reason: 'diversity')
            next
          end

          doc_counter[document_key] += 1 if document_key
          source_counter[source_key] += 1 if source_key
          kept << item
        end

        [kept, dropped]
      end

      # Document id from the ref, falling back to the title; nil when neither is usable.
      def document_key_for(item)
        key = (item[:ref] || {})[:document_id] || item[:title]
        key.to_s.empty? ? nil : key
      end

      # First three '/'-separated segments of the source URI (scheme and host
      # for typical URLs); nil when the item has no source URI.
      def source_key_for(item)
        uri = item[:source_uri]
        return nil if uri.to_s.empty?

        prefix = uri.to_s.split('/')[0, 3].join('/')
        prefix.empty? ? uri : prefix
      end

      def capped?(counter, key, cap)
        !key.nil? && counter[key] >= cap
      end

      # Selects up to `evidence_max_items`, reserving a share for memory and
      # resource items per the configured ratio, then filling any remaining
      # slots from whatever is left. Snippets are truncated to `max_snippet_chars`.
      def apply_budget(items)
        limit = config.composition.fetch(:evidence_max_items, 12)
        max_chars = config.composition.fetch(:max_snippet_chars, 800)
        ratio = parse_ratio(config.composition.dig(:diversity, :memory_resource_ratio) || '40/60')

        memory_limit = (limit * ratio[:memory]).floor
        resource_limit = limit - memory_limit

        selected = items.select { |i| i[:source] == 'memory' }.first(memory_limit) +
                   items.select { |i| i[:source] == 'resource' }.first(resource_limit)
        selected.concat((items - selected).first(limit - selected.length)) if selected.length < limit

        selected.first(limit).map do |item|
          snippet = item[:snippet].to_s
          item.merge(snippet: snippet.length > max_chars ? "#{snippet[0...max_chars]}..." : snippet)
        end
      end

      # Parses a "memory/resource" ratio such as "40/60" into fractions.
      # Anything that is not two non-negative numbers with a positive sum
      # (including a value with no '/') falls back to DEFAULT_RATIO.
      def parse_ratio(text)
        parts = text.to_s.split('/')
        return DEFAULT_RATIO unless parts.size == 2

        memory, resource = parts.map(&:to_i)
        total = memory + resource
        return DEFAULT_RATIO if total <= 0 || memory.negative? || resource.negative?

        { memory: memory.to_f / total, resource: resource.to_f / total }
      end

      def rerank_score(item, query)
        item.fetch(:score, 0.0) + lexical_boost(item: item, query: query)
      end

      # Adds 0.05 per query term found in the item's title or snippet.
      def lexical_boost(item:, query:)
        terms = query.to_s.downcase.scan(/[[:alnum:]_\-\p{Han}]+/)
        text = "#{item[:title]} #{item[:snippet]}".downcase
        terms.count { |term| text.include?(term) } * 0.05
      end

      # Identity used for deduplication, derived from the most specific
      # reference available on the item.
      def dedupe_key(item)
        ref = item[:ref] || {}
        if ref[:document_id] && ref[:section_id]
          "resource:#{ref[:document_id]}:#{ref[:section_id]}:#{ref[:chunk_index]}"
        elsif ref[:memory_item_id]
          "memory:#{ref[:memory_item_id]}"
        elsif ref[:turn_id] && ref[:message_id]
          "memory-turn:#{ref[:turn_id]}:#{ref[:message_id]}"
        else
          "fallback:#{item[:id]}"
        end
      end
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
require 'time'

module SmartBrain
  module MemoryExtractor
    # Turns the structured events of one conversation turn into memory items,
    # with an `explain` log stating why each candidate was written or skipped.
    class Extractor
      DEFAULT_CONFIDENCE = 0.6
      DEFAULT_FREQ_THRESHOLD = 2

      # @param config [#retention] settings source; `retention` must return a Hash
      def initialize(config:)
        @config = config
      end

      # @param session_id [Object] echoed into the result
      # @param turn [Hash] must contain `:turn_events`; `:id` is recorded as the
      #   source turn of every item
      # @param entity_frequencies [Hash{String=>Integer}] mention counts keyed by
      #   downcased canonical name; missing keys count as 0
      # @return [Hash] `:session_id`, `:items` (memory item hashes), `:explain` (Array<String>)
      # @raise [KeyError] when the turn has no `:turn_events` or an entry lacks `:key`
      #   (or `:type` for retractions)
      def extract(session_id:, turn:, entity_frequencies: Hash.new(0))
        events = turn.fetch(:turn_events)
        turn_id = turn[:id]
        explain = []
        items = []

        collect(items, explain, events[:tasks], type: 'tasks', confidence: confidence(:tool_derived), source_turn_id: turn_id)
        collect(items, explain, events[:decisions], type: 'decisions', confidence: confidence(:user_asserted), source_turn_id: turn_id)
        collect(items, explain, events[:goals], type: 'goals', confidence: confidence(:user_asserted), source_turn_id: turn_id)
        collect(items, explain, events[:events], type: 'events', confidence: confidence(:tool_derived), source_turn_id: turn_id)

        collect_preferences(items, explain, events[:preferences], source_turn_id: turn_id)
        collect_entities(items, explain, events[:entities], source_turn_id: turn_id, entity_frequencies: entity_frequencies)
        collect_retractions(items, explain, events[:retractions], source_turn_id: turn_id)

        {
          session_id: session_id,
          items: items,
          explain: explain
        }
      end

      private

      attr_reader :config

      # Writes every entry unconditionally under the given type.
      def collect(items, explain, raw_items, type:, confidence:, source_turn_id:)
        Array(raw_items).each do |entry|
          key = entry.fetch(:key)
          items << build_item(type: type, key: key, value_json: entry, source_turn_id: source_turn_id, confidence: confidence)
          explain << "write #{type}:#{key}"
        end
      end

      # Preferences are only stored once flagged `:confirmed`.
      def collect_preferences(items, explain, preferences, source_turn_id:)
        Array(preferences).each do |preference|
          key = preference.fetch(:key)
          if preference[:confirmed]
            items << build_item(type: 'preferences', key: key, value_json: preference, source_turn_id: source_turn_id, confidence: confidence(:user_asserted))
            explain << "write preferences:#{key}"
          else
            explain << "skip preferences:#{key} not confirmed"
          end
        end
      end

      # Entities pass the gate when explicitly marked `:remember`, when their
      # name looks like a path/URL/dotted identifier, or when they have been
      # mentioned at least `freq_threshold` times.
      def collect_entities(items, explain, entities, source_turn_id:, entity_frequencies:)
        Array(entities).each do |entity|
          key = entity.fetch(:key)
          canonical = (entity[:canonical] || entity[:name]).to_s.downcase
          # fetch with a default so a plain Hash (no default value) reads as 0
          # instead of comparing nil against the threshold.
          frequent = entity_frequencies.fetch(canonical, 0) >= freq_threshold

          if entity[:remember] || entity_structure_signal?(entity) || frequent
            items << build_item(type: 'entities', key: key, value_json: entity, source_turn_id: source_turn_id, confidence: confidence(:inferred))
            explain << "write entities:#{key}"
          else
            explain << "skip entities:#{key} below threshold"
          end
        end
      end

      # Retractions are stored as items with status 'retracted'.
      def collect_retractions(items, explain, retractions, source_turn_id:)
        Array(retractions).each do |retraction|
          type = retraction.fetch(:type)
          key = retraction.fetch(:key)
          items << build_item(type: type, key: key, value_json: retraction, source_turn_id: source_turn_id, confidence: confidence(:user_asserted), status: 'retracted')
          explain << "retract #{type}:#{key}"
        end
      end

      def build_item(type:, key:, value_json:, source_turn_id:, confidence:, status: 'active')
        {
          type: type,
          key: key,
          value_json: value_json,
          source_turn_id: source_turn_id,
          confidence: confidence,
          status: status,
          updated_at: Time.now.utc.iso8601
        }
      end

      # Configured confidence for a provenance class, defaulting to 0.6.
      def confidence(name)
        config.retention.fetch(:confidence, {}).fetch(name, DEFAULT_CONFIDENCE)
      end

      def freq_threshold
        config.retention.dig(:entity_gate, :freq_threshold) || DEFAULT_FREQ_THRESHOLD
      end

      # True when the entity's canonical name (or, lacking one, its name)
      # contains '/', 'http', or '.'. Uses the same canonical-then-name
      # fallback as the frequency lookup.
      def entity_structure_signal?(entity)
        label = (entity[:canonical] || entity[:name]).to_s
        label.include?('/') || label.include?('http') || label.include?('.')
      end
    end
  end
end