smart_brain 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.en.md +173 -0
- data/README.md +173 -0
- data/agents/brain_assistant.rb +11 -0
- data/config/brain.yml +32 -0
- data/conversation_demo.rb +438 -0
- data/db/migrate/001_init.sql +98 -0
- data/example.rb +91 -0
- data/lib/smart_brain/adapters/smart_rag/direct_client.rb +47 -0
- data/lib/smart_brain/adapters/smart_rag/http_client.rb +61 -0
- data/lib/smart_brain/adapters/smart_rag/null_client.rb +22 -0
- data/lib/smart_brain/configuration.rb +41 -0
- data/lib/smart_brain/consolidator/working_summary.rb +102 -0
- data/lib/smart_brain/context_composer/composer.rb +75 -0
- data/lib/smart_brain/contracts/context_package.rb +16 -0
- data/lib/smart_brain/contracts/evidence_pack.rb +16 -0
- data/lib/smart_brain/contracts/retrieval_plan.rb +17 -0
- data/lib/smart_brain/event_store/in_memory.rb +103 -0
- data/lib/smart_brain/fusion/merger.rb +137 -0
- data/lib/smart_brain/memory_extractor/extractor.rb +92 -0
- data/lib/smart_brain/memory_store/in_memory.rb +78 -0
- data/lib/smart_brain/observability/tracker.rb +60 -0
- data/lib/smart_brain/retrieval_planner/planner.rb +122 -0
- data/lib/smart_brain/retrievers/exact_retriever.rb +62 -0
- data/lib/smart_brain/retrievers/memory_retriever.rb +30 -0
- data/lib/smart_brain/retrievers/relational_retriever.rb +53 -0
- data/lib/smart_brain/runtime.rb +195 -0
- data/lib/smart_brain/version.rb +5 -0
- data/lib/smart_brain.rb +35 -0
- data/templates/brain_assistant.erb +5 -0
- data/workers/brain_assistant.rb +9 -0
- metadata +283 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'time'
require 'timeout'

module SmartBrain
  module Adapters
    module SmartRag
      # Retrieves evidence from a SmartRAG service through an injected transport
      # and normalizes the reply into an evidence-pack hash.
      class HttpClient
        # @param transport [#call] invoked as `transport.call(plan, timeout_seconds:)`
        # @param timeout_seconds [Numeric] timeout value forwarded to the transport
        def initialize(transport:, timeout_seconds: 2)
          @transport = transport
          @timeout_seconds = timeout_seconds
        end

        # Runs the retrieval plan through the transport.
        #
        # @param plan [Hash] retrieval plan; only `:request_id` is read here
        # @return [Hash] evidence pack; when the transport raises Timeout::Error an
        #   empty pack carrying a fallback warning is returned instead
        def retrieve(plan)
          started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          raw = transport.call(plan, timeout_seconds: timeout_seconds)
          build_pack(raw: raw, request_id: plan[:request_id], took_ms: elapsed_ms(started_at))
        rescue Timeout::Error
          timeout_pack(request_id: plan[:request_id], took_ms: elapsed_ms(started_at))
        end

        private

        attr_reader :transport, :timeout_seconds

        # Normalizes the transport reply into the evidence-pack shape.
        # NOTE(review): the reply is read with symbol keys only — confirm the
        # transport symbolizes its payload.
        def build_pack(raw:, request_id:, took_ms:)
          raw ||= {} # a transport that answers nil is treated as an empty reply
          evidences = Array(raw[:evidences])
          ignored = []
          ignored << 'global_filters.language not supported' unless raw[:supports_language_filter]

          {
            version: '0.1',
            request_id: request_id,
            plan_id: raw[:plan_id] || "remote-#{request_id}",
            generated_at: Time.now.utc.iso8601,
            evidences: evidences,
            stats: {
              candidates: raw.dig(:stats, :candidates) || evidences.size,
              returned: evidences.size,
              took_ms: took_ms
            },
            explain: {
              ignored_fields: ignored + Array(raw.dig(:explain, :ignored_fields))
            },
            warnings: Array(raw[:warnings])
          }
        end

        # Empty evidence pack returned when the transport times out.
        def timeout_pack(request_id:, took_ms:)
          {
            version: '0.1',
            request_id: request_id,
            plan_id: "timeout-#{request_id}",
            generated_at: Time.now.utc.iso8601,
            evidences: [],
            stats: { candidates: 0, returned: 0, took_ms: took_ms },
            explain: { ignored_fields: [] },
            warnings: ['smart_rag timeout; fallback to memory-only evidence']
          }
        end

        # Milliseconds elapsed since `start` (a monotonic clock reading), rounded to 2 places.
        def elapsed_ms(start)
          ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round(2)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmartBrain
|
|
4
|
+
module Adapters
|
|
5
|
+
module SmartRag
|
|
6
|
+
# Placeholder SmartRAG client: never retrieves anything and always answers
# with an empty evidence pack plus a warning explaining why.
class NullClient
  # @param plan [Hash] retrieval plan; only `:request_id` is read
  # @return [Hash] evidence pack with no evidences
  def retrieve(plan)
    pack = { version: '0.1' }
    pack[:plan_id] = "local-#{plan[:request_id]}"
    pack[:request_id] = plan[:request_id]
    pack[:generated_at] = Time.now.utc.iso8601
    pack[:evidences] = []
    pack[:stats] = { candidates: 0, returned: 0, took_ms: 0 }
    pack[:explain] = { ignored_fields: [] }
    pack[:warnings] = ['smart_rag client not configured; returned empty evidences']
    pack
  end
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
|
|
5
|
+
module SmartBrain
|
|
6
|
+
# Read-only view over the policy configuration loaded from a YAML file.
#
# Every section accessor returns a Hash, even when the section is absent or
# present with an empty value (a bare `retention:` line parses to nil).
class Configuration
  DEFAULT_PATH = File.expand_path('../../config/brain.yml', __dir__)

  # Loads configuration from `path`, or from DEFAULT_PATH when none is given.
  # A missing or empty file yields an empty configuration.
  #
  # @param path [String, nil] YAML file to read
  # @return [Configuration]
  def self.load(path = nil)
    file_path = path || DEFAULT_PATH
    data = File.exist?(file_path) ? YAML.safe_load(File.read(file_path), symbolize_names: true) : {}
    new(data || {})
  end

  attr_reader :raw

  # @param raw [Hash] parsed configuration with symbol keys
  def initialize(raw)
    @raw = raw
  end

  # @return [Hash] the `policies` section, or {} when missing or empty
  def policies
    raw[:policies] || {}
  end

  # @return [Hash] `policies.retention`, or {} when missing or empty
  def retention
    policy_section(:retention)
  end

  # @return [Hash] `policies.retrieval`, or {} when missing or empty
  def retrieval
    policy_section(:retrieval)
  end

  # @return [Hash] `policies.composition`, or {} when missing or empty
  def composition
    policy_section(:composition)
  end

  # @return [Hash] `policies.observability`, or {} when missing or empty
  def observability
    policy_section(:observability)
  end

  private

  # `||` rather than `fetch(name, {})`: a key that exists with a nil value
  # must also fall back to {}, otherwise callers chaining `.fetch` crash.
  def policy_section(name)
    policies[name] || {}
  end
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmartBrain
|
|
4
|
+
module Consolidator
|
|
5
|
+
# Maintains one rolling working summary per session, rebuilt from memory
# items whenever a trigger condition fires.
class WorkingSummary
  # @param config [#retention, #composition] policy source (Hash-returning readers)
  # @param clock [#call] returns the current time; the result must respond to #iso8601
  def initialize(config:, clock:)
    @config = config
    @clock = clock
    @summaries = {}
    @last_summary_turn = Hash.new(0)
  end

  # Regenerates the session summary when a trigger fires; otherwise returns
  # the latest summary flagged as not triggered.
  #
  # @param session_id [Object] session key
  # @param turn_count [Integer] number of turns recorded so far
  # @param recent_turns [Array<Hash>] messages whose `:content` feeds the token estimate
  # @param memory_items [Array<Hash>] items with `:type` and `:key`
  # @param stage_event [Boolean] forces a summary when true
  # @return [Hash] summary with `:triggered` and `:trigger_reason`
  def update(session_id:, turn_count:, recent_turns:, memory_items:, stage_event: false)
    reason = trigger_reason(session_id: session_id, turn_count: turn_count, recent_turns: recent_turns, stage_event: stage_event)
    return latest_summary(session_id).merge(triggered: false, trigger_reason: 'not_triggered') unless reason

    summary = {
      summary_version: next_version(session_id),
      summary_source_turn_range: source_turn_range(turn_count),
      summary_generated_at: clock.call.iso8601,
      text: build_text(memory_items),
      triggered: true,
      trigger_reason: reason
    }
    summaries[session_id] = summary
    last_summary_turn[session_id] = turn_count
    summary
  end

  # @return [Hash] the stored summary for the session, or an empty default
  def latest_summary(session_id)
    summaries[session_id] || default_summary
  end

  private

  attr_reader :config, :clock, :summaries, :last_summary_turn

  # Checks the triggers in order: turn threshold, token pressure, stage event.
  # @return [String, nil] name of the trigger that fired, or nil
  def trigger_reason(session_id:, turn_count:, recent_turns:, stage_event:)
    turns_since_last = turn_count - last_summary_turn[session_id]
    threshold = config.retention.fetch(:summarize_after_turns, 12)
    return 'turn_threshold' if turns_since_last >= threshold

    token_limit = config.composition.fetch(:token_limit, 8192)
    return 'token_pressure' if estimate_tokens(recent_turns) > (token_limit * 0.7)
    return 'stage_event' if stage_event

    nil
  end

  # Rough token estimate: total characters / 4, rounded up. Characters are
  # summed before dividing so short messages are not truncated to zero.
  def estimate_tokens(recent_turns)
    total_chars = recent_turns.sum { |turn| turn[:content].to_s.length }
    (total_chars / 4.0).ceil
  end

  def next_version(session_id)
    previous = summaries[session_id]
    previous ? previous[:summary_version] + 1 : 1
  end

  # Window of `summarize_after_turns` turns ending at `turn_count`, clamped to start at 1.
  def source_turn_range(turn_count)
    window = config.retention.fetch(:summarize_after_turns, 12)
    { from: [turn_count - (window - 1), 1].max, to: turn_count }
  end

  # Renders the fixed-section summary text from memory items grouped by type.
  def build_text(memory_items)
    by_type = memory_items.group_by { |item| item[:type] }
    sections = [
      ['Goals:', 'goals'],
      ['Decisions:', 'decisions'],
      ['Tasks:', 'tasks'],
      ['Key References:', 'entities']
    ]
    lines = sections.flat_map do |heading, type|
      [heading, *to_lines(by_type.fetch(type, []), fallback: '- None')]
    end
    (lines + ['Open Questions:', '- None']).join("\n")
  end

  # At most five bullet lines, or the fallback line when there are no items.
  def to_lines(items, fallback:)
    return [fallback] if items.empty?

    items.first(5).map { |item| "- #{item[:key]}" }
  end

  def default_summary
    {
      summary_version: 0,
      summary_source_turn_range: { from: 0, to: 0 },
      summary_generated_at: clock.call.iso8601,
      text: "Goals:\n- None\nDecisions:\n- None\nTasks:\n- None\nKey References:\n- None\nOpen Questions:\n- None",
      triggered: false,
      trigger_reason: 'empty'
    }
  end
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
|
|
5
|
+
module SmartBrain
|
|
6
|
+
module ContextComposer
|
|
7
|
+
# Assembles the context package handed to the model: working summary,
# recent turns, selected evidence, budget constraints and debug trace.
class Composer
  # @param config [#composition] policy source (Hash-returning reader)
  # @param clock [#call] returns the current time; the result must respond to #iso8601
  def initialize(config:, clock:)
    @config = config
    @clock = clock
  end

  # Builds a context package.
  #
  # @param session_id [Object] session key
  # @param user_message [String] the incoming user message
  # @param plan [Hash] retrieval plan (`:request_id`, `:purpose`, `:queries`)
  # @param plan_id [Object] identifier of the executed plan
  # @param summary [Hash] working summary; `:text` is used
  # @param recent_turns [Array<Hash>] messages with `:content`
  # @param evidence_bundle [Hash] fusion result (`:selected`, `:dropped`, `:ignored_fields`)
  # @return [Hash] context package
  def compose(session_id:, user_message:, plan:, plan_id:, summary:, recent_turns:, evidence_bundle:)
    composition = config.composition
    context_id = SecureRandom.uuid
    evidence = evidence_bundle.fetch(:selected, [])
    recent_turns_max = composition.fetch(:recent_turns_max, 8)
    # NOTE(review): `first` keeps the leading entries; if callers pass turns
    # oldest-first this retains the oldest messages — confirm the ordering.
    included_turns = recent_turns.first(recent_turns_max)
    # Estimate only what is actually placed in the package, so the reported
    # usage is not inflated by turns that were truncated away.
    used_estimate = estimate_tokens(summary: summary[:text], recent_turns: included_turns, evidence: evidence, user_message: user_message)

    {
      version: '0.1',
      context_id: context_id,
      session_id: session_id,
      created_at: clock.call.iso8601,
      system_blocks: [],
      developer_blocks: [],
      working_summary: summary[:text],
      recent_turns: included_turns,
      evidence: evidence,
      user_message: { role: 'user', content: user_message },
      constraints: {
        token_budget: {
          limit: composition.fetch(:token_limit, 8192),
          used_estimate: used_estimate
        },
        diversity: {
          by_document: composition.dig(:diversity, :by_document) || 3,
          by_source: composition.dig(:diversity, :by_source_uri) || 2
        },
        truncation: {
          snippets_max_chars: composition.fetch(:max_snippet_chars, 800),
          recent_turns_max: recent_turns_max
        }
      },
      debug: {
        trace: {
          context_id: context_id,
          request_id: plan[:request_id],
          plan_id: plan_id
        },
        planner: {
          request_id: plan[:request_id],
          purpose: plan[:purpose],
          # Tolerate a plan without :queries instead of crashing on nil.
          queries: (plan[:queries] || []).map { |q| q[:text] }
        },
        why_selected: evidence.map { |e| "#{e[:id]} score=#{e[:score]} source=#{e[:source]}" },
        ignored: evidence_bundle[:ignored_fields] || [],
        dropped: (evidence_bundle[:dropped] || []).map { |e| { id: e[:id], reason: e[:drop_reason] } }
      }
    }
  end

  private

  attr_reader :config, :clock

  # Rough token estimate: total characters of all parts / 4, rounded up.
  def estimate_tokens(summary:, recent_turns:, evidence:, user_message:)
    text_size = summary.to_s.length
    text_size += recent_turns.sum { |t| t[:content].to_s.length }
    text_size += evidence.sum { |e| e[:snippet].to_s.length }
    text_size += user_message.to_s.length
    (text_size / 4.0).ceil
  end
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmartBrain
|
|
4
|
+
module Contracts
|
|
5
|
+
# Structural contract for context packages: checks that the required
# top-level keys are present (symbol keys only).
class ContextPackage
  REQUIRED_KEYS = %i[version context_id session_id created_at user_message evidence].freeze

  # @param pkg [Hash] candidate context package
  # @return [true] when every required key is present
  # @raise [ArgumentError] when `pkg` is not hash-like or a required key is missing
  def self.validate!(pkg)
    # Report nil/non-Hash input as a contract violation rather than NoMethodError.
    raise ArgumentError, 'invalid context package: expected a Hash' unless pkg.respond_to?(:key?)

    missing = REQUIRED_KEYS.reject { |key| pkg.key?(key) }
    raise ArgumentError, "invalid context package: missing #{missing.join(', ')}" unless missing.empty?

    true
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmartBrain
|
|
4
|
+
module Contracts
|
|
5
|
+
# Structural contract for evidence packs: checks that the required
# top-level keys are present (symbol keys only).
class EvidencePack
  REQUIRED_KEYS = %i[version request_id plan_id generated_at evidences].freeze

  # @param pack [Hash] candidate evidence pack
  # @return [true] when every required key is present
  # @raise [ArgumentError] when `pack` is not hash-like or a required key is missing
  def self.validate!(pack)
    # Report nil/non-Hash input as a contract violation rather than NoMethodError.
    raise ArgumentError, 'invalid evidence pack: expected a Hash' unless pack.respond_to?(:key?)

    missing = REQUIRED_KEYS.reject { |key| pack.key?(key) }
    raise ArgumentError, "invalid evidence pack: missing #{missing.join(', ')}" unless missing.empty?

    true
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmartBrain
|
|
4
|
+
module Contracts
|
|
5
|
+
# Structural contract for retrieval plans: checks that the required
# top-level keys are present (symbol keys only) and that at least one
# query is given.
class RetrievalPlan
  REQUIRED_KEYS = %i[version request_id purpose queries budget].freeze

  # @param plan [Hash] candidate retrieval plan
  # @return [true] when the plan satisfies the contract
  # @raise [ArgumentError] when `plan` is not hash-like, a required key is
  #   missing, or `:queries` is empty
  def self.validate!(plan)
    # Report nil/non-Hash input as a contract violation rather than NoMethodError.
    raise ArgumentError, 'invalid retrieval plan: expected a Hash' unless plan.respond_to?(:key?)

    missing = REQUIRED_KEYS.reject { |key| plan.key?(key) }
    raise ArgumentError, "invalid retrieval plan: missing #{missing.join(', ')}" unless missing.empty?
    raise ArgumentError, 'invalid retrieval plan: queries must not be empty' if Array(plan[:queries]).empty?

    true
  end
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
|
|
5
|
+
module SmartBrain
|
|
6
|
+
module EventStore
|
|
7
|
+
# Process-local event store that keeps every session's turns in a Hash.
class InMemory
  def initialize
    @sessions = {}
  end

  # Records one turn for the session and returns the stored turn hash.
  # Messages and refs are stamped with ids, the turn id and a timestamp.
  #
  # @param session_id [Object] session key
  # @param turn_events [Hash] turn payload (`:messages`, `:refs`, ...)
  # @param created_at [#iso8601] turn timestamp
  # @return [Hash] the stored turn
  def append_turn(session_id:, turn_events:, created_at:)
    record = (sessions[session_id] ||= { seq: 0, turns: [] })
    record[:seq] += 1
    turn_id = SecureRandom.uuid
    stamped_messages = normalize_messages(turn_id: turn_id, messages: turn_events[:messages] || [], created_at: created_at)
    stamped_refs = normalize_refs(turn_id: turn_id, refs: turn_events[:refs] || [], created_at: created_at)

    stored = {
      id: turn_id,
      session_id: session_id,
      seq: record[:seq],
      created_at: created_at.iso8601,
      turn_events: turn_events.merge(messages: stamped_messages, refs: stamped_refs)
    }
    record[:turns].push(stored)
    stored
  end

  # @return [Integer] number of turns stored for the session (0 when unknown)
  def turns_count(session_id:)
    turns_of(session_id).size
  end

  # Flattened messages of the last `limit` turns, in stored order.
  def recent_turns(session_id:, limit:)
    record = sessions[session_id]
    return [] unless record

    record[:turns].last(limit).each_with_object([]) do |turn, collected|
      (turn.dig(:turn_events, :messages) || []).each do |message|
        collected << {
          turn_id: turn[:id],
          message_id: message[:id],
          role: message[:role],
          content: message[:content],
          created_at: message[:created_at]
        }
      end
    end
  end

  # Flattened refs of the last `limit` turns, in stored order.
  def recent_refs(session_id:, limit:)
    record = sessions[session_id]
    return [] unless record

    record[:turns].last(limit).flat_map do |turn|
      turn.dig(:turn_events, :refs) || []
    end
  end

  # Counts entity mentions (by downcased canonical name, falling back to
  # `:name`) across the last `window_turns` turns.
  #
  # @return [Hash{String=>Integer}] counts with a default of 0
  def entity_frequencies(session_id:, window_turns:)
    counts = Hash.new(0)
    record = sessions[session_id]
    return counts unless record

    record[:turns].last(window_turns).each do |turn|
      Array(turn.dig(:turn_events, :entities)).each do |entity|
        label = entity[:canonical] || entity[:name]
        counts[label.to_s.downcase] += 1
      end
    end
    counts
  end

  # Turns of one session, or of every session when `session_id` is nil.
  def all_turns(session_id:)
    if session_id.nil?
      sessions.values.flat_map { |record| record[:turns] }
    else
      turns_of(session_id)
    end
  end

  private

  attr_reader :sessions

  # Stored turn list for the session, or a fresh empty list when unknown.
  def turns_of(session_id)
    record = sessions[session_id] || { turns: [] }
    record[:turns]
  end

  # Fills in missing `:id` / `:created_at` and stamps the owning turn id.
  def normalize_messages(turn_id:, messages:, created_at:)
    messages.map do |message|
      stamp = {
        id: message[:id] || SecureRandom.uuid,
        turn_id: turn_id,
        created_at: message[:created_at] || created_at.iso8601
      }
      message.merge(stamp)
    end
  end

  # Same as messages, plus a default empty `:ref_meta_json`.
  def normalize_refs(turn_id:, refs:, created_at:)
    refs.map do |ref|
      stamp = {
        id: ref[:id] || SecureRandom.uuid,
        turn_id: turn_id,
        created_at: ref[:created_at] || created_at.iso8601,
        ref_meta_json: ref[:ref_meta_json] || {}
      }
      ref.merge(stamp)
    end
  end
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmartBrain
|
|
4
|
+
module Fusion
|
|
5
|
+
# Fuses memory and resource evidence into one selection: normalize, dedupe,
# rerank with a lexical boost, enforce diversity caps, then apply the
# item/snippet budget.
class Merger
  # Memory/resource split used when the configured ratio is missing or malformed.
  DEFAULT_RATIO = { memory: 0.4, resource: 0.6 }.freeze

  # @param config [#composition] policy source (Hash-returning reader)
  def initialize(config:)
    @config = config
  end

  # @param query [String] user query used for the lexical boost
  # @param memory_evidence [Array<Hash>] evidence items from memory retrievers
  # @param resource_evidence [Array<Hash>] evidence items from the resource retriever
  # @return [Hash] `{ selected:, dropped:, ignored_fields: }`
  def merge(query:, memory_evidence:, resource_evidence:)
    combined = (memory_evidence + normalize_resource(resource_evidence))
    deduped = dedupe(combined)
    ranked = deduped.sort_by { |item| -rerank_score(item, query) }
    diversified, dropped = apply_diversity(ranked)
    selected = apply_budget(diversified)

    {
      selected: selected,
      dropped: dropped,
      ignored_fields: []
    }
  end

  private

  attr_reader :config

  # Tags resource items with source/score/ref so they line up with memory items.
  def normalize_resource(items)
    items.map do |item|
      item.merge(
        source: 'resource',
        score: item.fetch(:score, item.dig(:signals, :rerank_score) || item.dig(:signals, :rrf_score) || 0.4),
        ref: item[:ref] || {
          document_id: item[:document_id],
          section_id: item[:section_id],
          chunk_index: item.dig(:metadata, :chunk_index)
        }
      )
    end
  end

  # Keeps the highest-scoring item per dedupe key (first one wins ties).
  def dedupe(items)
    deduped = {}
    items.each do |item|
      key = dedupe_key(item)
      existing = deduped[key]
      deduped[key] = item if existing.nil? || item.fetch(:score, 0.0) > existing.fetch(:score, 0.0)
    end
    deduped.values
  end

  # Caps items per document and per source prefix. Items without a known
  # document or source identity are not counted against those caps;
  # otherwise all of them would share one `nil` bucket and be dropped as
  # false "diversity" duplicates.
  #
  # @return [Array(Array<Hash>, Array<Hash>)] kept and dropped items
  def apply_diversity(items)
    by_document = config.composition.dig(:diversity, :by_document) || 3
    by_source = config.composition.dig(:diversity, :by_source_uri) || 2

    doc_counter = Hash.new(0)
    source_counter = Hash.new(0)
    kept = []
    dropped = []

    items.each do |item|
      document_key = document_key_for(item)
      source_key = source_key_for(item)

      if cap_reached?(doc_counter, document_key, by_document) || cap_reached?(source_counter, source_key, by_source)
        dropped << item.merge(drop_reason: 'diversity')
        next
      end

      doc_counter[document_key] += 1 unless document_key.nil?
      source_counter[source_key] += 1 unless source_key.nil?
      kept << item
    end

    [kept, dropped]
  end

  # Document identity: the ref's document id, else the title; nil when unknown.
  def document_key_for(item)
    (item[:ref] || {})[:document_id] || item[:title]
  end

  # Source identity: first three '/'-separated segments of the URI
  # (scheme and host for URLs); nil when the item has no source URI.
  def source_key_for(item)
    uri = item[:source_uri].to_s
    return nil if uri.empty?

    prefix = uri.split('/')[0, 3].join('/')
    prefix.empty? ? uri : prefix
  end

  def cap_reached?(counter, key, cap)
    !key.nil? && counter[key] >= cap
  end

  # Splits the item budget between memory and resource evidence, backfills
  # unused slots with remaining items, and truncates long snippets.
  def apply_budget(items)
    limit = config.composition.fetch(:evidence_max_items, 12)
    max_chars = config.composition.fetch(:max_snippet_chars, 800)
    ratio = parse_ratio(config.composition.dig(:diversity, :memory_resource_ratio) || '40/60')

    memory_limit = (limit * ratio[:memory]).floor
    resource_limit = limit - memory_limit
    selected = []

    memory_items = items.select { |i| i[:source] == 'memory' }.first(memory_limit)
    resource_items = items.select { |i| i[:source] == 'resource' }.first(resource_limit)
    selected.concat(memory_items).concat(resource_items)

    if selected.length < limit
      leftovers = (items - selected).first(limit - selected.length)
      selected.concat(leftovers)
    end

    selected.first(limit).map do |item|
      snippet = item[:snippet].to_s
      item.merge(snippet: snippet.length > max_chars ? "#{snippet[0...max_chars]}..." : snippet)
    end
  end

  # Parses a "memory/resource" ratio such as "40/60" into fractions.
  # Falls back to DEFAULT_RATIO when a part is missing, a part is negative,
  # or both parts are zero, so a malformed setting cannot raise or produce
  # a negative limit.
  def parse_ratio(text)
    memory, resource = text.to_s.split('/').map(&:to_i)
    return DEFAULT_RATIO if memory.nil? || resource.nil?
    return DEFAULT_RATIO if memory.negative? || resource.negative?

    total = memory + resource
    return DEFAULT_RATIO if total <= 0

    { memory: memory.to_f / total, resource: resource.to_f / total }
  end

  def rerank_score(item, query)
    score = item.fetch(:score, 0.0)
    score + lexical_boost(item: item, query: query)
  end

  # +0.05 for every query term found in the item's title or snippet.
  def lexical_boost(item:, query:)
    terms = query.to_s.downcase.scan(/[[:alnum:]_\-\p{Han}]+/)
    text = "#{item[:title]} #{item[:snippet]}".downcase
    terms.count { |term| text.include?(term) } * 0.05
  end

  # Identity used for deduplication, derived from the most specific ref available.
  def dedupe_key(item)
    ref = item[:ref] || {}
    if ref[:document_id] && ref[:section_id]
      "resource:#{ref[:document_id]}:#{ref[:section_id]}:#{ref[:chunk_index]}"
    elsif ref[:memory_item_id]
      "memory:#{ref[:memory_item_id]}"
    elsif ref[:turn_id] && ref[:message_id]
      "memory-turn:#{ref[:turn_id]}:#{ref[:message_id]}"
    else
      "fallback:#{item[:id]}"
    end
  end
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SmartBrain
|
|
4
|
+
module MemoryExtractor
|
|
5
|
+
# Turns the structured events of one conversation turn into memory items,
# together with a human-readable log of what was written or skipped.
class Extractor
  # @param config [#retention] policy source (Hash-returning reader)
  def initialize(config:)
    @config = config
  end

  # @param session_id [Object] session key echoed back in the result
  # @param turn [Hash] stored turn; `:turn_events` is required, `:id` is recorded as the source
  # @param entity_frequencies [Hash{String=>Integer}] mention counts keyed by
  #   downcased canonical name; missing keys count as 0
  # @return [Hash] `{ session_id:, items:, explain: }`
  # @raise [KeyError] when the turn has no `:turn_events` or an entry lacks `:key`
  def extract(session_id:, turn:, entity_frequencies: Hash.new(0))
    events = turn.fetch(:turn_events)
    explain = []
    items = []

    collect(items, explain, events[:tasks], type: 'tasks', confidence: confidence(:tool_derived), source_turn_id: turn[:id])
    collect(items, explain, events[:decisions], type: 'decisions', confidence: confidence(:user_asserted), source_turn_id: turn[:id])
    collect(items, explain, events[:goals], type: 'goals', confidence: confidence(:user_asserted), source_turn_id: turn[:id])
    collect(items, explain, events[:events], type: 'events', confidence: confidence(:tool_derived), source_turn_id: turn[:id])

    # Preferences are only stored once the user has confirmed them.
    Array(events[:preferences]).each do |preference|
      key = preference.fetch(:key)
      if preference[:confirmed]
        items << build_item(type: 'preferences', key: key, value_json: preference, source_turn_id: turn[:id], confidence: confidence(:user_asserted))
        explain << "write preferences:#{key}"
      else
        explain << "skip preferences:#{key} not confirmed"
      end
    end

    # Entities pass the gate when flagged, path/URL-like, or mentioned often enough.
    Array(events[:entities]).each do |entity|
      key = entity.fetch(:key)
      canonical = (entity[:canonical] || entity[:name]).to_s.downcase
      # `.to_i` maps a missing count (nil from a plain Hash) to 0 instead of
      # raising NoMethodError on `nil >= threshold`.
      mentions = entity_frequencies[canonical].to_i
      should_write = entity[:remember] || entity_structure_signal?(entity) || mentions >= freq_threshold
      if should_write
        items << build_item(type: 'entities', key: key, value_json: entity, source_turn_id: turn[:id], confidence: confidence(:inferred))
        explain << "write entities:#{key}"
      else
        explain << "skip entities:#{key} below threshold"
      end
    end

    Array(events[:retractions]).each do |retraction|
      type = retraction.fetch(:type)
      key = retraction.fetch(:key)
      items << build_item(type: type, key: key, value_json: retraction, source_turn_id: turn[:id], confidence: confidence(:user_asserted), status: 'retracted')
      explain << "retract #{type}:#{key}"
    end

    {
      session_id: session_id,
      items: items,
      explain: explain
    }
  end

  private

  attr_reader :config

  # Writes every entry of `raw_items` unconditionally as an item of `type`.
  def collect(items, explain, raw_items, type:, confidence:, source_turn_id:)
    Array(raw_items).each do |entry|
      key = entry.fetch(:key)
      items << build_item(type: type, key: key, value_json: entry, source_turn_id: source_turn_id, confidence: confidence)
      explain << "write #{type}:#{key}"
    end
  end

  def build_item(type:, key:, value_json:, source_turn_id:, confidence:, status: 'active')
    {
      type: type,
      key: key,
      value_json: value_json,
      source_turn_id: source_turn_id,
      confidence: confidence,
      status: status,
      updated_at: Time.now.utc.iso8601
    }
  end

  # Configured confidence for a provenance name; 0.6 when not configured.
  def confidence(name)
    config.retention.fetch(:confidence, {}).fetch(name, 0.6)
  end

  def freq_threshold
    config.retention.dig(:entity_gate, :freq_threshold) || 2
  end

  # True when the canonical name contains '/', 'http' or '.'.
  def entity_structure_signal?(entity)
    canonical = entity[:canonical].to_s
    canonical.include?('/') || canonical.include?('http') || canonical.include?('.')
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|