legion-apollo 0.3.6 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/README.md +1 -0
- data/lib/legion/apollo/helpers/confidence.rb +6 -1
- data/lib/legion/apollo/helpers/similarity.rb +23 -2
- data/lib/legion/apollo/helpers/tag_normalizer.rb +22 -2
- data/lib/legion/apollo/local/graph.rb +218 -54
- data/lib/legion/apollo/local/migrations/003_harden_graph_relationships.rb +23 -0
- data/lib/legion/apollo/local.rb +401 -86
- data/lib/legion/apollo/messages/access_boost.rb +9 -1
- data/lib/legion/apollo/messages/ingest.rb +9 -1
- data/lib/legion/apollo/messages/query.rb +9 -1
- data/lib/legion/apollo/messages/writeback.rb +9 -1
- data/lib/legion/apollo/routes.rb +53 -18
- data/lib/legion/apollo/runners/request.rb +5 -0
- data/lib/legion/apollo/settings.rb +0 -3
- data/lib/legion/apollo/version.rb +1 -1
- data/lib/legion/apollo.rb +234 -35
- metadata +4 -3
data/lib/legion/apollo/local.rb
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'digest'
|
|
4
|
+
require 'legion/logging'
|
|
5
|
+
require 'socket'
|
|
4
6
|
require 'time'
|
|
5
7
|
require_relative 'local/graph'
|
|
8
|
+
require_relative 'helpers/similarity'
|
|
9
|
+
require_relative 'helpers/tag_normalizer'
|
|
6
10
|
|
|
7
11
|
module Legion
|
|
8
12
|
module Apollo
|
|
@@ -10,93 +14,112 @@ module Legion
|
|
|
10
14
|
# Mirrors Legion::Apollo's public API but stores locally.
|
|
11
15
|
module Local # rubocop:disable Metrics/ModuleLength
|
|
12
16
|
MIGRATION_PATH = File.expand_path('local/migrations', __dir__).freeze
|
|
17
|
+
LIFECYCLE_MUTEX = Mutex.new
|
|
18
|
+
WRITE_MUTEX = Mutex.new
|
|
19
|
+
SEED_MUTEX = Mutex.new
|
|
20
|
+
HYDRATION_MUTEX = Mutex.new
|
|
13
21
|
|
|
14
22
|
class << self # rubocop:disable Metrics/ClassLength
|
|
15
|
-
|
|
16
|
-
return if @started
|
|
17
|
-
return unless local_enabled?
|
|
18
|
-
return unless data_local_available?
|
|
23
|
+
include Legion::Logging::Helper
|
|
19
24
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
25
|
+
def start
|
|
26
|
+
LIFECYCLE_MUTEX.synchronize { start_without_lock }
|
|
27
|
+
rescue StandardError => e
|
|
28
|
+
handle_exception(e, level: :error, operation: 'apollo.local.start')
|
|
29
|
+
@started = false
|
|
23
30
|
end
|
|
24
31
|
|
|
25
32
|
def shutdown
|
|
33
|
+
LIFECYCLE_MUTEX.synchronize do
|
|
34
|
+
@started = false
|
|
35
|
+
@seeded = false
|
|
36
|
+
log.info 'Legion::Apollo::Local shutdown'
|
|
37
|
+
end
|
|
38
|
+
rescue StandardError => e
|
|
39
|
+
handle_exception(e, level: :warn, operation: 'apollo.local.shutdown')
|
|
26
40
|
@started = false
|
|
27
|
-
|
|
41
|
+
@seeded = false
|
|
28
42
|
end
|
|
29
43
|
|
|
30
44
|
def started?
|
|
31
45
|
@started == true
|
|
32
46
|
end
|
|
33
47
|
|
|
34
|
-
def ingest(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength
|
|
48
|
+
def ingest(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength
|
|
35
49
|
return not_started_error unless started?
|
|
36
50
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
42
|
-
expires = compute_expires_at
|
|
43
|
-
|
|
44
|
-
row = {
|
|
45
|
-
content: content,
|
|
46
|
-
content_hash: hash,
|
|
47
|
-
tags: Legion::JSON.dump(Array(tags).first(local_setting(:max_tags, 20))),
|
|
48
|
-
embedding: embedding ? Legion::JSON.dump(embedding) : nil,
|
|
49
|
-
embedded_at: embedded_at,
|
|
50
|
-
source_channel: opts[:source_channel],
|
|
51
|
-
source_agent: opts[:source_agent],
|
|
52
|
-
submitted_by: opts[:submitted_by],
|
|
53
|
-
confidence: opts[:confidence] || 1.0,
|
|
54
|
-
expires_at: expires,
|
|
55
|
-
created_at: now,
|
|
56
|
-
updated_at: now
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
id = db[:local_knowledge].insert(row)
|
|
60
|
-
sync_fts(id, content, row[:tags])
|
|
61
|
-
|
|
62
|
-
{ success: true, mode: :local, id: id }
|
|
51
|
+
tags = normalize_tags_input(tags)
|
|
52
|
+
WRITE_MUTEX.synchronize do
|
|
53
|
+
ingest_without_lock(content: content, tags: tags, **opts)
|
|
54
|
+
end
|
|
63
55
|
rescue StandardError => e
|
|
56
|
+
handle_exception(
|
|
57
|
+
e,
|
|
58
|
+
level: :error,
|
|
59
|
+
operation: 'apollo.local.ingest',
|
|
60
|
+
tags: Array(tags).size,
|
|
61
|
+
source_channel: opts[:source_channel]
|
|
62
|
+
)
|
|
64
63
|
{ success: false, error: e.message }
|
|
65
64
|
end
|
|
66
65
|
|
|
67
|
-
def upsert(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
66
|
+
def upsert(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
68
67
|
return not_started_error unless started?
|
|
69
68
|
|
|
70
|
-
sorted_tags =
|
|
69
|
+
sorted_tags = normalize_tags_input(tags).sort
|
|
71
70
|
tag_json = Legion::JSON.dump(sorted_tags)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
71
|
+
WRITE_MUTEX.synchronize do
|
|
72
|
+
existing = db[:local_knowledge].where(tags: tag_json).first
|
|
73
|
+
|
|
74
|
+
if existing
|
|
75
|
+
update_upsert_entry(existing, content, tag_json, opts)
|
|
76
|
+
else
|
|
77
|
+
result = ingest_without_lock(content: content, tags: sorted_tags, **opts)
|
|
78
|
+
result[:mode] = :inserted if result[:success] && result[:mode] != :deduplicated
|
|
79
|
+
result
|
|
80
|
+
end
|
|
80
81
|
end
|
|
81
82
|
rescue StandardError => e
|
|
82
|
-
|
|
83
|
+
handle_exception(
|
|
84
|
+
e,
|
|
85
|
+
level: :warn,
|
|
86
|
+
operation: 'apollo.local.upsert',
|
|
87
|
+
tags: Array(tags).size,
|
|
88
|
+
source_channel: opts[:source_channel]
|
|
89
|
+
)
|
|
83
90
|
{ success: false, error: e.message }
|
|
84
91
|
end
|
|
85
92
|
|
|
86
93
|
def query(text:, limit: nil, min_confidence: nil, tags: nil, **) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
87
94
|
return not_started_error unless started?
|
|
88
95
|
|
|
96
|
+
text = normalize_text_input(text)
|
|
97
|
+
tags = normalize_tags_input(tags)
|
|
89
98
|
limit ||= local_setting(:default_limit, 5)
|
|
90
99
|
min_confidence ||= local_setting(:min_confidence, 0.3)
|
|
91
100
|
multiplier = local_setting(:fts_candidate_multiplier, 3)
|
|
101
|
+
log.info do
|
|
102
|
+
"Apollo::Local query executing text_length=#{text.to_s.length} " \
|
|
103
|
+
"limit=#{limit} min_confidence=#{min_confidence} tag_count=#{Array(tags).size}"
|
|
104
|
+
end
|
|
105
|
+
log.debug { "Apollo::Local query limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size}" }
|
|
92
106
|
|
|
93
107
|
candidates = fts_search(text, limit: limit * multiplier)
|
|
94
108
|
candidates = filter_candidates(candidates, min_confidence: min_confidence, tags: tags)
|
|
95
109
|
candidates = cosine_rerank(text, candidates) if can_rerank?
|
|
96
110
|
results = candidates.first(limit)
|
|
97
111
|
|
|
112
|
+
log.info { "Apollo::Local query completed count=#{results.size}" }
|
|
98
113
|
{ success: true, results: results, count: results.size, mode: :local }
|
|
99
114
|
rescue StandardError => e
|
|
115
|
+
handle_exception(
|
|
116
|
+
e,
|
|
117
|
+
level: :error,
|
|
118
|
+
operation: 'apollo.local.query',
|
|
119
|
+
limit: limit,
|
|
120
|
+
min_confidence: min_confidence,
|
|
121
|
+
tag_count: Array(tags).size
|
|
122
|
+
)
|
|
100
123
|
{ success: false, error: e.message }
|
|
101
124
|
end
|
|
102
125
|
|
|
@@ -109,28 +132,97 @@ module Legion
|
|
|
109
132
|
end
|
|
110
133
|
|
|
111
134
|
def reset!
|
|
112
|
-
|
|
113
|
-
|
|
135
|
+
LIFECYCLE_MUTEX.synchronize do
|
|
136
|
+
@started = false
|
|
137
|
+
@seeded = false
|
|
138
|
+
end
|
|
114
139
|
end
|
|
115
140
|
|
|
116
141
|
def seed_self_knowledge
|
|
117
142
|
return unless started?
|
|
118
|
-
return if @seeded
|
|
119
143
|
|
|
120
|
-
|
|
121
|
-
return if files.empty?
|
|
122
|
-
|
|
123
|
-
count = seed_files(files)
|
|
124
|
-
@seeded = true
|
|
125
|
-
Legion::Logging.info("Apollo::Local seeded #{count} self-knowledge files") if defined?(Legion::Logging)
|
|
144
|
+
SEED_MUTEX.synchronize { seed_self_knowledge_without_lock }
|
|
126
145
|
rescue StandardError => e
|
|
127
|
-
|
|
146
|
+
handle_exception(e, level: :warn, operation: 'apollo.local.seed_self_knowledge')
|
|
128
147
|
end
|
|
129
148
|
|
|
130
149
|
def seeded?
|
|
131
150
|
@seeded == true
|
|
132
151
|
end
|
|
133
152
|
|
|
153
|
+
def query_by_tags(tags:, limit: 50) # rubocop:disable Metrics/MethodLength
|
|
154
|
+
return { success: false, error: :not_started } unless started?
|
|
155
|
+
|
|
156
|
+
tags = normalize_tags_input(tags)
|
|
157
|
+
results = query_by_tags_via_sql(tags: tags, limit: limit)
|
|
158
|
+
|
|
159
|
+
log.info { "Apollo::Local query_by_tags completed tag_count=#{tags.size} count=#{results.size}" }
|
|
160
|
+
{ success: true, results: results, count: results.size }
|
|
161
|
+
rescue StandardError => e
|
|
162
|
+
handle_exception(
|
|
163
|
+
e,
|
|
164
|
+
level: :error,
|
|
165
|
+
operation: 'apollo.local.query_by_tags',
|
|
166
|
+
tag_count: tags.size,
|
|
167
|
+
limit: limit
|
|
168
|
+
)
|
|
169
|
+
{ success: false, error: e.message }
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def promote_to_global(tags:, min_confidence: 0.6) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
|
173
|
+
return { success: false, error: :not_started } unless started?
|
|
174
|
+
|
|
175
|
+
tags = normalize_tags_input(tags)
|
|
176
|
+
entries = query_by_tags(tags: tags)
|
|
177
|
+
unless entries[:success] && entries[:results]&.any?
|
|
178
|
+
log.info { "Apollo::Local promote_to_global skipped tag_count=#{tags.size} reason=no_entries" }
|
|
179
|
+
return { success: true, promoted: 0 }
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
promoted = 0
|
|
183
|
+
entries[:results].each do |entry|
|
|
184
|
+
next if entry[:confidence].to_f < min_confidence
|
|
185
|
+
|
|
186
|
+
entry_tags = parse_tags(entry[:tags])
|
|
187
|
+
hostname = begin
|
|
188
|
+
::Socket.gethostname
|
|
189
|
+
rescue StandardError => e
|
|
190
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.resolve_hostname')
|
|
191
|
+
'unknown'
|
|
192
|
+
end
|
|
193
|
+
result = Legion::Apollo.ingest(
|
|
194
|
+
content: entry[:content],
|
|
195
|
+
tags: entry_tags + ['promoted_from_local'],
|
|
196
|
+
source_channel: 'local_promotion',
|
|
197
|
+
submitted_by: "node:#{hostname}",
|
|
198
|
+
confidence: entry[:confidence],
|
|
199
|
+
scope: :global
|
|
200
|
+
)
|
|
201
|
+
promoted += 1 if result[:success]
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
log.info { "Apollo::Local promote_to_global completed promoted=#{promoted} tag_count=#{tags.size}" }
|
|
205
|
+
{ success: true, promoted: promoted }
|
|
206
|
+
rescue StandardError => e
|
|
207
|
+
handle_exception(
|
|
208
|
+
e,
|
|
209
|
+
level: :error,
|
|
210
|
+
operation: 'apollo.local.promote_to_global',
|
|
211
|
+
tag_count: tags.size,
|
|
212
|
+
min_confidence: min_confidence
|
|
213
|
+
)
|
|
214
|
+
{ success: false, error: e.message }
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
def hydrate_from_global
|
|
218
|
+
return { success: false, error: :not_started } unless started?
|
|
219
|
+
|
|
220
|
+
HYDRATION_MUTEX.synchronize { hydrate_from_global_without_lock }
|
|
221
|
+
rescue StandardError => e
|
|
222
|
+
handle_exception(e, level: :error, operation: 'apollo.local.hydrate_from_global')
|
|
223
|
+
{ success: false, error: e.message }
|
|
224
|
+
end
|
|
225
|
+
|
|
134
226
|
private
|
|
135
227
|
|
|
136
228
|
def self_knowledge_files
|
|
@@ -162,15 +254,17 @@ module Legion
|
|
|
162
254
|
end
|
|
163
255
|
|
|
164
256
|
def ingest_global(content:, tags:)
|
|
257
|
+
log.debug { "Apollo::Local forwarding seed entry to global tag_count=#{Array(tags).size}" }
|
|
165
258
|
Legion::Apollo.ingest(content: content, tags: tags, source_channel: 'self-knowledge',
|
|
166
259
|
submitted_by: 'legion-apollo', confidence: 0.9, scope: :global)
|
|
167
260
|
rescue StandardError => e
|
|
168
|
-
|
|
261
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.ingest_global_seed', tag_count: Array(tags).size)
|
|
169
262
|
end
|
|
170
263
|
|
|
171
264
|
def global_available?
|
|
172
265
|
defined?(Legion::Apollo) && Legion::Apollo.started? && Legion::Apollo.respond_to?(:ingest)
|
|
173
|
-
rescue StandardError
|
|
266
|
+
rescue StandardError => e
|
|
267
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.global_available')
|
|
174
268
|
false
|
|
175
269
|
end
|
|
176
270
|
|
|
@@ -184,13 +278,15 @@ module Legion
|
|
|
184
278
|
return true if local.nil?
|
|
185
279
|
|
|
186
280
|
local[:enabled] != false
|
|
187
|
-
rescue StandardError
|
|
281
|
+
rescue StandardError => e
|
|
282
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.local_enabled')
|
|
188
283
|
true
|
|
189
284
|
end
|
|
190
285
|
|
|
191
286
|
def data_local_available?
|
|
192
287
|
defined?(Legion::Data::Local) && Legion::Data::Local.connected?
|
|
193
|
-
rescue StandardError
|
|
288
|
+
rescue StandardError => e
|
|
289
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.data_local_available')
|
|
194
290
|
false
|
|
195
291
|
end
|
|
196
292
|
|
|
@@ -205,23 +301,137 @@ module Legion
|
|
|
205
301
|
|
|
206
302
|
def duplicate?(hash)
|
|
207
303
|
db[:local_knowledge].where(content_hash: hash).any?
|
|
208
|
-
rescue StandardError
|
|
304
|
+
rescue StandardError => e
|
|
305
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.duplicate_check', hash: hash)
|
|
209
306
|
false
|
|
210
307
|
end
|
|
211
308
|
|
|
212
|
-
def
|
|
309
|
+
def start_without_lock
|
|
310
|
+
return if @started
|
|
311
|
+
return unless local_enabled? && data_local_available?
|
|
312
|
+
|
|
313
|
+
Legion::Data::Local.register_migrations(name: :apollo_local, path: MIGRATION_PATH)
|
|
314
|
+
@started = true
|
|
315
|
+
log.info 'Legion::Apollo::Local started'
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def seed_self_knowledge_without_lock
|
|
319
|
+
return if @seeded
|
|
320
|
+
|
|
321
|
+
files = self_knowledge_files
|
|
322
|
+
return if files.empty?
|
|
323
|
+
|
|
324
|
+
count = seed_files(files)
|
|
325
|
+
@seeded = true
|
|
326
|
+
log.info("Apollo::Local seeded #{count} self-knowledge files")
|
|
327
|
+
end
|
|
328
|
+
|
|
329
|
+
def hydrate_from_global_without_lock # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
|
330
|
+
local_check = query_by_tags(tags: ['partner'])
|
|
331
|
+
if local_check[:success] && local_check[:results]&.any?
|
|
332
|
+
log.info 'Apollo::Local hydration skipped because local partner data already exists'
|
|
333
|
+
return { success: true, skipped: :local_data_exists }
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
unless Legion::Apollo.transport_available? || Legion::Apollo.data_available?
|
|
337
|
+
log.info 'Apollo::Local hydration skipped because global Apollo is unavailable'
|
|
338
|
+
return { success: true, skipped: :global_unavailable }
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
global_entries = Legion::Apollo.retrieve(text: 'partner bond', scope: :global, limit: 20)
|
|
342
|
+
entries = Array(global_entries[:entries] || global_entries[:results])
|
|
343
|
+
unless global_entries[:success] && entries.any?
|
|
344
|
+
log.info 'Apollo::Local hydration skipped because no global partner data was found'
|
|
345
|
+
return { success: true, skipped: :no_global_data }
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
log.info { "Apollo::Local hydration started global_count=#{entries.size}" }
|
|
349
|
+
hydrated = 0
|
|
350
|
+
entries.each do |entry|
|
|
351
|
+
entry_tags = entry[:tags].is_a?(Array) ? entry[:tags] : []
|
|
352
|
+
clean_tags = entry_tags.reject { |tag| tag == 'promoted_from_local' } + ['hydrated_from_global']
|
|
353
|
+
|
|
354
|
+
result = ingest(
|
|
355
|
+
content: entry[:content],
|
|
356
|
+
tags: clean_tags,
|
|
357
|
+
confidence: ((entry[:confidence] || 0.5) * 0.9).round(10),
|
|
358
|
+
source_channel: 'global_hydration'
|
|
359
|
+
)
|
|
360
|
+
hydrated += 1 if result[:success]
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
log.info { "Apollo::Local hydration completed hydrated=#{hydrated}" }
|
|
364
|
+
{ success: true, hydrated: hydrated }
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
def ingest_without_lock(content:, tags:, **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
368
|
+
hash = content_hash(content)
|
|
369
|
+
return deduplicated_ingest(hash) if duplicate?(hash)
|
|
370
|
+
|
|
371
|
+
log.info do
|
|
372
|
+
"Apollo::Local ingest accepted content_length=#{content.to_s.length} " \
|
|
373
|
+
"tags=#{Array(tags).size} source_channel=#{opts[:source_channel]}"
|
|
374
|
+
end
|
|
375
|
+
log.debug { "Apollo::Local ingest hash=#{hash} tags=#{Array(tags).size} source_channel=#{opts[:source_channel]}" }
|
|
376
|
+
|
|
377
|
+
row = build_ingest_row(content: content, hash: hash, tags: tags, **opts)
|
|
378
|
+
id = persist_ingest_row(row)
|
|
379
|
+
|
|
380
|
+
log.info { "Apollo::Local ingest stored id=#{id} hash=#{hash}" }
|
|
381
|
+
{ success: true, mode: :local, id: id }
|
|
382
|
+
rescue Sequel::UniqueConstraintViolation
|
|
383
|
+
raise unless duplicate?(hash)
|
|
384
|
+
|
|
385
|
+
deduplicated_ingest(hash)
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
def build_ingest_row(content:, hash:, tags:, **opts)
|
|
389
|
+
{
|
|
390
|
+
content: content,
|
|
391
|
+
content_hash: hash,
|
|
392
|
+
tags: serialized_tags(tags),
|
|
393
|
+
source_channel: opts[:source_channel],
|
|
394
|
+
source_agent: opts[:source_agent],
|
|
395
|
+
submitted_by: opts[:submitted_by],
|
|
396
|
+
confidence: opts[:confidence] || 1.0
|
|
397
|
+
}.merge(embedding_columns(content)).merge(timestamp_columns)
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
def persist_ingest_row(row)
|
|
401
|
+
db.transaction do
|
|
402
|
+
id = db[:local_knowledge].insert(row)
|
|
403
|
+
sync_fts!(id, row[:content], row[:tags])
|
|
404
|
+
id
|
|
405
|
+
end
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
def deduplicated_ingest(hash)
|
|
409
|
+
log.info { "Apollo::Local ingest deduplicated hash=#{hash}" }
|
|
410
|
+
{ success: true, mode: :deduplicated }
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
def generate_embedding(content) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
|
213
414
|
unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:can_embed?) && Legion::LLM.can_embed?
|
|
415
|
+
log.debug 'Apollo::Local embedding skipped because embeddings are unavailable'
|
|
214
416
|
return [nil, nil]
|
|
215
417
|
end
|
|
216
418
|
|
|
419
|
+
content = normalize_text_input(content)
|
|
217
420
|
result = Legion::LLM.embed(content)
|
|
218
421
|
vector = result.is_a?(Hash) ? result[:vector] : result
|
|
219
422
|
if vector.is_a?(Array) && vector.any?
|
|
423
|
+
log.debug { "Apollo::Local embedding generated dimensions=#{vector.size}" }
|
|
220
424
|
[vector, Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')]
|
|
221
425
|
else
|
|
222
426
|
[nil, nil]
|
|
223
427
|
end
|
|
224
|
-
rescue StandardError
|
|
428
|
+
rescue StandardError => e
|
|
429
|
+
handle_exception(
|
|
430
|
+
e,
|
|
431
|
+
level: :warn,
|
|
432
|
+
operation: 'apollo.local.generate_embedding',
|
|
433
|
+
content_length: content.to_s.length
|
|
434
|
+
)
|
|
225
435
|
[nil, nil]
|
|
226
436
|
end
|
|
227
437
|
|
|
@@ -230,15 +440,40 @@ module Legion
|
|
|
230
440
|
(Time.now.utc + (years * 365.25 * 24 * 3600)).strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
231
441
|
end
|
|
232
442
|
|
|
233
|
-
def sync_fts(id, content, tags_json)
|
|
443
|
+
def sync_fts!(id, content, tags_json)
|
|
234
444
|
sql = 'INSERT INTO local_knowledge_fts(rowid, content, tags) ' \
|
|
235
445
|
"VALUES (#{id}, #{db.literal(content)}, #{db.literal(tags_json)})"
|
|
236
446
|
db.run(sql)
|
|
237
|
-
|
|
238
|
-
|
|
447
|
+
log.debug { "Apollo::Local FTS synced id=#{id}" }
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
def embedding_columns(content)
|
|
451
|
+
embedding, embedded_at = generate_embedding(content)
|
|
452
|
+
|
|
453
|
+
{
|
|
454
|
+
embedding: embedding ? Legion::JSON.dump(embedding) : nil,
|
|
455
|
+
embedded_at: embedded_at,
|
|
456
|
+
expires_at: compute_expires_at
|
|
457
|
+
}
|
|
458
|
+
end
|
|
459
|
+
|
|
460
|
+
def timestamp_columns
|
|
461
|
+
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
462
|
+
{ created_at: now, updated_at: now }
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
def serialized_tags(tags)
|
|
466
|
+
Legion::JSON.dump(normalize_tags_input(tags))
|
|
239
467
|
end
|
|
240
468
|
|
|
241
469
|
def fts_search(text, limit:) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
470
|
+
if text.to_s.strip.empty?
|
|
471
|
+
return db[:local_knowledge]
|
|
472
|
+
.where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
|
|
473
|
+
.limit(limit)
|
|
474
|
+
.all
|
|
475
|
+
end
|
|
476
|
+
|
|
242
477
|
escaped = text.to_s.gsub('"', '""')
|
|
243
478
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
244
479
|
db.fetch(
|
|
@@ -247,7 +482,8 @@ module Legion
|
|
|
247
482
|
'WHERE local_knowledge_fts MATCH ? AND lk.expires_at > ? ORDER BY fts.rank LIMIT ?',
|
|
248
483
|
escaped, now, limit
|
|
249
484
|
).all
|
|
250
|
-
rescue StandardError
|
|
485
|
+
rescue StandardError => e
|
|
486
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.fts_search', limit: limit, fallback: :ilike)
|
|
251
487
|
db[:local_knowledge]
|
|
252
488
|
.where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
|
|
253
489
|
.where(Sequel.ilike(:content, "%#{text}%"))
|
|
@@ -271,7 +507,8 @@ module Legion
|
|
|
271
507
|
return [] if tags_json.nil? || tags_json.empty?
|
|
272
508
|
|
|
273
509
|
Legion::JSON.parse(tags_json)
|
|
274
|
-
rescue StandardError
|
|
510
|
+
rescue StandardError => e
|
|
511
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.parse_tags')
|
|
275
512
|
[]
|
|
276
513
|
end
|
|
277
514
|
|
|
@@ -280,6 +517,7 @@ module Legion
|
|
|
280
517
|
end
|
|
281
518
|
|
|
282
519
|
def cosine_rerank(text, candidates) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
|
520
|
+
text = normalize_text_input(text)
|
|
283
521
|
query_result = Legion::LLM.embed(text)
|
|
284
522
|
query_vec = query_result.is_a?(Hash) ? query_result[:vector] : query_result
|
|
285
523
|
return candidates unless query_vec.is_a?(Array) && query_vec.any?
|
|
@@ -295,7 +533,8 @@ module Legion
|
|
|
295
533
|
end
|
|
296
534
|
|
|
297
535
|
scored.sort_by { |c| -(c[:similarity] || 0) }
|
|
298
|
-
rescue StandardError
|
|
536
|
+
rescue StandardError => e
|
|
537
|
+
handle_exception(e, level: :warn, operation: 'apollo.local.cosine_rerank', candidate_count: candidates.size)
|
|
299
538
|
candidates
|
|
300
539
|
end
|
|
301
540
|
|
|
@@ -304,7 +543,8 @@ module Legion
|
|
|
304
543
|
|
|
305
544
|
parsed = Legion::JSON.parse(embedding_json)
|
|
306
545
|
parsed.is_a?(Array) ? parsed.map(&:to_f) : nil
|
|
307
|
-
rescue StandardError
|
|
546
|
+
rescue StandardError => e
|
|
547
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.parse_embedding')
|
|
308
548
|
nil
|
|
309
549
|
end
|
|
310
550
|
|
|
@@ -315,32 +555,107 @@ module Legion
|
|
|
315
555
|
return default if local.nil?
|
|
316
556
|
|
|
317
557
|
local[key] || default
|
|
318
|
-
rescue StandardError
|
|
558
|
+
rescue StandardError => e
|
|
559
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.local_setting', key: key)
|
|
319
560
|
default
|
|
320
561
|
end
|
|
321
562
|
|
|
322
|
-
def
|
|
323
|
-
|
|
563
|
+
def normalize_text_input(value)
|
|
564
|
+
if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
|
|
565
|
+
return Legion::Apollo.send(:normalize_text_input, value)
|
|
566
|
+
end
|
|
567
|
+
|
|
568
|
+
value.to_s
|
|
569
|
+
rescue StandardError => e
|
|
570
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.normalize_text_input')
|
|
571
|
+
value.to_s
|
|
572
|
+
end
|
|
573
|
+
|
|
574
|
+
def normalize_tags_input(tags)
|
|
575
|
+
Legion::Apollo::Helpers::TagNormalizer.normalize(Array(tags)).first(max_tags_limit)
|
|
576
|
+
rescue StandardError => e
|
|
577
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.normalize_tags_input')
|
|
578
|
+
Array(tags).map(&:to_s).first(max_tags_limit)
|
|
579
|
+
end
|
|
580
|
+
|
|
581
|
+
def max_tags_limit
|
|
582
|
+
configured = if defined?(Legion::Settings) && Legion::Settings[:apollo].is_a?(Hash)
|
|
583
|
+
Legion::Settings[:apollo][:max_tags]
|
|
584
|
+
end
|
|
585
|
+
limit = configured.nil? ? Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS : configured.to_i
|
|
586
|
+
[limit, Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS].min
|
|
587
|
+
rescue StandardError => e
|
|
588
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.max_tags_limit')
|
|
589
|
+
Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS
|
|
590
|
+
end
|
|
591
|
+
|
|
592
|
+
def query_by_tags_via_sql(tags:, limit:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
324
593
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
594
|
+
dataset = db[:local_knowledge].where(Sequel.lit('expires_at > ?', now))
|
|
595
|
+
|
|
596
|
+
Array(tags).map(&:to_s).each do |tag|
|
|
597
|
+
dataset = dataset.where(
|
|
598
|
+
Sequel.lit(
|
|
599
|
+
'EXISTS (SELECT 1 FROM json_each(local_knowledge.tags) WHERE json_each.value = ?)',
|
|
600
|
+
tag
|
|
601
|
+
)
|
|
602
|
+
)
|
|
603
|
+
end
|
|
325
604
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
605
|
+
dataset.limit(limit).all
|
|
606
|
+
rescue StandardError => e
|
|
607
|
+
handle_exception(
|
|
608
|
+
e,
|
|
609
|
+
level: :debug,
|
|
610
|
+
operation: 'apollo.local.query_by_tags_via_sql',
|
|
611
|
+
tag_count: Array(tags).size,
|
|
612
|
+
limit: limit
|
|
334
613
|
)
|
|
335
|
-
|
|
614
|
+
query_by_tags_via_ruby(tags: tags, limit: limit)
|
|
615
|
+
end
|
|
616
|
+
|
|
617
|
+
def query_by_tags_via_ruby(tags:, limit:)
|
|
618
|
+
candidates = db[:local_knowledge]
|
|
619
|
+
.where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
|
|
620
|
+
.all
|
|
621
|
+
|
|
622
|
+
candidates.select do |row|
|
|
623
|
+
row_tags = parse_tags(row[:tags])
|
|
624
|
+
tags.all? { |tag| row_tags.include?(tag) }
|
|
625
|
+
end.first(limit)
|
|
626
|
+
end
|
|
627
|
+
|
|
628
|
+
def update_upsert_entry(existing, content, tags_json, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
629
|
+
content = content.to_s
|
|
630
|
+
new_hash = content_hash(content)
|
|
631
|
+
embedding, embedded_at = generate_embedding(content)
|
|
632
|
+
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
633
|
+
expires_at = compute_expires_at
|
|
634
|
+
|
|
635
|
+
db.transaction do
|
|
636
|
+
db[:local_knowledge].where(id: existing[:id]).update(
|
|
637
|
+
content: content,
|
|
638
|
+
content_hash: new_hash,
|
|
639
|
+
tags: tags_json,
|
|
640
|
+
embedding: embedding ? Legion::JSON.dump(embedding) : nil,
|
|
641
|
+
embedded_at: embedded_at,
|
|
642
|
+
confidence: opts.fetch(:confidence, existing[:confidence]),
|
|
643
|
+
expires_at: expires_at,
|
|
644
|
+
source_channel: opts.fetch(:source_channel, existing[:source_channel]),
|
|
645
|
+
source_agent: opts.fetch(:source_agent, existing[:source_agent]),
|
|
646
|
+
submitted_by: opts.fetch(:submitted_by, existing[:submitted_by]),
|
|
647
|
+
updated_at: now
|
|
648
|
+
)
|
|
649
|
+
rebuild_fts_entry!(existing[:id], content, tags_json)
|
|
650
|
+
end
|
|
651
|
+
log.info { "Apollo::Local upsert updated id=#{existing[:id]} hash=#{new_hash}" }
|
|
336
652
|
{ success: true, mode: :updated, id: existing[:id] }
|
|
337
653
|
end
|
|
338
654
|
|
|
339
|
-
def rebuild_fts_entry(id, content, tags_json)
|
|
655
|
+
def rebuild_fts_entry!(id, content, tags_json)
|
|
340
656
|
db.run("DELETE FROM local_knowledge_fts WHERE rowid = #{id}")
|
|
341
|
-
sync_fts(id, content, tags_json)
|
|
342
|
-
|
|
343
|
-
nil
|
|
657
|
+
sync_fts!(id, content, tags_json)
|
|
658
|
+
log.debug { "Apollo::Local FTS rebuilt id=#{id}" }
|
|
344
659
|
end
|
|
345
660
|
|
|
346
661
|
def not_started_error
|