legion-apollo 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'digest'
4
+ require 'legion/logging'
5
+ require 'socket'
4
6
  require 'time'
5
7
  require_relative 'local/graph'
8
+ require_relative 'helpers/similarity'
9
+ require_relative 'helpers/tag_normalizer'
6
10
 
7
11
  module Legion
8
12
  module Apollo
@@ -10,93 +14,112 @@ module Legion
10
14
  # Mirrors Legion::Apollo's public API but stores locally.
11
15
  module Local # rubocop:disable Metrics/ModuleLength
12
16
  MIGRATION_PATH = File.expand_path('local/migrations', __dir__).freeze
17
+ LIFECYCLE_MUTEX = Mutex.new
18
+ WRITE_MUTEX = Mutex.new
19
+ SEED_MUTEX = Mutex.new
20
+ HYDRATION_MUTEX = Mutex.new
13
21
 
14
22
  class << self # rubocop:disable Metrics/ClassLength
15
- def start
16
- return if @started
17
- return unless local_enabled?
18
- return unless data_local_available?
23
+ include Legion::Logging::Helper
19
24
 
20
- Legion::Data::Local.register_migrations(name: :apollo_local, path: MIGRATION_PATH)
21
- @started = true
22
- Legion::Logging.info 'Legion::Apollo::Local started' if defined?(Legion::Logging)
25
+ def start
26
+ LIFECYCLE_MUTEX.synchronize { start_without_lock }
27
+ rescue StandardError => e
28
+ handle_exception(e, level: :error, operation: 'apollo.local.start')
29
+ @started = false
23
30
  end
24
31
 
25
32
  def shutdown
33
+ LIFECYCLE_MUTEX.synchronize do
34
+ @started = false
35
+ @seeded = false
36
+ log.info 'Legion::Apollo::Local shutdown'
37
+ end
38
+ rescue StandardError => e
39
+ handle_exception(e, level: :warn, operation: 'apollo.local.shutdown')
26
40
  @started = false
27
- Legion::Logging.info 'Legion::Apollo::Local shutdown' if defined?(Legion::Logging)
41
+ @seeded = false
28
42
  end
29
43
 
30
44
  def started?
31
45
  @started == true
32
46
  end
33
47
 
34
- def ingest(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
48
+ def ingest(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength
35
49
  return not_started_error unless started?
36
50
 
37
- hash = content_hash(content)
38
- return { success: true, mode: :deduplicated } if duplicate?(hash)
39
-
40
- embedding, embedded_at = generate_embedding(content)
41
- now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
42
- expires = compute_expires_at
43
-
44
- row = {
45
- content: content,
46
- content_hash: hash,
47
- tags: Legion::JSON.dump(Array(tags).first(local_setting(:max_tags, 20))),
48
- embedding: embedding ? Legion::JSON.dump(embedding) : nil,
49
- embedded_at: embedded_at,
50
- source_channel: opts[:source_channel],
51
- source_agent: opts[:source_agent],
52
- submitted_by: opts[:submitted_by],
53
- confidence: opts[:confidence] || 1.0,
54
- expires_at: expires,
55
- created_at: now,
56
- updated_at: now
57
- }
58
-
59
- id = db[:local_knowledge].insert(row)
60
- sync_fts(id, content, row[:tags])
61
-
62
- { success: true, mode: :local, id: id }
51
+ tags = normalize_tags_input(tags)
52
+ WRITE_MUTEX.synchronize do
53
+ ingest_without_lock(content: content, tags: tags, **opts)
54
+ end
63
55
  rescue StandardError => e
56
+ handle_exception(
57
+ e,
58
+ level: :error,
59
+ operation: 'apollo.local.ingest',
60
+ tags: Array(tags).size,
61
+ source_channel: opts[:source_channel]
62
+ )
64
63
  { success: false, error: e.message }
65
64
  end
66
65
 
67
- def upsert(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
66
+ def upsert(content:, tags: [], **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
68
67
  return not_started_error unless started?
69
68
 
70
- sorted_tags = Array(tags).map(&:to_s).sort
69
+ sorted_tags = normalize_tags_input(tags).sort
71
70
  tag_json = Legion::JSON.dump(sorted_tags)
72
- existing = db[:local_knowledge].where(tags: tag_json).first
73
-
74
- if existing
75
- update_upsert_entry(existing, content, tag_json, opts)
76
- else
77
- result = ingest(content: content, tags: sorted_tags, **opts)
78
- result[:mode] = :inserted if result[:success] && result[:mode] != :deduplicated
79
- result
71
+ WRITE_MUTEX.synchronize do
72
+ existing = db[:local_knowledge].where(tags: tag_json).first
73
+
74
+ if existing
75
+ update_upsert_entry(existing, content, tag_json, opts)
76
+ else
77
+ result = ingest_without_lock(content: content, tags: sorted_tags, **opts)
78
+ result[:mode] = :inserted if result[:success] && result[:mode] != :deduplicated
79
+ result
80
+ end
80
81
  end
81
82
  rescue StandardError => e
82
- Legion::Logging.warn "Apollo::Local upsert error: #{e.message}" if defined?(Legion::Logging)
83
+ handle_exception(
84
+ e,
85
+ level: :warn,
86
+ operation: 'apollo.local.upsert',
87
+ tags: Array(tags).size,
88
+ source_channel: opts[:source_channel]
89
+ )
83
90
  { success: false, error: e.message }
84
91
  end
85
92
 
86
93
  def query(text:, limit: nil, min_confidence: nil, tags: nil, **) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
87
94
  return not_started_error unless started?
88
95
 
96
+ text = normalize_text_input(text)
97
+ tags = normalize_tags_input(tags)
89
98
  limit ||= local_setting(:default_limit, 5)
90
99
  min_confidence ||= local_setting(:min_confidence, 0.3)
91
100
  multiplier = local_setting(:fts_candidate_multiplier, 3)
101
+ log.info do
102
+ "Apollo::Local query executing text_length=#{text.to_s.length} " \
103
+ "limit=#{limit} min_confidence=#{min_confidence} tag_count=#{Array(tags).size}"
104
+ end
105
+ log.debug { "Apollo::Local query limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size}" }
92
106
 
93
107
  candidates = fts_search(text, limit: limit * multiplier)
94
108
  candidates = filter_candidates(candidates, min_confidence: min_confidence, tags: tags)
95
109
  candidates = cosine_rerank(text, candidates) if can_rerank?
96
110
  results = candidates.first(limit)
97
111
 
112
+ log.info { "Apollo::Local query completed count=#{results.size}" }
98
113
  { success: true, results: results, count: results.size, mode: :local }
99
114
  rescue StandardError => e
115
+ handle_exception(
116
+ e,
117
+ level: :error,
118
+ operation: 'apollo.local.query',
119
+ limit: limit,
120
+ min_confidence: min_confidence,
121
+ tag_count: Array(tags).size
122
+ )
100
123
  { success: false, error: e.message }
101
124
  end
102
125
 
@@ -109,28 +132,97 @@ module Legion
109
132
  end
110
133
 
111
134
  def reset!
112
- @started = false
113
- @seeded = false
135
+ LIFECYCLE_MUTEX.synchronize do
136
+ @started = false
137
+ @seeded = false
138
+ end
114
139
  end
115
140
 
116
141
  def seed_self_knowledge
117
142
  return unless started?
118
- return if @seeded
119
143
 
120
- files = self_knowledge_files
121
- return if files.empty?
122
-
123
- count = seed_files(files)
124
- @seeded = true
125
- Legion::Logging.info("Apollo::Local seeded #{count} self-knowledge files") if defined?(Legion::Logging)
144
+ SEED_MUTEX.synchronize { seed_self_knowledge_without_lock }
126
145
  rescue StandardError => e
127
- Legion::Logging.warn("Apollo::Local seed failed: #{e.message}") if defined?(Legion::Logging)
146
+ handle_exception(e, level: :warn, operation: 'apollo.local.seed_self_knowledge')
128
147
  end
129
148
 
130
149
  def seeded?
131
150
  @seeded == true
132
151
  end
133
152
 
153
+ def query_by_tags(tags:, limit: 50) # rubocop:disable Metrics/MethodLength
154
+ return { success: false, error: :not_started } unless started?
155
+
156
+ tags = normalize_tags_input(tags)
157
+ results = query_by_tags_via_sql(tags: tags, limit: limit)
158
+
159
+ log.info { "Apollo::Local query_by_tags completed tag_count=#{tags.size} count=#{results.size}" }
160
+ { success: true, results: results, count: results.size }
161
+ rescue StandardError => e
162
+ handle_exception(
163
+ e,
164
+ level: :error,
165
+ operation: 'apollo.local.query_by_tags',
166
+ tag_count: tags.size,
167
+ limit: limit
168
+ )
169
+ { success: false, error: e.message }
170
+ end
171
+
172
+ def promote_to_global(tags:, min_confidence: 0.6) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
173
+ return { success: false, error: :not_started } unless started?
174
+
175
+ tags = normalize_tags_input(tags)
176
+ entries = query_by_tags(tags: tags)
177
+ unless entries[:success] && entries[:results]&.any?
178
+ log.info { "Apollo::Local promote_to_global skipped tag_count=#{tags.size} reason=no_entries" }
179
+ return { success: true, promoted: 0 }
180
+ end
181
+
182
+ promoted = 0
183
+ entries[:results].each do |entry|
184
+ next if entry[:confidence].to_f < min_confidence
185
+
186
+ entry_tags = parse_tags(entry[:tags])
187
+ hostname = begin
188
+ ::Socket.gethostname
189
+ rescue StandardError => e
190
+ handle_exception(e, level: :debug, operation: 'apollo.local.resolve_hostname')
191
+ 'unknown'
192
+ end
193
+ result = Legion::Apollo.ingest(
194
+ content: entry[:content],
195
+ tags: entry_tags + ['promoted_from_local'],
196
+ source_channel: 'local_promotion',
197
+ submitted_by: "node:#{hostname}",
198
+ confidence: entry[:confidence],
199
+ scope: :global
200
+ )
201
+ promoted += 1 if result[:success]
202
+ end
203
+
204
+ log.info { "Apollo::Local promote_to_global completed promoted=#{promoted} tag_count=#{tags.size}" }
205
+ { success: true, promoted: promoted }
206
+ rescue StandardError => e
207
+ handle_exception(
208
+ e,
209
+ level: :error,
210
+ operation: 'apollo.local.promote_to_global',
211
+ tag_count: tags.size,
212
+ min_confidence: min_confidence
213
+ )
214
+ { success: false, error: e.message }
215
+ end
216
+
217
+ def hydrate_from_global
218
+ return { success: false, error: :not_started } unless started?
219
+
220
+ HYDRATION_MUTEX.synchronize { hydrate_from_global_without_lock }
221
+ rescue StandardError => e
222
+ handle_exception(e, level: :error, operation: 'apollo.local.hydrate_from_global')
223
+ { success: false, error: e.message }
224
+ end
225
+
134
226
  private
135
227
 
136
228
  def self_knowledge_files
@@ -162,15 +254,17 @@ module Legion
162
254
  end
163
255
 
164
256
  def ingest_global(content:, tags:)
257
+ log.debug { "Apollo::Local forwarding seed entry to global tag_count=#{Array(tags).size}" }
165
258
  Legion::Apollo.ingest(content: content, tags: tags, source_channel: 'self-knowledge',
166
259
  submitted_by: 'legion-apollo', confidence: 0.9, scope: :global)
167
260
  rescue StandardError => e
168
- Legion::Logging.debug("Global seed ingest failed: #{e.message}") if defined?(Legion::Logging)
261
+ handle_exception(e, level: :debug, operation: 'apollo.local.ingest_global_seed', tag_count: Array(tags).size)
169
262
  end
170
263
 
171
264
  def global_available?
172
265
  defined?(Legion::Apollo) && Legion::Apollo.started? && Legion::Apollo.respond_to?(:ingest)
173
- rescue StandardError
266
+ rescue StandardError => e
267
+ handle_exception(e, level: :debug, operation: 'apollo.local.global_available')
174
268
  false
175
269
  end
176
270
 
@@ -184,13 +278,15 @@ module Legion
184
278
  return true if local.nil?
185
279
 
186
280
  local[:enabled] != false
187
- rescue StandardError
281
+ rescue StandardError => e
282
+ handle_exception(e, level: :debug, operation: 'apollo.local.local_enabled')
188
283
  true
189
284
  end
190
285
 
191
286
  def data_local_available?
192
287
  defined?(Legion::Data::Local) && Legion::Data::Local.connected?
193
- rescue StandardError
288
+ rescue StandardError => e
289
+ handle_exception(e, level: :debug, operation: 'apollo.local.data_local_available')
194
290
  false
195
291
  end
196
292
 
@@ -205,23 +301,137 @@ module Legion
205
301
 
206
302
  def duplicate?(hash)
207
303
  db[:local_knowledge].where(content_hash: hash).any?
208
- rescue StandardError
304
+ rescue StandardError => e
305
+ handle_exception(e, level: :debug, operation: 'apollo.local.duplicate_check', hash: hash)
209
306
  false
210
307
  end
211
308
 
212
- def generate_embedding(content) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
309
+ def start_without_lock
310
+ return if @started
311
+ return unless local_enabled? && data_local_available?
312
+
313
+ Legion::Data::Local.register_migrations(name: :apollo_local, path: MIGRATION_PATH)
314
+ @started = true
315
+ log.info 'Legion::Apollo::Local started'
316
+ end
317
+
318
+ def seed_self_knowledge_without_lock
319
+ return if @seeded
320
+
321
+ files = self_knowledge_files
322
+ return if files.empty?
323
+
324
+ count = seed_files(files)
325
+ @seeded = true
326
+ log.info("Apollo::Local seeded #{count} self-knowledge files")
327
+ end
328
+
329
+ def hydrate_from_global_without_lock # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
330
+ local_check = query_by_tags(tags: ['partner'])
331
+ if local_check[:success] && local_check[:results]&.any?
332
+ log.info 'Apollo::Local hydration skipped because local partner data already exists'
333
+ return { success: true, skipped: :local_data_exists }
334
+ end
335
+
336
+ unless Legion::Apollo.transport_available? || Legion::Apollo.data_available?
337
+ log.info 'Apollo::Local hydration skipped because global Apollo is unavailable'
338
+ return { success: true, skipped: :global_unavailable }
339
+ end
340
+
341
+ global_entries = Legion::Apollo.retrieve(text: 'partner bond', scope: :global, limit: 20)
342
+ entries = Array(global_entries[:entries] || global_entries[:results])
343
+ unless global_entries[:success] && entries.any?
344
+ log.info 'Apollo::Local hydration skipped because no global partner data was found'
345
+ return { success: true, skipped: :no_global_data }
346
+ end
347
+
348
+ log.info { "Apollo::Local hydration started global_count=#{entries.size}" }
349
+ hydrated = 0
350
+ entries.each do |entry|
351
+ entry_tags = entry[:tags].is_a?(Array) ? entry[:tags] : []
352
+ clean_tags = entry_tags.reject { |tag| tag == 'promoted_from_local' } + ['hydrated_from_global']
353
+
354
+ result = ingest(
355
+ content: entry[:content],
356
+ tags: clean_tags,
357
+ confidence: ((entry[:confidence] || 0.5) * 0.9).round(10),
358
+ source_channel: 'global_hydration'
359
+ )
360
+ hydrated += 1 if result[:success]
361
+ end
362
+
363
+ log.info { "Apollo::Local hydration completed hydrated=#{hydrated}" }
364
+ { success: true, hydrated: hydrated }
365
+ end
366
+
367
+ def ingest_without_lock(content:, tags:, **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
368
+ hash = content_hash(content)
369
+ return deduplicated_ingest(hash) if duplicate?(hash)
370
+
371
+ log.info do
372
+ "Apollo::Local ingest accepted content_length=#{content.to_s.length} " \
373
+ "tags=#{Array(tags).size} source_channel=#{opts[:source_channel]}"
374
+ end
375
+ log.debug { "Apollo::Local ingest hash=#{hash} tags=#{Array(tags).size} source_channel=#{opts[:source_channel]}" }
376
+
377
+ row = build_ingest_row(content: content, hash: hash, tags: tags, **opts)
378
+ id = persist_ingest_row(row)
379
+
380
+ log.info { "Apollo::Local ingest stored id=#{id} hash=#{hash}" }
381
+ { success: true, mode: :local, id: id }
382
+ rescue Sequel::UniqueConstraintViolation
383
+ raise unless duplicate?(hash)
384
+
385
+ deduplicated_ingest(hash)
386
+ end
387
+
388
+ def build_ingest_row(content:, hash:, tags:, **opts)
389
+ {
390
+ content: content,
391
+ content_hash: hash,
392
+ tags: serialized_tags(tags),
393
+ source_channel: opts[:source_channel],
394
+ source_agent: opts[:source_agent],
395
+ submitted_by: opts[:submitted_by],
396
+ confidence: opts[:confidence] || 1.0
397
+ }.merge(embedding_columns(content)).merge(timestamp_columns)
398
+ end
399
+
400
+ def persist_ingest_row(row)
401
+ db.transaction do
402
+ id = db[:local_knowledge].insert(row)
403
+ sync_fts!(id, row[:content], row[:tags])
404
+ id
405
+ end
406
+ end
407
+
408
+ def deduplicated_ingest(hash)
409
+ log.info { "Apollo::Local ingest deduplicated hash=#{hash}" }
410
+ { success: true, mode: :deduplicated }
411
+ end
412
+
413
+ def generate_embedding(content) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
213
414
  unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:can_embed?) && Legion::LLM.can_embed?
415
+ log.debug 'Apollo::Local embedding skipped because embeddings are unavailable'
214
416
  return [nil, nil]
215
417
  end
216
418
 
419
+ content = normalize_text_input(content)
217
420
  result = Legion::LLM.embed(content)
218
421
  vector = result.is_a?(Hash) ? result[:vector] : result
219
422
  if vector.is_a?(Array) && vector.any?
423
+ log.debug { "Apollo::Local embedding generated dimensions=#{vector.size}" }
220
424
  [vector, Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')]
221
425
  else
222
426
  [nil, nil]
223
427
  end
224
- rescue StandardError
428
+ rescue StandardError => e
429
+ handle_exception(
430
+ e,
431
+ level: :warn,
432
+ operation: 'apollo.local.generate_embedding',
433
+ content_length: content.to_s.length
434
+ )
225
435
  [nil, nil]
226
436
  end
227
437
 
@@ -230,15 +440,40 @@ module Legion
230
440
  (Time.now.utc + (years * 365.25 * 24 * 3600)).strftime('%Y-%m-%dT%H:%M:%S.%LZ')
231
441
  end
232
442
 
233
- def sync_fts(id, content, tags_json)
443
+ def sync_fts!(id, content, tags_json)
234
444
  sql = 'INSERT INTO local_knowledge_fts(rowid, content, tags) ' \
235
445
  "VALUES (#{id}, #{db.literal(content)}, #{db.literal(tags_json)})"
236
446
  db.run(sql)
237
- rescue StandardError => e
238
- Legion::Logging.warn("FTS5 sync failed for id=#{id}: #{e.message}") if defined?(Legion::Logging)
447
+ log.debug { "Apollo::Local FTS synced id=#{id}" }
448
+ end
449
+
450
+ def embedding_columns(content)
451
+ embedding, embedded_at = generate_embedding(content)
452
+
453
+ {
454
+ embedding: embedding ? Legion::JSON.dump(embedding) : nil,
455
+ embedded_at: embedded_at,
456
+ expires_at: compute_expires_at
457
+ }
458
+ end
459
+
460
+ def timestamp_columns
461
+ now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
462
+ { created_at: now, updated_at: now }
463
+ end
464
+
465
+ def serialized_tags(tags)
466
+ Legion::JSON.dump(normalize_tags_input(tags))
239
467
  end
240
468
 
241
469
  def fts_search(text, limit:) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
470
+ if text.to_s.strip.empty?
471
+ return db[:local_knowledge]
472
+ .where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
473
+ .limit(limit)
474
+ .all
475
+ end
476
+
242
477
  escaped = text.to_s.gsub('"', '""')
243
478
  now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
244
479
  db.fetch(
@@ -247,7 +482,8 @@ module Legion
247
482
  'WHERE local_knowledge_fts MATCH ? AND lk.expires_at > ? ORDER BY fts.rank LIMIT ?',
248
483
  escaped, now, limit
249
484
  ).all
250
- rescue StandardError
485
+ rescue StandardError => e
486
+ handle_exception(e, level: :debug, operation: 'apollo.local.fts_search', limit: limit, fallback: :ilike)
251
487
  db[:local_knowledge]
252
488
  .where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
253
489
  .where(Sequel.ilike(:content, "%#{text}%"))
@@ -271,7 +507,8 @@ module Legion
271
507
  return [] if tags_json.nil? || tags_json.empty?
272
508
 
273
509
  Legion::JSON.parse(tags_json)
274
- rescue StandardError
510
+ rescue StandardError => e
511
+ handle_exception(e, level: :debug, operation: 'apollo.local.parse_tags')
275
512
  []
276
513
  end
277
514
 
@@ -280,6 +517,7 @@ module Legion
280
517
  end
281
518
 
282
519
  def cosine_rerank(text, candidates) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
520
+ text = normalize_text_input(text)
283
521
  query_result = Legion::LLM.embed(text)
284
522
  query_vec = query_result.is_a?(Hash) ? query_result[:vector] : query_result
285
523
  return candidates unless query_vec.is_a?(Array) && query_vec.any?
@@ -295,7 +533,8 @@ module Legion
295
533
  end
296
534
 
297
535
  scored.sort_by { |c| -(c[:similarity] || 0) }
298
- rescue StandardError
536
+ rescue StandardError => e
537
+ handle_exception(e, level: :warn, operation: 'apollo.local.cosine_rerank', candidate_count: candidates.size)
299
538
  candidates
300
539
  end
301
540
 
@@ -304,7 +543,8 @@ module Legion
304
543
 
305
544
  parsed = Legion::JSON.parse(embedding_json)
306
545
  parsed.is_a?(Array) ? parsed.map(&:to_f) : nil
307
- rescue StandardError
546
+ rescue StandardError => e
547
+ handle_exception(e, level: :debug, operation: 'apollo.local.parse_embedding')
308
548
  nil
309
549
  end
310
550
 
@@ -315,32 +555,107 @@ module Legion
315
555
  return default if local.nil?
316
556
 
317
557
  local[key] || default
318
- rescue StandardError
558
+ rescue StandardError => e
559
+ handle_exception(e, level: :debug, operation: 'apollo.local.local_setting', key: key)
319
560
  default
320
561
  end
321
562
 
322
- def update_upsert_entry(existing, content, tags_json, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
323
- new_hash = content_hash(content)
563
+ def normalize_text_input(value)
564
+ if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
565
+ return Legion::Apollo.send(:normalize_text_input, value)
566
+ end
567
+
568
+ value.to_s
569
+ rescue StandardError => e
570
+ handle_exception(e, level: :debug, operation: 'apollo.local.normalize_text_input')
571
+ value.to_s
572
+ end
573
+
574
+ def normalize_tags_input(tags)
575
+ Legion::Apollo::Helpers::TagNormalizer.normalize(Array(tags)).first(max_tags_limit)
576
+ rescue StandardError => e
577
+ handle_exception(e, level: :debug, operation: 'apollo.local.normalize_tags_input')
578
+ Array(tags).map(&:to_s).first(max_tags_limit)
579
+ end
580
+
581
+ def max_tags_limit
582
+ configured = if defined?(Legion::Settings) && Legion::Settings[:apollo].is_a?(Hash)
583
+ Legion::Settings[:apollo][:max_tags]
584
+ end
585
+ limit = configured.nil? ? Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS : configured.to_i
586
+ [limit, Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS].min
587
+ rescue StandardError => e
588
+ handle_exception(e, level: :debug, operation: 'apollo.local.max_tags_limit')
589
+ Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS
590
+ end
591
+
592
+ def query_by_tags_via_sql(tags:, limit:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
324
593
  now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
594
+ dataset = db[:local_knowledge].where(Sequel.lit('expires_at > ?', now))
595
+
596
+ Array(tags).map(&:to_s).each do |tag|
597
+ dataset = dataset.where(
598
+ Sequel.lit(
599
+ 'EXISTS (SELECT 1 FROM json_each(local_knowledge.tags) WHERE json_each.value = ?)',
600
+ tag
601
+ )
602
+ )
603
+ end
325
604
 
326
- db[:local_knowledge].where(id: existing[:id]).update(
327
- content: content.to_s,
328
- content_hash: new_hash,
329
- confidence: opts.fetch(:confidence, existing[:confidence]),
330
- source_channel: opts.fetch(:source_channel, existing[:source_channel]),
331
- source_agent: opts.fetch(:source_agent, existing[:source_agent]),
332
- submitted_by: opts.fetch(:submitted_by, existing[:submitted_by]),
333
- updated_at: now
605
+ dataset.limit(limit).all
606
+ rescue StandardError => e
607
+ handle_exception(
608
+ e,
609
+ level: :debug,
610
+ operation: 'apollo.local.query_by_tags_via_sql',
611
+ tag_count: Array(tags).size,
612
+ limit: limit
334
613
  )
335
- rebuild_fts_entry(existing[:id], content.to_s, tags_json)
614
+ query_by_tags_via_ruby(tags: tags, limit: limit)
615
+ end
616
+
617
+ def query_by_tags_via_ruby(tags:, limit:)
618
+ candidates = db[:local_knowledge]
619
+ .where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
620
+ .all
621
+
622
+ candidates.select do |row|
623
+ row_tags = parse_tags(row[:tags])
624
+ tags.all? { |tag| row_tags.include?(tag) }
625
+ end.first(limit)
626
+ end
627
+
628
+ def update_upsert_entry(existing, content, tags_json, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
629
+ content = content.to_s
630
+ new_hash = content_hash(content)
631
+ embedding, embedded_at = generate_embedding(content)
632
+ now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
633
+ expires_at = compute_expires_at
634
+
635
+ db.transaction do
636
+ db[:local_knowledge].where(id: existing[:id]).update(
637
+ content: content,
638
+ content_hash: new_hash,
639
+ tags: tags_json,
640
+ embedding: embedding ? Legion::JSON.dump(embedding) : nil,
641
+ embedded_at: embedded_at,
642
+ confidence: opts.fetch(:confidence, existing[:confidence]),
643
+ expires_at: expires_at,
644
+ source_channel: opts.fetch(:source_channel, existing[:source_channel]),
645
+ source_agent: opts.fetch(:source_agent, existing[:source_agent]),
646
+ submitted_by: opts.fetch(:submitted_by, existing[:submitted_by]),
647
+ updated_at: now
648
+ )
649
+ rebuild_fts_entry!(existing[:id], content, tags_json)
650
+ end
651
+ log.info { "Apollo::Local upsert updated id=#{existing[:id]} hash=#{new_hash}" }
336
652
  { success: true, mode: :updated, id: existing[:id] }
337
653
  end
338
654
 
339
- def rebuild_fts_entry(id, content, tags_json)
655
+ def rebuild_fts_entry!(id, content, tags_json)
340
656
  db.run("DELETE FROM local_knowledge_fts WHERE rowid = #{id}")
341
- sync_fts(id, content, tags_json)
342
- rescue StandardError
343
- nil
657
+ sync_fts!(id, content, tags_json)
658
+ log.debug { "Apollo::Local FTS rebuilt id=#{id}" }
344
659
  end
345
660
 
346
661
  def not_started_error