lex-apollo 0.4.24 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2dceee29e5b2af12bab5833beac8b837d2366eaf9509c7e4ce04e88b44a5c28f
4
- data.tar.gz: 97d62a6385f375b19a6f48b1881fef8525e5949c4348bfe0cd07581d9ff710f2
3
+ metadata.gz: fe40d4ba4efef56b2bd1cec166f525ca3c165989453e455bdacc14a2a5377d16
4
+ data.tar.gz: e4f7f0daabec002d031c00a6936308df68d75dd2200b1f49f7264de6f2833153
5
5
  SHA512:
6
- metadata.gz: 90065729231f3322eeaaeaa0e44e03b9b79350157674bec30f174fa6447d74bc53b4794e3385eee9d617c13c62cb22198de39fc21ebece7d2d512e7b1dd09ba6
7
- data.tar.gz: f1c1b607fd9d9d244c0937b654ea95ca361980f5732c6e513719a6163004595ce0aaa9baab4181db56c8e6e8d1c511c1d6695c9110905e2094a252ba06e5a2c6
6
+ metadata.gz: 7d0995197fb2c7191a81a91bba476792570baaf5e750df8b0464de096beabc82a4fbec8fabb8b09fda967d5c834c7bf682041d8775ce24c602d1790e332692c7
7
+ data.tar.gz: 6c0277a64242c3c37795dde7229db4d2f0c27d191a7ad5d252266db1c4b433538ec5741f57624a3e262d1ad1b38ca9abf9c0ec1b2463a7bd69b9fac2126cb190
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.26] - 2026-05-11
4
+
5
+ ### Fixed
6
+ - Handle `Sequel::UniqueConstraintViolation` in `create_candidate_entry` gracefully — a race condition during concurrent knowledge ingestion can cause two threads to pass the content_hash dedup check simultaneously and both attempt to insert the same row. On collision, the rescue block now looks up the existing winner row by content_hash (excluding archived) and returns its ID so the caller continues normally (access log, contradiction detection, etc.) instead of propagating a database error.
7
+ - Added `Sequel::UniqueConstraintViolation` stub to the test-only Sequel shim so the race-condition rescue path is exercisable in unit tests without a live database.
8
+
9
+ ## [0.4.25] - 2026-05-08
10
+
11
+ ### Fixed
12
+ - Entity watchdog now remembers processed task-log text during the process lifetime so unchanged logs do not trigger repeated structured LLM extraction on every watchdog interval.
13
+
3
14
  ## [0.4.24] - 2026-05-07
4
15
 
5
16
  ### Fixed
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'digest'
3
4
  require 'legion/extensions/actors/every'
4
5
  require_relative '../runners/knowledge'
5
6
  require_relative '../runners/entity_extractor'
@@ -40,6 +41,8 @@ module Legion
40
41
 
41
42
  ingested = 0
42
43
  texts.each do |text|
44
+ next if task_log_text_processed?(text)
45
+
43
46
  result = extract_entities(
44
47
  text: text,
45
48
  entity_types: entity_types,
@@ -47,6 +50,7 @@ module Legion
47
50
  )
48
51
  next unless result[:success]
49
52
 
53
+ mark_task_log_text_processed(text) unless result[:source] == :unavailable
50
54
  result[:entities].each do |entity|
51
55
  next if entity_exists_in_apollo?(entity)
52
56
 
@@ -124,6 +128,28 @@ module Legion
124
128
  def dedup_similarity_threshold
125
129
  settings[:entity_watchdog][:dedup_threshold].to_f
126
130
  end
131
+
132
+ def task_log_text_processed?(text)
133
+ processed_task_log_hashes.key?(task_log_text_hash(text))
134
+ end
135
+
136
+ def mark_task_log_text_processed(text)
137
+ hashes = processed_task_log_hashes
138
+ hashes[task_log_text_hash(text)] = true
139
+ hashes.shift while hashes.size > processed_task_log_hash_limit
140
+ end
141
+
142
+ def processed_task_log_hashes
143
+ @processed_task_log_hashes ||= {}
144
+ end
145
+
146
+ def processed_task_log_hash_limit
147
+ [settings[:entity_watchdog][:log_limit].to_i * 4, 100].max
148
+ end
149
+
150
+ def task_log_text_hash(text)
151
+ Digest::SHA256.hexdigest(text.to_s)
152
+ end
127
153
  end
128
154
  end
129
155
  end
@@ -469,6 +469,22 @@ module Legion
469
469
  )
470
470
  log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
471
471
  new_entry.id
472
+ rescue Sequel::UniqueConstraintViolation => e
473
+ # Race condition: another thread/process inserted the same content_hash between our
474
+ # dedup check and this insert. Fetch and return the winner's id so the caller can
475
+ # continue normally (access log, contradiction detection, etc.).
476
+ winner = Helpers::DataModels.apollo_entry
477
+ .where(content_hash: content_hash)
478
+ .exclude(status: 'archived')
479
+ .first
480
+ if winner
481
+ log.warn("Apollo Knowledge.create_candidate_entry race_dedup entry_id=#{winner.id} content_hash=#{content_hash} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
482
+ winner.id
483
+ else
484
+ handle_exception(e, level: :warn, handled: true, operation: 'apollo.knowledge.create_candidate_entry',
485
+ content_hash: content_hash)
486
+ nil
487
+ end
472
488
  end
473
489
 
474
490
  def browse_query?(query)
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.4.24'
6
+ VERSION = '0.4.26'
7
7
  end
8
8
  end
9
9
  end
@@ -67,6 +67,13 @@ RSpec.describe Legion::Extensions::Apollo::Actor::EntityWatchdog do
67
67
  expect(actor).to have_received(:publish_entity_ingest).once
68
68
  end
69
69
 
70
+ it 'does not extract entities from the same task log text twice' do
71
+ actor.scan_and_ingest
72
+ actor.scan_and_ingest
73
+
74
+ expect(actor).to have_received(:extract_entities).once
75
+ end
76
+
70
77
  context 'when entity already exists in Apollo (high similarity)' do
71
78
  let(:existing_match) do
72
79
  { success: true, entries: [{ id: 42, content: 'lex-synapse', distance: 0.02 }], count: 1 }
@@ -321,6 +321,30 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
321
321
  expect(result[:deduped]).to be true
322
322
  expect(result[:entry_id]).to eq('uuid-existing')
323
323
  end
324
+
325
+ it 'recovers gracefully when a concurrent ingest wins the content_hash unique constraint race' do
326
+ # Simulate: dedup check passes (nil — no existing entry yet), then .create
327
+ # raises UniqueConstraintViolation (another thread inserted between check and insert).
328
+ # create_candidate_entry must rescue and return the existing entry's id so the
329
+ # caller succeeds rather than propagating a database error.
330
+ race_entry = double('race_entry', id: 'uuid-race-winner')
331
+
332
+ allow(mock_entry_class).to receive(:create)
333
+ .and_raise(Sequel::UniqueConstraintViolation, 'duplicate key value violates unique constraint "idx_apollo_content_hash"')
334
+
335
+ collision_dataset = double('collision_dataset')
336
+ allow(mock_entry_class).to receive(:where).with(content_hash: anything).and_return(collision_dataset)
337
+ allow(collision_dataset).to receive(:exclude).with(status: 'archived').and_return(collision_dataset)
338
+ # First call: dedup pre-check returns nil (not yet in DB).
339
+ # Second call: post-collision lookup returns the winner inserted by the other thread.
340
+ allow(collision_dataset).to receive(:first).and_return(nil, race_entry)
341
+
342
+ result = host.handle_ingest(content: 'concurrent content', content_type: 'fact',
343
+ source_agent: 'agent-1',
344
+ content_hash: 'd3861b2862454c5a6a9e480829333841')
345
+ expect(result[:success]).to be true
346
+ expect(result[:entry_id]).to eq('uuid-race-winner')
347
+ end
324
348
  end
325
349
  end
326
350
 
data/spec/spec_helper.rb CHANGED
@@ -13,6 +13,7 @@ require 'legion/transport'
13
13
  unless defined?(Sequel)
14
14
  module Sequel
15
15
  class Error < StandardError; end
16
+ class UniqueConstraintViolation < Error; end
16
17
 
17
18
  def self.pg_array(arr) = arr
18
19
  def self.lit(str, *) = str
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.24
4
+ version: 0.4.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity