lex-apollo 0.4.24 → 0.4.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 96a0b3d72ad8895d264315b2da3710c323d31280cf08fe615f8db74c9c762e82
|
|
4
|
+
data.tar.gz: a6d938a7409cfd0f1b5f3a0dc6c7f1f3c0399dc93a8b7ba8ffcfff0c82536bc3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: efd4519deb1935da01026f3149460fafd110be233f7cdaf522c1980c4cab70a2debcb4ca4ba8a7e3b5b8be0b409d94fdf566fdba6cc8bcdcc1a3ebef879ef6fe
|
|
7
|
+
data.tar.gz: b0b446a0af1010dde65247cff125c17d086ebba9cf9bd0269575fc7d94fed6eab06a2d836ee6419d742b4fe150aa7b055deefb11e6160b30bde83f79b5c63d71
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.25] - 2026-05-08
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Entity watchdog now remembers processed task-log text during the process lifetime so unchanged logs do not trigger repeated structured LLM extraction on every watchdog interval.
|
|
7
|
+
|
|
3
8
|
## [0.4.24] - 2026-05-07
|
|
4
9
|
|
|
5
10
|
### Fixed
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'digest'
|
|
3
4
|
require 'legion/extensions/actors/every'
|
|
4
5
|
require_relative '../runners/knowledge'
|
|
5
6
|
require_relative '../runners/entity_extractor'
|
|
@@ -40,6 +41,8 @@ module Legion
|
|
|
40
41
|
|
|
41
42
|
ingested = 0
|
|
42
43
|
texts.each do |text|
|
|
44
|
+
next if task_log_text_processed?(text)
|
|
45
|
+
|
|
43
46
|
result = extract_entities(
|
|
44
47
|
text: text,
|
|
45
48
|
entity_types: entity_types,
|
|
@@ -47,6 +50,7 @@ module Legion
|
|
|
47
50
|
)
|
|
48
51
|
next unless result[:success]
|
|
49
52
|
|
|
53
|
+
mark_task_log_text_processed(text) unless result[:source] == :unavailable
|
|
50
54
|
result[:entities].each do |entity|
|
|
51
55
|
next if entity_exists_in_apollo?(entity)
|
|
52
56
|
|
|
@@ -124,6 +128,28 @@ module Legion
|
|
|
124
128
|
def dedup_similarity_threshold
|
|
125
129
|
settings[:entity_watchdog][:dedup_threshold].to_f
|
|
126
130
|
end
|
|
131
|
+
|
|
132
|
+
def task_log_text_processed?(text)
|
|
133
|
+
processed_task_log_hashes.key?(task_log_text_hash(text))
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def mark_task_log_text_processed(text)
|
|
137
|
+
hashes = processed_task_log_hashes
|
|
138
|
+
hashes[task_log_text_hash(text)] = true
|
|
139
|
+
hashes.shift while hashes.size > processed_task_log_hash_limit
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def processed_task_log_hashes
|
|
143
|
+
@processed_task_log_hashes ||= {}
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def processed_task_log_hash_limit
|
|
147
|
+
[settings[:entity_watchdog][:log_limit].to_i * 4, 100].max
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def task_log_text_hash(text)
|
|
151
|
+
Digest::SHA256.hexdigest(text.to_s)
|
|
152
|
+
end
|
|
127
153
|
end
|
|
128
154
|
end
|
|
129
155
|
end
|
|
@@ -67,6 +67,13 @@ RSpec.describe Legion::Extensions::Apollo::Actor::EntityWatchdog do
|
|
|
67
67
|
expect(actor).to have_received(:publish_entity_ingest).once
|
|
68
68
|
end
|
|
69
69
|
|
|
70
|
+
it 'does not extract entities from the same task log text twice' do
|
|
71
|
+
actor.scan_and_ingest
|
|
72
|
+
actor.scan_and_ingest
|
|
73
|
+
|
|
74
|
+
expect(actor).to have_received(:extract_entities).once
|
|
75
|
+
end
|
|
76
|
+
|
|
70
77
|
context 'when entity already exists in Apollo (high similarity)' do
|
|
71
78
|
let(:existing_match) do
|
|
72
79
|
{ success: true, entries: [{ id: 42, content: 'lex-synapse', distance: 0.02 }], count: 1 }
|