lex-apollo 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +26 -24
- data/lib/legion/extensions/apollo/actors/entity_watchdog.rb +143 -0
- data/lib/legion/extensions/apollo/helpers/confidence.rb +10 -5
- data/lib/legion/extensions/apollo/runners/entity_extractor.rb +76 -0
- data/lib/legion/extensions/apollo/runners/knowledge.rb +26 -10
- data/lib/legion/extensions/apollo/runners/maintenance.rb +12 -1
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/lib/legion/extensions/apollo.rb +1 -0
- data/spec/legion/extensions/apollo/actors/entity_watchdog_spec.rb +127 -0
- data/spec/legion/extensions/apollo/helpers/confidence_spec.rb +21 -8
- data/spec/legion/extensions/apollo/runners/decay_cycle_spec.rb +3 -2
- data/spec/legion/extensions/apollo/runners/entity_extractor_spec.rb +124 -0
- metadata +5 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e5f9f60cb9a67bc5d4cd35af051b1abe3251787d0da55ac092c1898942bd2144
|
|
4
|
+
data.tar.gz: 8b0cd86443469ae7a0d5d857a127f855cd0e68f8d967c94a727e8f166372e483
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3f239c19c6212f7c142aec1e0fe34aed651928f5a4c37acfcf54152d6719594f94b6ddb037bf48666eb93f9c747d341a477039e320e2e74fef17a8188519e0a9
|
|
7
|
+
data.tar.gz: 6227ef5fffa37ad570940591d390ac594d71f10a49de1e75a6ce568b075ac6c0293ba13478130eebbc366c6195b383327dab2b7400398bc2fe6f1f274f4b61ca
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,30 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.3.3] - 2026-03-20
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Runners::EntityExtractor`: LLM-backed structured extraction of people, services, repositories, and concepts from arbitrary text
|
|
7
|
+
- `Actors::EntityWatchdog`: interval actor (120s) that reads recent task logs, extracts entities, deduplicates against Apollo, and publishes ingest messages for net-new entities
|
|
8
|
+
- Settings support: `apollo.entity_watchdog.types`, `apollo.entity_watchdog.min_confidence`, `apollo.entity_watchdog.dedup_threshold`
|
|
9
|
+
- Fallback behavior when `Legion::LLM` is unavailable (returns empty entity list, no error)
|
|
10
|
+
|
|
11
|
+
## [0.3.2] - 2026-03-20
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- Replace exponential confidence decay (`confidence * 0.998`) with power-law decay
|
|
15
|
+
(`confidence / (1 + alpha)` per tick, where `alpha` defaults to 0.1)
|
|
16
|
+
- Configurable via `apollo.power_law_alpha` setting (default: 0.1)
|
|
17
|
+
- Source diversity enforcement in corroboration: same-source corroboration (matching
|
|
18
|
+
`source_provider`) receives 50% boost weight instead of full weight
|
|
19
|
+
- `check_corroboration` skips auto-promotion when both candidate and match have
|
|
20
|
+
the same known `source_provider` (correlated error prevention)
|
|
21
|
+
- `apply_corroboration_boost` accepts optional `weight:` kwarg (default: 1.0)
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- `source_provider` field populated on ingest via explicit kwarg or agent name inference
|
|
25
|
+
- `handle_ingest` accepts `source_provider:` kwarg; derives provider from agent name
|
|
26
|
+
convention when not explicitly provided
|
|
27
|
+
|
|
3
28
|
## [0.3.1] - 2026-03-17
|
|
4
29
|
|
|
5
30
|
### Added
|
data/README.md
CHANGED
|
@@ -26,40 +26,41 @@ gem 'lex-apollo'
|
|
|
26
26
|
```ruby
|
|
27
27
|
require 'legion/extensions/apollo'
|
|
28
28
|
|
|
29
|
-
client = Legion::Extensions::Apollo::Client.new
|
|
29
|
+
client = Legion::Extensions::Apollo::Client.new
|
|
30
30
|
|
|
31
|
-
#
|
|
31
|
+
# Build a store payload (published to RabbitMQ for the Apollo service to persist)
|
|
32
32
|
client.store_knowledge(
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
content: 'BGP route reflectors reduce full-mesh IBGP complexity',
|
|
34
|
+
content_type: :fact,
|
|
35
|
+
source_agent: 'my-agent-001',
|
|
36
|
+
tags: ['bgp', 'routing', 'ibgp'],
|
|
37
|
+
context: { source: 'network_team_wiki' }
|
|
38
38
|
)
|
|
39
39
|
|
|
40
|
-
#
|
|
40
|
+
# Build a query payload
|
|
41
41
|
client.query_knowledge(
|
|
42
|
-
query:
|
|
43
|
-
domain: 'networking',
|
|
42
|
+
query: 'BGP route reflector configuration',
|
|
44
43
|
min_confidence: 0.6,
|
|
45
|
-
limit:
|
|
44
|
+
limit: 10
|
|
46
45
|
)
|
|
47
46
|
|
|
48
47
|
# Get related entries (concept graph traversal)
|
|
49
|
-
client.related_entries(entry_id: 'entry-uuid',
|
|
48
|
+
client.related_entries(entry_id: 'entry-uuid', depth: 2)
|
|
50
49
|
|
|
51
50
|
# Deprecate a stale entry
|
|
52
51
|
client.deprecate_entry(entry_id: 'entry-uuid', reason: 'superseded by RFC 7938')
|
|
53
52
|
```
|
|
54
53
|
|
|
54
|
+
Content types: `:fact`, `:concept`, `:procedure`, `:association`, `:observation`
|
|
55
|
+
|
|
55
56
|
### Expertise Queries
|
|
56
57
|
|
|
57
58
|
```ruby
|
|
58
59
|
# Get proficiency scores for a domain
|
|
59
|
-
client.get_expertise(domain: 'networking',
|
|
60
|
+
client.get_expertise(domain: 'networking', min_proficiency: 0.3)
|
|
60
61
|
|
|
61
|
-
# Find domains where
|
|
62
|
-
client.domains_at_risk(
|
|
62
|
+
# Find domains where coverage is thin (below min agent count)
|
|
63
|
+
client.domains_at_risk(min_agents: 2)
|
|
63
64
|
|
|
64
65
|
# Full agent knowledge profile
|
|
65
66
|
client.agent_profile(agent_id: 'my-agent-001')
|
|
@@ -68,11 +69,11 @@ client.agent_profile(agent_id: 'my-agent-001')
|
|
|
68
69
|
### Maintenance
|
|
69
70
|
|
|
70
71
|
```ruby
|
|
71
|
-
# Force confidence decay cycle
|
|
72
|
-
client.force_decay(
|
|
72
|
+
# Force confidence decay cycle (factor multiplied against each entry's confidence)
|
|
73
|
+
client.force_decay(factor: 0.5)
|
|
73
74
|
|
|
74
|
-
# Archive entries
|
|
75
|
-
client.archive_stale(
|
|
75
|
+
# Archive entries older than N days
|
|
76
|
+
client.archive_stale(days: 90)
|
|
76
77
|
|
|
77
78
|
# Resolve a corroboration dispute
|
|
78
79
|
client.resolve_dispute(entry_id: 'entry-uuid', resolution: :accept)
|
|
@@ -104,12 +105,13 @@ Apollo is wired into the GAIA tick cycle at the `knowledge_retrieval` phase (pha
|
|
|
104
105
|
|
|
105
106
|
Entries have a confidence score between 0.0 and 1.0:
|
|
106
107
|
|
|
107
|
-
- New entries start at
|
|
108
|
-
- Corroboration from
|
|
109
|
-
-
|
|
110
|
-
- Confidence decays hourly; entries below
|
|
108
|
+
- New entries start at `INITIAL_CONFIDENCE` (0.5) with status `candidate`
|
|
109
|
+
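- Corroboration from a semantically similar entry (cosine > 0.9) boosts confidence by 0.3 and promotes to `confirmed`; when the match has the same known `source_provider`, the boost is halved (0.15) and auto-promotion is skipped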
|
|
110
|
+
- Each retrieval adds a small boost (+0.02, capped at 1.0)
|
|
111
|
+
- Confidence decays hourly by power-law factor `1 / (1 + alpha)` (`alpha` defaults to 0.1, configurable via `apollo.power_law_alpha`); entries below 0.1 are archived
|
|
112
|
+
- The GAIA write gate (`meets_write_gate?`) requires confidence > 0.6 and novelty > 0.3 for the tick write-back path
|
|
111
113
|
|
|
112
|
-
See `helpers/confidence.rb` for
|
|
114
|
+
See `helpers/confidence.rb` for all constants and math helpers.
|
|
113
115
|
|
|
114
116
|
## Requirements
|
|
115
117
|
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/actors/every'
|
|
4
|
+
require_relative '../runners/knowledge'
|
|
5
|
+
require_relative '../runners/entity_extractor'
|
|
6
|
+
|
|
7
|
+
module Legion
|
|
8
|
+
module Extensions
|
|
9
|
+
module Apollo
|
|
10
|
+
module Actor
|
|
11
|
+
class EntityWatchdog < Legion::Extensions::Actors::Every
|
|
12
|
+
include Legion::Extensions::Apollo::Runners::Knowledge
|
|
13
|
+
include Legion::Extensions::Apollo::Runners::EntityExtractor
|
|
14
|
+
|
|
15
|
+
DEDUP_THRESHOLD_DEFAULT = 0.92
|
|
16
|
+
TASK_LOG_LOOKBACK_SECONDS = 300
|
|
17
|
+
TASK_LOG_LIMIT = 50
|
|
18
|
+
|
|
19
|
+
def runner_class = Legion::Extensions::Apollo::Runners::EntityExtractor
|
|
20
|
+
def runner_function = 'scan_and_ingest'
|
|
21
|
+
def time = 120
|
|
22
|
+
def run_now? = false
|
|
23
|
+
def use_runner? = false
|
|
24
|
+
def check_subtask? = false
|
|
25
|
+
def generate_task? = false
|
|
26
|
+
|
|
27
|
+
def enabled?
|
|
28
|
+
defined?(Legion::Extensions::Apollo::Runners::EntityExtractor) &&
|
|
29
|
+
defined?(Legion::Transport)
|
|
30
|
+
rescue StandardError
|
|
31
|
+
false
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def scan_and_ingest
|
|
35
|
+
texts = recent_task_log_texts
|
|
36
|
+
return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
|
|
37
|
+
|
|
38
|
+
ingested = 0
|
|
39
|
+
texts.each do |text|
|
|
40
|
+
result = extract_entities(
|
|
41
|
+
text: text,
|
|
42
|
+
entity_types: entity_types,
|
|
43
|
+
min_confidence: min_entity_confidence
|
|
44
|
+
)
|
|
45
|
+
next unless result[:success]
|
|
46
|
+
|
|
47
|
+
result[:entities].each do |entity|
|
|
48
|
+
next if entity_exists_in_apollo?(entity)
|
|
49
|
+
|
|
50
|
+
publish_entity_ingest(entity)
|
|
51
|
+
ingested += 1
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
log_debug("EntityWatchdog: ingested #{ingested} new entities from #{texts.size} log entries")
|
|
56
|
+
{ success: true, ingested: ingested, logs_scanned: texts.size }
|
|
57
|
+
rescue StandardError => e
|
|
58
|
+
log_error("EntityWatchdog scan_and_ingest failed: #{e.message}")
|
|
59
|
+
{ success: false, error: e.message }
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def recent_task_log_texts
|
|
63
|
+
return [] unless defined?(Legion::Data) && defined?(Legion::Data::Model::TaskLog)
|
|
64
|
+
|
|
65
|
+
cutoff = Time.now - TASK_LOG_LOOKBACK_SECONDS
|
|
66
|
+
logs = Legion::Data::Model::TaskLog
|
|
67
|
+
.where { created_at >= cutoff }
|
|
68
|
+
.order(Sequel.desc(:created_at))
|
|
69
|
+
.limit(TASK_LOG_LIMIT)
|
|
70
|
+
.select_map(:message)
|
|
71
|
+
logs.map(&:to_s).reject(&:empty?).uniq
|
|
72
|
+
rescue StandardError
|
|
73
|
+
[]
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def entity_exists_in_apollo?(entity)
|
|
77
|
+
result = retrieve_relevant(
|
|
78
|
+
query: entity[:name].to_s,
|
|
79
|
+
limit: 1,
|
|
80
|
+
min_confidence: 0.1,
|
|
81
|
+
tags: [entity[:type].to_s]
|
|
82
|
+
)
|
|
83
|
+
return false unless result[:success] && result[:count].positive?
|
|
84
|
+
|
|
85
|
+
closest = result[:entries].first
|
|
86
|
+
distance = closest[:distance].to_f
|
|
87
|
+
distance <= (1.0 - dedup_similarity_threshold)
|
|
88
|
+
rescue StandardError
|
|
89
|
+
false
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def publish_entity_ingest(entity)
|
|
93
|
+
return unless defined?(Legion::Extensions::Apollo::Transport::Messages::Ingest)
|
|
94
|
+
|
|
95
|
+
Legion::Extensions::Apollo::Transport::Messages::Ingest.new(
|
|
96
|
+
content: "#{entity[:type].to_s.capitalize}: #{entity[:name]}",
|
|
97
|
+
content_type: 'concept',
|
|
98
|
+
tags: [entity[:type].to_s, 'entity_watchdog'],
|
|
99
|
+
source_agent: 'lex-apollo:entity_watchdog',
|
|
100
|
+
context: { entity_type: entity[:type], original_name: entity[:name] }
|
|
101
|
+
).publish
|
|
102
|
+
rescue StandardError => e
|
|
103
|
+
log_error("EntityWatchdog publish failed: #{e.message}")
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def entity_types
|
|
107
|
+
if defined?(Legion::Settings)
|
|
108
|
+
types = Legion::Settings.dig(:apollo, :entity_watchdog, :types)
|
|
109
|
+
return Array(types).map(&:to_s) if types
|
|
110
|
+
end
|
|
111
|
+
%w[person service repository concept]
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def min_entity_confidence
|
|
115
|
+
if defined?(Legion::Settings)
|
|
116
|
+
val = Legion::Settings.dig(:apollo, :entity_watchdog, :min_confidence)
|
|
117
|
+
return val.to_f if val
|
|
118
|
+
end
|
|
119
|
+
0.7
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def dedup_similarity_threshold
|
|
123
|
+
if defined?(Legion::Settings)
|
|
124
|
+
val = Legion::Settings.dig(:apollo, :entity_watchdog, :dedup_threshold)
|
|
125
|
+
return val.to_f if val
|
|
126
|
+
end
|
|
127
|
+
DEDUP_THRESHOLD_DEFAULT
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
private
|
|
131
|
+
|
|
132
|
+
def log_debug(message)
|
|
133
|
+
Legion::Logging.debug(message) if defined?(Legion::Logging)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def log_error(message)
|
|
137
|
+
Legion::Logging.error(message) if defined?(Legion::Logging)
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -8,7 +8,7 @@ module Legion
|
|
|
8
8
|
INITIAL_CONFIDENCE = 0.5
|
|
9
9
|
CORROBORATION_BOOST = 0.3
|
|
10
10
|
RETRIEVAL_BOOST = 0.02
|
|
11
|
-
|
|
11
|
+
POWER_LAW_ALPHA = 0.1
|
|
12
12
|
DECAY_THRESHOLD = 0.1
|
|
13
13
|
CORROBORATION_SIMILARITY_THRESHOLD = 0.9
|
|
14
14
|
WRITE_CONFIDENCE_GATE = 0.6
|
|
@@ -20,16 +20,21 @@ module Legion
|
|
|
20
20
|
|
|
21
21
|
module_function
|
|
22
22
|
|
|
23
|
-
def apply_decay(confidence:,
|
|
24
|
-
|
|
23
|
+
def apply_decay(confidence:, age_hours: nil, alpha: POWER_LAW_ALPHA, **)
|
|
24
|
+
if age_hours
|
|
25
|
+
[confidence * ((age_hours.clamp(0, Float::INFINITY) + 2.0)**(-alpha)) / ((age_hours.clamp(0, Float::INFINITY) + 1.0)**(-alpha)), 0.0].max
|
|
26
|
+
else
|
|
27
|
+
factor = 1.0 / (1.0 + alpha)
|
|
28
|
+
[confidence * factor, 0.0].max
|
|
29
|
+
end
|
|
25
30
|
end
|
|
26
31
|
|
|
27
32
|
def apply_retrieval_boost(confidence:, **)
|
|
28
33
|
[confidence + RETRIEVAL_BOOST, 1.0].min
|
|
29
34
|
end
|
|
30
35
|
|
|
31
|
-
def apply_corroboration_boost(confidence:, **)
|
|
32
|
-
[confidence + CORROBORATION_BOOST, 1.0].min
|
|
36
|
+
def apply_corroboration_boost(confidence:, weight: 1.0, **)
|
|
37
|
+
[confidence + (CORROBORATION_BOOST * weight), 1.0].min
|
|
33
38
|
end
|
|
34
39
|
|
|
35
40
|
def decayed?(confidence:, **)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Apollo
|
|
6
|
+
module Runners
|
|
7
|
+
module EntityExtractor
|
|
8
|
+
DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
|
|
9
|
+
DEFAULT_MIN_CONFIDENCE = 0.7
|
|
10
|
+
|
|
11
|
+
def extract_entities(text:, entity_types: nil, min_confidence: DEFAULT_MIN_CONFIDENCE, **)
|
|
12
|
+
return { success: true, entities: [], source: :empty } if text.to_s.strip.empty?
|
|
13
|
+
|
|
14
|
+
return { success: true, entities: [], source: :unavailable } unless defined?(Legion::LLM) && Legion::LLM.started?
|
|
15
|
+
|
|
16
|
+
types = Array(entity_types).map(&:to_s)
|
|
17
|
+
types = DEFAULT_ENTITY_TYPES if types.empty?
|
|
18
|
+
|
|
19
|
+
result = Legion::LLM.structured(
|
|
20
|
+
messages: [
|
|
21
|
+
{ role: 'user', content: entity_extraction_prompt(text: text, entity_types: types) }
|
|
22
|
+
],
|
|
23
|
+
schema: entity_schema
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
raw_entities = result.dig(:data, :entities) || []
|
|
27
|
+
filtered = raw_entities.select do |entity|
|
|
28
|
+
(entity[:confidence] || 0.0) >= min_confidence &&
|
|
29
|
+
(types.empty? || types.include?(entity[:type].to_s))
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
{ success: true, entities: filtered, source: :llm }
|
|
33
|
+
rescue StandardError => e
|
|
34
|
+
{ success: false, entities: [], error: e.message, source: :error }
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def entity_extraction_prompt(text:, entity_types:, **)
|
|
38
|
+
type_list = Array(entity_types).join(', ')
|
|
39
|
+
<<~PROMPT.strip
|
|
40
|
+
Extract named entities from the following text. Return only entities of these types: #{type_list}.
|
|
41
|
+
|
|
42
|
+
For each entity provide:
|
|
43
|
+
- name: the canonical name as it appears (string)
|
|
44
|
+
- type: one of #{type_list} (string)
|
|
45
|
+
- confidence: your confidence this is a real entity of that type (float 0.0-1.0)
|
|
46
|
+
|
|
47
|
+
Text:
|
|
48
|
+
#{text}
|
|
49
|
+
PROMPT
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def entity_schema
|
|
53
|
+
{
|
|
54
|
+
type: 'object',
|
|
55
|
+
properties: {
|
|
56
|
+
entities: {
|
|
57
|
+
type: 'array',
|
|
58
|
+
items: {
|
|
59
|
+
type: 'object',
|
|
60
|
+
properties: {
|
|
61
|
+
name: { type: 'string' },
|
|
62
|
+
type: { type: 'string' },
|
|
63
|
+
confidence: { type: 'number' }
|
|
64
|
+
},
|
|
65
|
+
required: %w[name type confidence]
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
required: ['entities']
|
|
70
|
+
}
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -53,7 +53,7 @@ module Legion
|
|
|
53
53
|
}
|
|
54
54
|
end
|
|
55
55
|
|
|
56
|
-
def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', context: {}, **)
|
|
56
|
+
def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists
|
|
57
57
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
58
58
|
|
|
59
59
|
embedding = Helpers::Embedding.generate(text: content)
|
|
@@ -64,14 +64,15 @@ module Legion
|
|
|
64
64
|
|
|
65
65
|
unless corroborated
|
|
66
66
|
new_entry = Legion::Data::Model::ApolloEntry.create(
|
|
67
|
-
content:
|
|
68
|
-
content_type:
|
|
69
|
-
confidence:
|
|
70
|
-
source_agent:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
67
|
+
content: content,
|
|
68
|
+
content_type: content_type_sym,
|
|
69
|
+
confidence: Helpers::Confidence::INITIAL_CONFIDENCE,
|
|
70
|
+
source_agent: source_agent,
|
|
71
|
+
source_provider: source_provider || derive_provider_from_agent(source_agent),
|
|
72
|
+
source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
|
|
73
|
+
tags: Sequel.pg_array(tag_array),
|
|
74
|
+
status: 'candidate',
|
|
75
|
+
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
75
76
|
)
|
|
76
77
|
existing_id = new_entry.id
|
|
77
78
|
end
|
|
@@ -220,8 +221,9 @@ module Legion
|
|
|
220
221
|
sim = Helpers::Similarity.cosine_similarity(vec_a: embedding, vec_b: entry.embedding)
|
|
221
222
|
next unless Helpers::Similarity.above_corroboration_threshold?(similarity: sim)
|
|
222
223
|
|
|
224
|
+
weight = same_source_provider?(source_agent, entry) ? 0.5 : 1.0
|
|
223
225
|
entry.update(
|
|
224
|
-
confidence: Helpers::Confidence.apply_corroboration_boost(confidence: entry.confidence),
|
|
226
|
+
confidence: Helpers::Confidence.apply_corroboration_boost(confidence: entry.confidence, weight: weight),
|
|
225
227
|
updated_at: Time.now
|
|
226
228
|
)
|
|
227
229
|
Legion::Data::Model::ApolloRelation.create(
|
|
@@ -237,6 +239,20 @@ module Legion
|
|
|
237
239
|
[false, nil]
|
|
238
240
|
end
|
|
239
241
|
|
|
242
|
+
def same_source_provider?(submitting_agent, entry)
|
|
243
|
+
stored = entry.respond_to?(:source_provider) ? entry.source_provider : nil
|
|
244
|
+
return false if stored.nil? || stored.to_s.empty? || stored.to_s == 'unknown'
|
|
245
|
+
|
|
246
|
+
derive_provider_from_agent(submitting_agent) == stored.to_s
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
def derive_provider_from_agent(source_agent)
|
|
250
|
+
return 'unknown' if source_agent.nil? || source_agent == 'unknown'
|
|
251
|
+
|
|
252
|
+
provider = source_agent.to_s.split(/[-_]/).first.downcase
|
|
253
|
+
%w[claude openai gemini human system].include?(provider) ? provider : 'unknown'
|
|
254
|
+
end
|
|
255
|
+
|
|
240
256
|
def upsert_expertise(source_agent:, domain:)
|
|
241
257
|
expertise = Legion::Data::Model::ApolloExpertise
|
|
242
258
|
.where(agent_id: source_agent, domain: domain).first
|
|
@@ -62,6 +62,11 @@ module Legion
|
|
|
62
62
|
|
|
63
63
|
next unless match
|
|
64
64
|
|
|
65
|
+
candidate_provider = candidate.respond_to?(:source_provider) ? candidate.source_provider : nil
|
|
66
|
+
match_provider = match.respond_to?(:source_provider) ? match.source_provider : nil
|
|
67
|
+
both_known = known_provider?(candidate_provider) && known_provider?(match_provider)
|
|
68
|
+
next if both_known && candidate_provider == match_provider
|
|
69
|
+
|
|
65
70
|
candidate.update(
|
|
66
71
|
status: 'confirmed',
|
|
67
72
|
confirmed_at: Time.now,
|
|
@@ -88,7 +93,13 @@ module Legion
|
|
|
88
93
|
private
|
|
89
94
|
|
|
90
95
|
def decay_rate
|
|
91
|
-
(defined?(Legion::Settings) && Legion::Settings.dig(:apollo, :
|
|
96
|
+
alpha = (defined?(Legion::Settings) && Legion::Settings.dig(:apollo, :power_law_alpha)) ||
|
|
97
|
+
Helpers::Confidence::POWER_LAW_ALPHA
|
|
98
|
+
1.0 / (1.0 + alpha)
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def known_provider?(provider)
|
|
102
|
+
!provider.nil? && !provider.to_s.empty? && provider.to_s != 'unknown'
|
|
92
103
|
end
|
|
93
104
|
|
|
94
105
|
def decay_threshold
|
|
@@ -7,6 +7,7 @@ require 'legion/extensions/apollo/helpers/graph_query'
|
|
|
7
7
|
require 'legion/extensions/apollo/runners/knowledge'
|
|
8
8
|
require 'legion/extensions/apollo/runners/expertise'
|
|
9
9
|
require 'legion/extensions/apollo/runners/maintenance'
|
|
10
|
+
require 'legion/extensions/apollo/runners/entity_extractor'
|
|
10
11
|
|
|
11
12
|
if defined?(Legion::Transport)
|
|
12
13
|
require 'legion/extensions/apollo/transport/exchanges/apollo'
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
unless defined?(Legion::Extensions::Actors::Every)
|
|
6
|
+
module Legion
|
|
7
|
+
module Extensions
|
|
8
|
+
module Actors
|
|
9
|
+
class Every
|
|
10
|
+
def initialize(**_opts); end
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
$LOADED_FEATURES << 'legion/extensions/actors/every' unless $LOADED_FEATURES.include?('legion/extensions/actors/every')
|
|
17
|
+
|
|
18
|
+
require 'legion/extensions/apollo/runners/knowledge'
|
|
19
|
+
require 'legion/extensions/apollo/runners/entity_extractor'
|
|
20
|
+
require 'legion/extensions/apollo/actors/entity_watchdog'
|
|
21
|
+
|
|
22
|
+
RSpec.describe Legion::Extensions::Apollo::Actor::EntityWatchdog do
|
|
23
|
+
subject(:actor) { described_class.allocate }
|
|
24
|
+
|
|
25
|
+
describe 'actor configuration' do
|
|
26
|
+
it 'uses EntityExtractor as runner_class' do
|
|
27
|
+
expect(actor.runner_class).to eq(Legion::Extensions::Apollo::Runners::EntityExtractor)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it 'runs scan_and_ingest function' do
|
|
31
|
+
expect(actor.runner_function).to eq('scan_and_ingest')
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it 'runs every 120 seconds' do
|
|
35
|
+
expect(actor.time).to eq(120)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it 'does not run immediately' do
|
|
39
|
+
expect(actor.run_now?).to be false
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
it 'does not use the runner framework (calls manual directly)' do
|
|
43
|
+
expect(actor.use_runner?).to be false
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it 'does not generate tasks' do
|
|
47
|
+
expect(actor.generate_task?).to be false
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
describe '#scan_and_ingest' do
|
|
52
|
+
let(:entities) { [{ name: 'lex-synapse', type: 'repository', confidence: 0.95 }] }
|
|
53
|
+
let(:no_match) { { success: true, entries: [], count: 0 } }
|
|
54
|
+
|
|
55
|
+
before do
|
|
56
|
+
allow(actor).to receive(:recent_task_log_texts).and_return(['deploying lex-synapse to nomad'])
|
|
57
|
+
allow(actor).to receive(:extract_entities).with(text: 'deploying lex-synapse to nomad',
|
|
58
|
+
entity_types: anything,
|
|
59
|
+
min_confidence: anything)
|
|
60
|
+
.and_return({ success: true, entities: entities, source: :llm })
|
|
61
|
+
allow(actor).to receive(:retrieve_relevant).and_return(no_match)
|
|
62
|
+
allow(actor).to receive(:publish_entity_ingest)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it 'calls publish_entity_ingest for new entities' do
|
|
66
|
+
actor.scan_and_ingest
|
|
67
|
+
expect(actor).to have_received(:publish_entity_ingest).once
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
context 'when entity already exists in Apollo (high similarity)' do
|
|
71
|
+
let(:existing_match) do
|
|
72
|
+
{ success: true, entries: [{ id: 42, content: 'lex-synapse', distance: 0.02 }], count: 1 }
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
before { allow(actor).to receive(:retrieve_relevant).and_return(existing_match) }
|
|
76
|
+
|
|
77
|
+
it 'does not publish for duplicate entities' do
|
|
78
|
+
actor.scan_and_ingest
|
|
79
|
+
expect(actor).not_to have_received(:publish_entity_ingest)
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
context 'when LLM extraction returns nothing' do
|
|
84
|
+
before do
|
|
85
|
+
allow(actor).to receive(:extract_entities).and_return({ success: true, entities: [], source: :unavailable })
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
it 'does not publish anything' do
|
|
89
|
+
actor.scan_and_ingest
|
|
90
|
+
expect(actor).not_to have_received(:publish_entity_ingest)
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
context 'when data layer is unavailable' do
|
|
95
|
+
before { allow(actor).to receive(:recent_task_log_texts).and_return([]) }
|
|
96
|
+
|
|
97
|
+
it 'returns early without calling extract_entities' do
|
|
98
|
+
expect(actor).not_to receive(:extract_entities)
|
|
99
|
+
actor.scan_and_ingest
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
describe '#entity_types' do
|
|
105
|
+
it 'returns the default list when settings are absent' do
|
|
106
|
+
expect(actor.entity_types).to eq(%w[person service repository concept])
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
describe '#dedup_similarity_threshold' do
|
|
111
|
+
it 'returns a float between 0 and 1' do
|
|
112
|
+
threshold = actor.dedup_similarity_threshold
|
|
113
|
+
expect(threshold).to be_a(Float)
|
|
114
|
+
expect(threshold).to be_between(0.0, 1.0)
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
describe '#recent_task_log_texts' do
|
|
119
|
+
context 'when legion-data is not available' do
|
|
120
|
+
before { hide_const('Legion::Data') if defined?(Legion::Data) }
|
|
121
|
+
|
|
122
|
+
it 'returns an empty array' do
|
|
123
|
+
expect(actor.recent_task_log_texts).to eq([])
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
@@ -17,8 +17,8 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Confidence do
|
|
|
17
17
|
expect(described_class::RETRIEVAL_BOOST).to eq(0.02)
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
it 'defines
|
|
21
|
-
expect(described_class::
|
|
20
|
+
it 'defines POWER_LAW_ALPHA' do
|
|
21
|
+
expect(described_class::POWER_LAW_ALPHA).to eq(0.1)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
it 'defines DECAY_THRESHOLD' do
|
|
@@ -43,20 +43,28 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Confidence do
|
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
describe '.apply_decay' do
|
|
46
|
-
it '
|
|
46
|
+
it 'applies power-law decay with default alpha when no age given' do
|
|
47
47
|
result = described_class.apply_decay(confidence: 1.0)
|
|
48
|
-
|
|
48
|
+
expected = 1.0 / (1.0 + 0.1) # ~0.909091
|
|
49
|
+
expect(result).to be_within(0.0001).of(expected)
|
|
49
50
|
end
|
|
50
51
|
|
|
51
|
-
it '
|
|
52
|
-
result = described_class.apply_decay(confidence: 1.0,
|
|
53
|
-
expect(result).to
|
|
52
|
+
it 'applies age-based power-law decay when age_hours is provided' do
|
|
53
|
+
result = described_class.apply_decay(confidence: 1.0, age_hours: 10)
|
|
54
|
+
expect(result).to be > 0.0
|
|
55
|
+
expect(result).to be < 1.0
|
|
54
56
|
end
|
|
55
57
|
|
|
56
58
|
it 'clamps to 0.0 minimum' do
|
|
57
|
-
result = described_class.apply_decay(confidence: 0.001
|
|
59
|
+
result = described_class.apply_decay(confidence: 0.001)
|
|
58
60
|
expect(result).to be >= 0.0
|
|
59
61
|
end
|
|
62
|
+
|
|
63
|
+
it 'accepts a custom alpha' do
|
|
64
|
+
result = described_class.apply_decay(confidence: 1.0, alpha: 0.5)
|
|
65
|
+
expected = 1.0 / (1.0 + 0.5) # ~0.6667
|
|
66
|
+
expect(result).to be_within(0.0001).of(expected)
|
|
67
|
+
end
|
|
60
68
|
end
|
|
61
69
|
|
|
62
70
|
describe '.apply_retrieval_boost' do
|
|
@@ -81,6 +89,11 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Confidence do
|
|
|
81
89
|
result = described_class.apply_corroboration_boost(confidence: 0.9)
|
|
82
90
|
expect(result).to eq(1.0)
|
|
83
91
|
end
|
|
92
|
+
|
|
93
|
+
it 'applies half weight for same-source corroboration' do
|
|
94
|
+
result = described_class.apply_corroboration_boost(confidence: 0.5, weight: 0.5)
|
|
95
|
+
expect(result).to eq(0.65)
|
|
96
|
+
end
|
|
84
97
|
end
|
|
85
98
|
|
|
86
99
|
describe '.decayed?' do
|
|
@@ -13,8 +13,9 @@ RSpec.describe 'Apollo Decay Cycle' do
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
describe '#decay_rate' do
|
|
16
|
-
it 'returns
|
|
17
|
-
|
|
16
|
+
it 'returns power-law derived rate when settings unavailable' do
|
|
17
|
+
expected = 1.0 / (1.0 + 0.1) # ~0.909091
|
|
18
|
+
expect(maintenance.send(:decay_rate)).to be_within(0.0001).of(expected)
|
|
18
19
|
end
|
|
19
20
|
end
|
|
20
21
|
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
require 'legion/extensions/apollo/helpers/confidence'
|
|
6
|
+
require 'legion/extensions/apollo/helpers/similarity'
|
|
7
|
+
require 'legion/extensions/apollo/helpers/graph_query'
|
|
8
|
+
require 'legion/extensions/apollo/runners/entity_extractor'
|
|
9
|
+
|
|
10
|
+
RSpec.describe Legion::Extensions::Apollo::Runners::EntityExtractor do
|
|
11
|
+
let(:runner) do
|
|
12
|
+
obj = Object.new
|
|
13
|
+
obj.extend(described_class)
|
|
14
|
+
obj
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
describe '#extract_entities' do
|
|
18
|
+
context 'when Legion::LLM is not available' do
|
|
19
|
+
before { hide_const('Legion::LLM') if defined?(Legion::LLM) }
|
|
20
|
+
|
|
21
|
+
it 'returns an empty entity list' do
|
|
22
|
+
result = runner.extract_entities(text: 'Jane works on lex-synapse')
|
|
23
|
+
expect(result[:success]).to be true
|
|
24
|
+
expect(result[:entities]).to eq([])
|
|
25
|
+
expect(result[:source]).to eq(:unavailable)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
context 'when Legion::LLM is available' do
|
|
30
|
+
let(:llm_result) do
|
|
31
|
+
{
|
|
32
|
+
data: {
|
|
33
|
+
entities: [
|
|
34
|
+
{ name: 'lex-synapse', type: 'repository', confidence: 0.9 },
|
|
35
|
+
{ name: 'Jane Doe', type: 'person', confidence: 0.8 }
|
|
36
|
+
]
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
before do
|
|
42
|
+
stub_const('Legion::LLM', Module.new do
|
|
43
|
+
def self.started? = true
|
|
44
|
+
|
|
45
|
+
def self.structured(**_opts) = { data: { entities: [] } }
|
|
46
|
+
end)
|
|
47
|
+
allow(Legion::LLM).to receive(:structured).and_return(llm_result)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it 'returns extracted entities' do
|
|
51
|
+
result = runner.extract_entities(text: 'Jane works on lex-synapse')
|
|
52
|
+
expect(result[:success]).to be true
|
|
53
|
+
expect(result[:entities].size).to eq(2)
|
|
54
|
+
expect(result[:source]).to eq(:llm)
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
it 'filters to configured entity types' do
|
|
58
|
+
result = runner.extract_entities(
|
|
59
|
+
text: 'Jane works on lex-synapse',
|
|
60
|
+
entity_types: ['repository']
|
|
61
|
+
)
|
|
62
|
+
expect(result[:entities].all? { |e| e[:type] == 'repository' }).to be true
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it 'applies minimum confidence filter' do
|
|
66
|
+
result = runner.extract_entities(
|
|
67
|
+
text: 'Jane works on lex-synapse',
|
|
68
|
+
min_confidence: 0.85
|
|
69
|
+
)
|
|
70
|
+
expect(result[:entities].size).to eq(1)
|
|
71
|
+
expect(result[:entities].first[:name]).to eq('lex-synapse')
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
context 'when LLM raises' do
|
|
76
|
+
before do
|
|
77
|
+
stub_const('Legion::LLM', Module.new do
|
|
78
|
+
def self.started? = true
|
|
79
|
+
|
|
80
|
+
def self.structured(**_opts) = raise(StandardError, 'timeout')
|
|
81
|
+
end)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
it 'returns success false with error message' do
|
|
85
|
+
result = runner.extract_entities(text: 'anything')
|
|
86
|
+
expect(result[:success]).to be false
|
|
87
|
+
expect(result[:error]).to include('timeout')
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
context 'with empty text' do
|
|
92
|
+
it 'returns early with empty list' do
|
|
93
|
+
result = runner.extract_entities(text: '')
|
|
94
|
+
expect(result[:success]).to be true
|
|
95
|
+
expect(result[:entities]).to eq([])
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it 'handles nil text' do
|
|
99
|
+
result = runner.extract_entities(text: nil)
|
|
100
|
+
expect(result[:success]).to be true
|
|
101
|
+
expect(result[:entities]).to eq([])
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
describe '#entity_extraction_prompt' do
|
|
107
|
+
it 'returns a non-empty string' do
|
|
108
|
+
prompt = runner.entity_extraction_prompt(
|
|
109
|
+
text: 'test text', entity_types: %w[person service]
|
|
110
|
+
)
|
|
111
|
+
expect(prompt).to be_a(String)
|
|
112
|
+
expect(prompt).to include('person')
|
|
113
|
+
expect(prompt).to include('service')
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
describe '#entity_schema' do
|
|
118
|
+
it 'returns a JSON Schema hash' do
|
|
119
|
+
schema = runner.entity_schema
|
|
120
|
+
expect(schema[:type]).to eq('object')
|
|
121
|
+
expect(schema[:properties]).to have_key(:entities)
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-apollo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -50,6 +50,7 @@ files:
|
|
|
50
50
|
- lib/legion/extensions/apollo.rb
|
|
51
51
|
- lib/legion/extensions/apollo/actors/corroboration_checker.rb
|
|
52
52
|
- lib/legion/extensions/apollo/actors/decay.rb
|
|
53
|
+
- lib/legion/extensions/apollo/actors/entity_watchdog.rb
|
|
53
54
|
- lib/legion/extensions/apollo/actors/expertise_aggregator.rb
|
|
54
55
|
- lib/legion/extensions/apollo/actors/ingest.rb
|
|
55
56
|
- lib/legion/extensions/apollo/actors/query_responder.rb
|
|
@@ -59,6 +60,7 @@ files:
|
|
|
59
60
|
- lib/legion/extensions/apollo/helpers/embedding.rb
|
|
60
61
|
- lib/legion/extensions/apollo/helpers/graph_query.rb
|
|
61
62
|
- lib/legion/extensions/apollo/helpers/similarity.rb
|
|
63
|
+
- lib/legion/extensions/apollo/runners/entity_extractor.rb
|
|
62
64
|
- lib/legion/extensions/apollo/runners/expertise.rb
|
|
63
65
|
- lib/legion/extensions/apollo/runners/knowledge.rb
|
|
64
66
|
- lib/legion/extensions/apollo/runners/maintenance.rb
|
|
@@ -70,6 +72,7 @@ files:
|
|
|
70
72
|
- lib/legion/extensions/apollo/transport/queues/query.rb
|
|
71
73
|
- lib/legion/extensions/apollo/version.rb
|
|
72
74
|
- spec/legion/extensions/apollo/actors/decay_spec.rb
|
|
75
|
+
- spec/legion/extensions/apollo/actors/entity_watchdog_spec.rb
|
|
73
76
|
- spec/legion/extensions/apollo/actors/expertise_aggregator_spec.rb
|
|
74
77
|
- spec/legion/extensions/apollo/actors/ingest_spec.rb
|
|
75
78
|
- spec/legion/extensions/apollo/client_spec.rb
|
|
@@ -80,6 +83,7 @@ files:
|
|
|
80
83
|
- spec/legion/extensions/apollo/helpers/graph_query_spec.rb
|
|
81
84
|
- spec/legion/extensions/apollo/helpers/similarity_spec.rb
|
|
82
85
|
- spec/legion/extensions/apollo/runners/decay_cycle_spec.rb
|
|
86
|
+
- spec/legion/extensions/apollo/runners/entity_extractor_spec.rb
|
|
83
87
|
- spec/legion/extensions/apollo/runners/expertise_spec.rb
|
|
84
88
|
- spec/legion/extensions/apollo/runners/knowledge_spec.rb
|
|
85
89
|
- spec/legion/extensions/apollo/runners/maintenance_spec.rb
|