lex-apollo 0.3.1 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1478924cdbe16dac455d02313dd3de7b42bb0d022da411213b537f44527d14c0
4
- data.tar.gz: 2e72cfa6dab1b790b1fe904ea23fe55ae093e0c6570a3b2790c8b7c0930b64cb
3
+ metadata.gz: e7d785a9fed9656eb22b307620a384f64e52646bca14728dc0de3ef2b1adb5bd
4
+ data.tar.gz: c64bcf1b75ee0ed43a747722a610c5cee8de823995d5f7455e31ae5c9174c56d
5
5
  SHA512:
6
- metadata.gz: a8aafbcf43d73647b48c27a8e400de209a723591b59b15f92f3144fd73ea4ff374a735cc4aa4720ffa48560461fe6951da231a0dc0114064f38d1f9b4cc170a6
7
- data.tar.gz: d5d04a370239f53a7ab891a957ace9f52b3d9a459c93494892134a4658153cc13e629de766aaf84f4598b9b0d3c2406df316869156592e778eb46ea89848ce83
6
+ metadata.gz: 0ff89d33e4993dd8c71e392202c66add619ab04045fa674b5f8f0c4d891b727ca7401865d349c68bdc3a0344e1773db4f86651ed7e96ecbcaf0098a9d05cdbc9
7
+ data.tar.gz: 29b69744b9870bee8e1fbfc030652aede63a51c3a12cd02a3bc69bfc805b6267cf8045346cb37dc2565e1e103ae6f3b559972536b02f3a62dd4b6f12223361a5
data/CHANGELOG.md CHANGED
@@ -1,5 +1,37 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.4] - 2026-03-20
4
+
5
+ ### Added
6
+ - `Helpers::EntityWatchdog`: regex-based entity detection for persons, services, repos, and configurable concepts
7
+ - GAIA `post_tick_reflection` handler for passive entity detection (enabled via `apollo.entity_watchdog.enabled`)
8
+ - Deduplication by type+value, configurable type filtering, and `link_or_create` for Apollo integration
9
+
10
+ ## [0.3.3] - 2026-03-20
11
+
12
+ ### Added
13
+ - `Runners::EntityExtractor`: LLM-backed structured extraction of people, services, repositories, and concepts from arbitrary text
14
+ - `Actor::EntityWatchdog`: interval actor (120s) that reads recent task logs, extracts entities, deduplicates against Apollo, and publishes ingest messages for net-new entities
15
+ - Settings support: `apollo.entity_watchdog.types`, `apollo.entity_watchdog.min_confidence`, `apollo.entity_watchdog.dedup_threshold`
16
+ - Fallback behavior when `Legion::LLM` is unavailable (returns empty entity list, no error)
17
+
18
+ ## [0.3.2] - 2026-03-20
19
+
20
+ ### Changed
21
+ - Replace exponential confidence decay (`confidence * 0.998`) with power-law decay
22
+ (`confidence / (1 + alpha)` per tick, where `alpha` defaults to 0.1)
23
+ - Configurable via `apollo.power_law_alpha` setting (default: 0.1)
24
+ - Source diversity enforcement in corroboration: same-source corroboration (matching
25
+ `source_provider`) receives 50% boost weight instead of full weight
26
+ - `check_corroboration` skips auto-promotion when both candidate and match have
27
+ the same known `source_provider` (correlated error prevention)
28
+ - `apply_corroboration_boost` accepts optional `weight:` kwarg (default: 1.0)
29
+
30
+ ### Added
31
+ - `source_provider` field populated on ingest via explicit kwarg or agent name inference
32
+ - `handle_ingest` accepts `source_provider:` kwarg; derives provider from agent name
33
+ convention when not explicitly provided
34
+
3
35
  ## [0.3.1] - 2026-03-17
4
36
 
5
37
  ### Added
data/README.md CHANGED
@@ -26,40 +26,41 @@ gem 'lex-apollo'
26
26
  ```ruby
27
27
  require 'legion/extensions/apollo'
28
28
 
29
- client = Legion::Extensions::Apollo::Client.new(agent_id: 'my-agent-001')
29
+ client = Legion::Extensions::Apollo::Client.new
30
30
 
31
- # Store a confirmed knowledge entry
31
+ # Build a store payload (published to RabbitMQ for the Apollo service to persist)
32
32
  client.store_knowledge(
33
- domain: 'networking',
34
- content: 'BGP route reflectors reduce full-mesh IBGP complexity',
35
- confidence: 0.9,
36
- source_agent_id: 'my-agent-001',
37
- tags: ['bgp', 'routing', 'ibgp']
33
+ content: 'BGP route reflectors reduce full-mesh IBGP complexity',
34
+ content_type: :fact,
35
+ source_agent: 'my-agent-001',
36
+ tags: ['bgp', 'routing', 'ibgp'],
37
+ context: { source: 'network_team_wiki' }
38
38
  )
39
39
 
40
- # Query for relevant knowledge
40
+ # Build a query payload
41
41
  client.query_knowledge(
42
- query: 'BGP route reflector configuration',
43
- domain: 'networking',
42
+ query: 'BGP route reflector configuration',
44
43
  min_confidence: 0.6,
45
- limit: 10
44
+ limit: 10
46
45
  )
47
46
 
48
47
  # Get related entries (concept graph traversal)
49
- client.related_entries(entry_id: 'entry-uuid', max_hops: 2)
48
+ client.related_entries(entry_id: 'entry-uuid', depth: 2)
50
49
 
51
50
  # Deprecate a stale entry
52
51
  client.deprecate_entry(entry_id: 'entry-uuid', reason: 'superseded by RFC 7938')
53
52
  ```
54
53
 
54
+ Content types: `:fact`, `:concept`, `:procedure`, `:association`, `:observation`
55
+
55
56
  ### Expertise Queries
56
57
 
57
58
  ```ruby
58
59
  # Get proficiency scores for a domain
59
- client.get_expertise(domain: 'networking', agent_id: 'my-agent-001')
60
+ client.get_expertise(domain: 'networking', min_proficiency: 0.3)
60
61
 
61
- # Find domains where knowledge coverage is thin
62
- client.domains_at_risk(min_entries: 5, min_confidence: 0.7)
62
+ # Find domains where coverage is thin (below min agent count)
63
+ client.domains_at_risk(min_agents: 2)
63
64
 
64
65
  # Full agent knowledge profile
65
66
  client.agent_profile(agent_id: 'my-agent-001')
@@ -68,11 +69,11 @@ client.agent_profile(agent_id: 'my-agent-001')
68
69
  ### Maintenance
69
70
 
70
71
  ```ruby
71
- # Force confidence decay cycle
72
- client.force_decay(domain: 'networking')
72
+ # Force confidence decay cycle (factor multiplied against each entry's confidence)
73
+ client.force_decay(factor: 0.5)
73
74
 
74
- # Archive entries below confidence threshold
75
- client.archive_stale(max_confidence: 0.2)
75
+ # Archive entries older than N days
76
+ client.archive_stale(days: 90)
76
77
 
77
78
  # Resolve a corroboration dispute
78
79
  client.resolve_dispute(entry_id: 'entry-uuid', resolution: :accept)
@@ -104,12 +105,13 @@ Apollo is wired into the GAIA tick cycle at the `knowledge_retrieval` phase (pha
104
105
 
105
106
  Entries have a confidence score between 0.0 and 1.0:
106
107
 
107
- - New entries start at the caller-supplied confidence value
108
- - Corroboration from multiple agents boosts confidence
109
- - Entries below `WRITE_GATE_THRESHOLD` are rejected on ingest
110
- - Confidence decays hourly; entries below `ARCHIVE_THRESHOLD` are archived
108
+ - New entries start at `INITIAL_CONFIDENCE` (0.5) with status `candidate`
109
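+ - Corroboration from a semantically similar entry (cosine > 0.9) boosts confidence by 0.3 (half that when the submitting agent's provider matches the entry's `source_provider`) and promotes to `confirmed`, except when both entries share the same known `source_provider`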
110
+ - Each retrieval adds a small boost (+0.02, capped at 1.0)
111
+ - Confidence decays hourly by a factor of `1 / (1 + alpha)` (`alpha` defaults to 0.1, configurable via `apollo.power_law_alpha`); entries below 0.1 are archived
112
+ - The GAIA write gate (`meets_write_gate?`) requires confidence > 0.6 and novelty > 0.3 for the tick write-back path
111
113
 
112
- See `helpers/confidence.rb` for decay constants and boost logic.
114
+ See `helpers/confidence.rb` for all constants and math helpers.
113
115
 
114
116
  ## Requirements
115
117
 
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/every'
4
+ require_relative '../runners/knowledge'
5
+ require_relative '../runners/entity_extractor'
6
+
7
+ module Legion
8
+ module Extensions
9
+ module Apollo
10
+ module Actor
11
+ class EntityWatchdog < Legion::Extensions::Actors::Every
12
+ include Legion::Extensions::Apollo::Runners::Knowledge
13
+ include Legion::Extensions::Apollo::Runners::EntityExtractor
14
+
15
+ DEDUP_THRESHOLD_DEFAULT = 0.92
16
+ TASK_LOG_LOOKBACK_SECONDS = 300
17
+ TASK_LOG_LIMIT = 50
18
+
19
+ def runner_class = Legion::Extensions::Apollo::Runners::EntityExtractor
20
+ def runner_function = 'scan_and_ingest'
21
+ def time = 120
22
+ def run_now? = false
23
+ def use_runner? = false
24
+ def check_subtask? = false
25
+ def generate_task? = false
26
+
27
+ def enabled?
28
+ defined?(Legion::Extensions::Apollo::Runners::EntityExtractor) &&
29
+ defined?(Legion::Transport)
30
+ rescue StandardError
31
+ false
32
+ end
33
+
34
+ def scan_and_ingest
35
+ texts = recent_task_log_texts
36
+ return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
37
+
38
+ ingested = 0
39
+ texts.each do |text|
40
+ result = extract_entities(
41
+ text: text,
42
+ entity_types: entity_types,
43
+ min_confidence: min_entity_confidence
44
+ )
45
+ next unless result[:success]
46
+
47
+ result[:entities].each do |entity|
48
+ next if entity_exists_in_apollo?(entity)
49
+
50
+ publish_entity_ingest(entity)
51
+ ingested += 1
52
+ end
53
+ end
54
+
55
+ log_debug("EntityWatchdog: ingested #{ingested} new entities from #{texts.size} log entries")
56
+ { success: true, ingested: ingested, logs_scanned: texts.size }
57
+ rescue StandardError => e
58
+ log_error("EntityWatchdog scan_and_ingest failed: #{e.message}")
59
+ { success: false, error: e.message }
60
+ end
61
+
62
+ def recent_task_log_texts
63
+ return [] unless defined?(Legion::Data) && defined?(Legion::Data::Model::TaskLog)
64
+
65
+ cutoff = Time.now - TASK_LOG_LOOKBACK_SECONDS
66
+ logs = Legion::Data::Model::TaskLog
67
+ .where { created_at >= cutoff }
68
+ .order(Sequel.desc(:created_at))
69
+ .limit(TASK_LOG_LIMIT)
70
+ .select_map(:message)
71
+ logs.map(&:to_s).reject(&:empty?).uniq
72
+ rescue StandardError
73
+ []
74
+ end
75
+
76
+ def entity_exists_in_apollo?(entity)
77
+ result = retrieve_relevant(
78
+ query: entity[:name].to_s,
79
+ limit: 1,
80
+ min_confidence: 0.1,
81
+ tags: [entity[:type].to_s]
82
+ )
83
+ return false unless result[:success] && result[:count].positive?
84
+
85
+ closest = result[:entries].first
86
+ distance = closest[:distance].to_f
87
+ distance <= (1.0 - dedup_similarity_threshold)
88
+ rescue StandardError
89
+ false
90
+ end
91
+
92
+ def publish_entity_ingest(entity)
93
+ return unless defined?(Legion::Extensions::Apollo::Transport::Messages::Ingest)
94
+
95
+ Legion::Extensions::Apollo::Transport::Messages::Ingest.new(
96
+ content: "#{entity[:type].to_s.capitalize}: #{entity[:name]}",
97
+ content_type: 'concept',
98
+ tags: [entity[:type].to_s, 'entity_watchdog'],
99
+ source_agent: 'lex-apollo:entity_watchdog',
100
+ context: { entity_type: entity[:type], original_name: entity[:name] }
101
+ ).publish
102
+ rescue StandardError => e
103
+ log_error("EntityWatchdog publish failed: #{e.message}")
104
+ end
105
+
106
+ def entity_types
107
+ if defined?(Legion::Settings)
108
+ types = Legion::Settings.dig(:apollo, :entity_watchdog, :types)
109
+ return Array(types).map(&:to_s) if types
110
+ end
111
+ %w[person service repository concept]
112
+ end
113
+
114
+ def min_entity_confidence
115
+ if defined?(Legion::Settings)
116
+ val = Legion::Settings.dig(:apollo, :entity_watchdog, :min_confidence)
117
+ return val.to_f if val
118
+ end
119
+ 0.7
120
+ end
121
+
122
+ def dedup_similarity_threshold
123
+ if defined?(Legion::Settings)
124
+ val = Legion::Settings.dig(:apollo, :entity_watchdog, :dedup_threshold)
125
+ return val.to_f if val
126
+ end
127
+ DEDUP_THRESHOLD_DEFAULT
128
+ end
129
+
130
+ private
131
+
132
+ def log_debug(message)
133
+ Legion::Logging.debug(message) if defined?(Legion::Logging)
134
+ end
135
+
136
+ def log_error(message)
137
+ Legion::Logging.error(message) if defined?(Legion::Logging)
138
+ end
139
+ end
140
+ end
141
+ end
142
+ end
143
+ end
@@ -8,7 +8,7 @@ module Legion
8
8
  INITIAL_CONFIDENCE = 0.5
9
9
  CORROBORATION_BOOST = 0.3
10
10
  RETRIEVAL_BOOST = 0.02
11
- HOURLY_DECAY_FACTOR = 0.998
11
+ POWER_LAW_ALPHA = 0.1
12
12
  DECAY_THRESHOLD = 0.1
13
13
  CORROBORATION_SIMILARITY_THRESHOLD = 0.9
14
14
  WRITE_CONFIDENCE_GATE = 0.6
@@ -20,16 +20,21 @@ module Legion
20
20
 
21
21
  module_function
22
22
 
23
- def apply_decay(confidence:, factor: HOURLY_DECAY_FACTOR, **)
24
- [confidence * factor, 0.0].max
23
+ def apply_decay(confidence:, age_hours: nil, alpha: POWER_LAW_ALPHA, **)
24
+ if age_hours
25
+ [confidence * ((age_hours.clamp(0, Float::INFINITY) + 2.0)**(-alpha)) / ((age_hours.clamp(0, Float::INFINITY) + 1.0)**(-alpha)), 0.0].max
26
+ else
27
+ factor = 1.0 / (1.0 + alpha)
28
+ [confidence * factor, 0.0].max
29
+ end
25
30
  end
26
31
 
27
32
  def apply_retrieval_boost(confidence:, **)
28
33
  [confidence + RETRIEVAL_BOOST, 1.0].min
29
34
  end
30
35
 
31
- def apply_corroboration_boost(confidence:, **)
32
- [confidence + CORROBORATION_BOOST, 1.0].min
36
+ def apply_corroboration_boost(confidence:, weight: 1.0, **)
37
+ [confidence + (CORROBORATION_BOOST * weight), 1.0].min
33
38
  end
34
39
 
35
40
  def decayed?(confidence:, **)
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Apollo
6
+ module Helpers
7
+ module EntityWatchdog
8
+ ENTITY_PATTERNS = {
9
+ person: /\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)+\b/,
10
+ service: %r{\bhttps?://[^\s]+\b},
11
+ repo: %r{\b[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+\b}
12
+ }.freeze
13
+
14
+ class << self
15
+ def detect_entities(text:, types: nil)
16
+ return [] if text.nil? || text.empty?
17
+
18
+ types = (types || default_types).map(&:to_sym)
19
+ entities = []
20
+
21
+ types.each do |type_sym|
22
+ pattern = type_sym == :concept ? concept_pattern : ENTITY_PATTERNS[type_sym]
23
+ next unless pattern
24
+
25
+ text.scan(pattern).each do |match|
26
+ entities << { type: type_sym, value: match.strip, confidence: 0.5 }
27
+ end
28
+ end
29
+
30
+ entities.uniq { |e| [e[:type], e[:value].downcase] }
31
+ end
32
+
33
+ def link_or_create(entities:, source_context: nil)
34
+ return { success: true, linked: 0, created: 0 } if entities.nil? || entities.empty?
35
+
36
+ linked = 0
37
+ created = 0
38
+
39
+ entities.each do |entity|
40
+ existing = find_existing(entity)
41
+ if existing
42
+ bump_confidence(existing, source_context)
43
+ linked += 1
44
+ else
45
+ create_candidate(entity, source_context)
46
+ created += 1
47
+ end
48
+ end
49
+
50
+ { success: true, linked: linked, created: created }
51
+ end
52
+
53
+ def concept_pattern
54
+ keywords = if defined?(Legion::Settings)
55
+ Legion::Settings.dig(:apollo, :entity_watchdog, :concept_keywords) || []
56
+ else
57
+ []
58
+ end
59
+ return nil if keywords.empty?
60
+
61
+ Regexp.new("\\b(?:#{keywords.map { |k| Regexp.escape(k) }.join('|')})\\b", Regexp::IGNORECASE)
62
+ end
63
+
64
+ private
65
+
66
+ def default_types
67
+ if defined?(Legion::Settings)
68
+ Legion::Settings.dig(:apollo, :entity_watchdog, :types) || %w[person service repo concept]
69
+ else
70
+ %w[person service repo concept]
71
+ end
72
+ end
73
+
74
+ def find_existing(_entity)
75
+ return nil unless defined?(Runners::Knowledge) && respond_to?(:retrieve_relevant, true)
76
+
77
+ nil
78
+ end
79
+
80
+ def bump_confidence(_entry, _source_context)
81
+ # TODO: not implemented yet; intended to increment retrieval confidence on the existing Apollo entry
82
+ end
83
+
84
+ def create_candidate(entity, _source_context)
85
+ return unless defined?(Runners::Knowledge)
86
+
87
+ Legion::Logging.debug "[entity_watchdog] candidate: #{entity[:type]}=#{entity[:value]}" if defined?(Legion::Logging)
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Apollo
6
+ module Runners
7
+ module EntityExtractor
8
+ DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
9
+ DEFAULT_MIN_CONFIDENCE = 0.7
10
+
11
+ def extract_entities(text:, entity_types: nil, min_confidence: DEFAULT_MIN_CONFIDENCE, **)
12
+ return { success: true, entities: [], source: :empty } if text.to_s.strip.empty?
13
+
14
+ return { success: true, entities: [], source: :unavailable } unless defined?(Legion::LLM) && Legion::LLM.started?
15
+
16
+ types = Array(entity_types).map(&:to_s)
17
+ types = DEFAULT_ENTITY_TYPES if types.empty?
18
+
19
+ result = Legion::LLM.structured(
20
+ messages: [
21
+ { role: 'user', content: entity_extraction_prompt(text: text, entity_types: types) }
22
+ ],
23
+ schema: entity_schema
24
+ )
25
+
26
+ raw_entities = result.dig(:data, :entities) || []
27
+ filtered = raw_entities.select do |entity|
28
+ (entity[:confidence] || 0.0) >= min_confidence &&
29
+ (types.empty? || types.include?(entity[:type].to_s))
30
+ end
31
+
32
+ { success: true, entities: filtered, source: :llm }
33
+ rescue StandardError => e
34
+ { success: false, entities: [], error: e.message, source: :error }
35
+ end
36
+
37
+ def entity_extraction_prompt(text:, entity_types:, **)
38
+ type_list = Array(entity_types).join(', ')
39
+ <<~PROMPT.strip
40
+ Extract named entities from the following text. Return only entities of these types: #{type_list}.
41
+
42
+ For each entity provide:
43
+ - name: the canonical name as it appears (string)
44
+ - type: one of #{type_list} (string)
45
+ - confidence: your confidence this is a real entity of that type (float 0.0-1.0)
46
+
47
+ Text:
48
+ #{text}
49
+ PROMPT
50
+ end
51
+
52
+ def entity_schema
53
+ {
54
+ type: 'object',
55
+ properties: {
56
+ entities: {
57
+ type: 'array',
58
+ items: {
59
+ type: 'object',
60
+ properties: {
61
+ name: { type: 'string' },
62
+ type: { type: 'string' },
63
+ confidence: { type: 'number' }
64
+ },
65
+ required: %w[name type confidence]
66
+ }
67
+ }
68
+ },
69
+ required: ['entities']
70
+ }
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -53,7 +53,7 @@ module Legion
53
53
  }
54
54
  end
55
55
 
56
- def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', context: {}, **)
56
+ def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists
57
57
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
58
58
 
59
59
  embedding = Helpers::Embedding.generate(text: content)
@@ -64,14 +64,15 @@ module Legion
64
64
 
65
65
  unless corroborated
66
66
  new_entry = Legion::Data::Model::ApolloEntry.create(
67
- content: content,
68
- content_type: content_type_sym,
69
- confidence: Helpers::Confidence::INITIAL_CONFIDENCE,
70
- source_agent: source_agent,
71
- source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
72
- tags: Sequel.pg_array(tag_array),
73
- status: 'candidate',
74
- embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
67
+ content: content,
68
+ content_type: content_type_sym,
69
+ confidence: Helpers::Confidence::INITIAL_CONFIDENCE,
70
+ source_agent: source_agent,
71
+ source_provider: source_provider || derive_provider_from_agent(source_agent),
72
+ source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
73
+ tags: Sequel.pg_array(tag_array),
74
+ status: 'candidate',
75
+ embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
75
76
  )
76
77
  existing_id = new_entry.id
77
78
  end
@@ -220,8 +221,9 @@ module Legion
220
221
  sim = Helpers::Similarity.cosine_similarity(vec_a: embedding, vec_b: entry.embedding)
221
222
  next unless Helpers::Similarity.above_corroboration_threshold?(similarity: sim)
222
223
 
224
+ weight = same_source_provider?(source_agent, entry) ? 0.5 : 1.0
223
225
  entry.update(
224
- confidence: Helpers::Confidence.apply_corroboration_boost(confidence: entry.confidence),
226
+ confidence: Helpers::Confidence.apply_corroboration_boost(confidence: entry.confidence, weight: weight),
225
227
  updated_at: Time.now
226
228
  )
227
229
  Legion::Data::Model::ApolloRelation.create(
@@ -237,6 +239,20 @@ module Legion
237
239
  [false, nil]
238
240
  end
239
241
 
242
+ def same_source_provider?(submitting_agent, entry)
243
+ stored = entry.respond_to?(:source_provider) ? entry.source_provider : nil
244
+ return false if stored.nil? || stored.to_s.empty? || stored.to_s == 'unknown'
245
+
246
+ derive_provider_from_agent(submitting_agent) == stored.to_s
247
+ end
248
+
249
+ def derive_provider_from_agent(source_agent)
250
+ return 'unknown' if source_agent.nil? || source_agent == 'unknown'
251
+
252
+ provider = source_agent.to_s.split(/[-_]/).first.downcase
253
+ %w[claude openai gemini human system].include?(provider) ? provider : 'unknown'
254
+ end
255
+
240
256
  def upsert_expertise(source_agent:, domain:)
241
257
  expertise = Legion::Data::Model::ApolloExpertise
242
258
  .where(agent_id: source_agent, domain: domain).first
@@ -62,6 +62,11 @@ module Legion
62
62
 
63
63
  next unless match
64
64
 
65
+ candidate_provider = candidate.respond_to?(:source_provider) ? candidate.source_provider : nil
66
+ match_provider = match.respond_to?(:source_provider) ? match.source_provider : nil
67
+ both_known = known_provider?(candidate_provider) && known_provider?(match_provider)
68
+ next if both_known && candidate_provider == match_provider
69
+
65
70
  candidate.update(
66
71
  status: 'confirmed',
67
72
  confirmed_at: Time.now,
@@ -88,7 +93,13 @@ module Legion
88
93
  private
89
94
 
90
95
  def decay_rate
91
- (defined?(Legion::Settings) && Legion::Settings.dig(:apollo, :decay_rate)) || 0.998
96
+ alpha = (defined?(Legion::Settings) && Legion::Settings.dig(:apollo, :power_law_alpha)) ||
97
+ Helpers::Confidence::POWER_LAW_ALPHA
98
+ 1.0 / (1.0 + alpha)
99
+ end
100
+
101
+ def known_provider?(provider)
102
+ !provider.nil? && !provider.to_s.empty? && provider.to_s != 'unknown'
92
103
  end
93
104
 
94
105
  def decay_threshold
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.3.1'
6
+ VERSION = '0.3.4'
7
7
  end
8
8
  end
9
9
  end
@@ -7,6 +7,7 @@ require 'legion/extensions/apollo/helpers/graph_query'
7
7
  require 'legion/extensions/apollo/runners/knowledge'
8
8
  require 'legion/extensions/apollo/runners/expertise'
9
9
  require 'legion/extensions/apollo/runners/maintenance'
10
+ require 'legion/extensions/apollo/runners/entity_extractor'
10
11
 
11
12
  if defined?(Legion::Transport)
12
13
  require 'legion/extensions/apollo/transport/exchanges/apollo'
@@ -23,3 +24,20 @@ module Legion
23
24
  end
24
25
  end
25
26
  end
27
+
28
+ # Entity watchdog on post_tick_reflection
29
+ if defined?(Legion::Gaia::PhaseWiring) && begin
30
+ Legion::Settings.dig(:apollo, :entity_watchdog, :enabled)
31
+ rescue StandardError
32
+ false
33
+ end
34
+ require 'legion/extensions/apollo/helpers/entity_watchdog'
35
+ Legion::Gaia::PhaseWiring.register_handler(:post_tick_reflection) do |tick_results|
36
+ text = tick_results.is_a?(Hash) ? (tick_results[:content] || tick_results[:output] || '').to_s : tick_results.to_s
37
+ entities = Legion::Extensions::Apollo::Helpers::EntityWatchdog.detect_entities(text: text)
38
+ if entities.any?
39
+ Legion::Extensions::Apollo::Helpers::EntityWatchdog.link_or_create(entities: entities,
40
+ source_context: tick_results[:tick_id])
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ unless defined?(Legion::Extensions::Actors::Every)
6
+ module Legion
7
+ module Extensions
8
+ module Actors
9
+ class Every
10
+ def initialize(**_opts); end
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
16
+ $LOADED_FEATURES << 'legion/extensions/actors/every' unless $LOADED_FEATURES.include?('legion/extensions/actors/every')
17
+
18
+ require 'legion/extensions/apollo/runners/knowledge'
19
+ require 'legion/extensions/apollo/runners/entity_extractor'
20
+ require 'legion/extensions/apollo/actors/entity_watchdog'
21
+
22
+ RSpec.describe Legion::Extensions::Apollo::Actor::EntityWatchdog do
23
+ subject(:actor) { described_class.allocate }
24
+
25
+ describe 'actor configuration' do
26
+ it 'uses EntityExtractor as runner_class' do
27
+ expect(actor.runner_class).to eq(Legion::Extensions::Apollo::Runners::EntityExtractor)
28
+ end
29
+
30
+ it 'runs scan_and_ingest function' do
31
+ expect(actor.runner_function).to eq('scan_and_ingest')
32
+ end
33
+
34
+ it 'runs every 120 seconds' do
35
+ expect(actor.time).to eq(120)
36
+ end
37
+
38
+ it 'does not run immediately' do
39
+ expect(actor.run_now?).to be false
40
+ end
41
+
42
+ it 'does not use the runner framework (calls manual directly)' do
43
+ expect(actor.use_runner?).to be false
44
+ end
45
+
46
+ it 'does not generate tasks' do
47
+ expect(actor.generate_task?).to be false
48
+ end
49
+ end
50
+
51
+ describe '#scan_and_ingest' do
52
+ let(:entities) { [{ name: 'lex-synapse', type: 'repository', confidence: 0.95 }] }
53
+ let(:no_match) { { success: true, entries: [], count: 0 } }
54
+
55
+ before do
56
+ allow(actor).to receive(:recent_task_log_texts).and_return(['deploying lex-synapse to nomad'])
57
+ allow(actor).to receive(:extract_entities).with(text: 'deploying lex-synapse to nomad',
58
+ entity_types: anything,
59
+ min_confidence: anything)
60
+ .and_return({ success: true, entities: entities, source: :llm })
61
+ allow(actor).to receive(:retrieve_relevant).and_return(no_match)
62
+ allow(actor).to receive(:publish_entity_ingest)
63
+ end
64
+
65
+ it 'calls publish_entity_ingest for new entities' do
66
+ actor.scan_and_ingest
67
+ expect(actor).to have_received(:publish_entity_ingest).once
68
+ end
69
+
70
+ context 'when entity already exists in Apollo (high similarity)' do
71
+ let(:existing_match) do
72
+ { success: true, entries: [{ id: 42, content: 'lex-synapse', distance: 0.02 }], count: 1 }
73
+ end
74
+
75
+ before { allow(actor).to receive(:retrieve_relevant).and_return(existing_match) }
76
+
77
+ it 'does not publish for duplicate entities' do
78
+ actor.scan_and_ingest
79
+ expect(actor).not_to have_received(:publish_entity_ingest)
80
+ end
81
+ end
82
+
83
+ context 'when LLM extraction returns nothing' do
84
+ before do
85
+ allow(actor).to receive(:extract_entities).and_return({ success: true, entities: [], source: :unavailable })
86
+ end
87
+
88
+ it 'does not publish anything' do
89
+ actor.scan_and_ingest
90
+ expect(actor).not_to have_received(:publish_entity_ingest)
91
+ end
92
+ end
93
+
94
+ context 'when data layer is unavailable' do
95
+ before { allow(actor).to receive(:recent_task_log_texts).and_return([]) }
96
+
97
+ it 'returns early without calling extract_entities' do
98
+ expect(actor).not_to receive(:extract_entities)
99
+ actor.scan_and_ingest
100
+ end
101
+ end
102
+ end
103
+
104
+ describe '#entity_types' do
105
+ it 'returns the default list when settings are absent' do
106
+ expect(actor.entity_types).to eq(%w[person service repository concept])
107
+ end
108
+ end
109
+
110
+ describe '#dedup_similarity_threshold' do
111
+ it 'returns a float between 0 and 1' do
112
+ threshold = actor.dedup_similarity_threshold
113
+ expect(threshold).to be_a(Float)
114
+ expect(threshold).to be_between(0.0, 1.0)
115
+ end
116
+ end
117
+
118
+ describe '#recent_task_log_texts' do
119
+ context 'when legion-data is not available' do
120
+ before { hide_const('Legion::Data') if defined?(Legion::Data) }
121
+
122
+ it 'returns an empty array' do
123
+ expect(actor.recent_task_log_texts).to eq([])
124
+ end
125
+ end
126
+ end
127
+ end
@@ -45,4 +45,29 @@ RSpec.describe Legion::Extensions::Apollo::GaiaIntegration do
45
45
  expect(result).to eq({ success: true })
46
46
  end
47
47
  end
48
+
49
+ describe 'entity watchdog phase handler' do
50
+ it 'detects entities from tick results' do
51
+ require 'legion/extensions/apollo/helpers/entity_watchdog'
52
+ tick_results = { content: 'Jane Doe deployed to https://api.example.com', tick_id: 'tick-1' }
53
+ entities = Legion::Extensions::Apollo::Helpers::EntityWatchdog.detect_entities(text: tick_results[:content])
54
+ expect(entities.size).to be >= 2
55
+ end
56
+
57
+ it 'links or creates entities from tick results' do
58
+ require 'legion/extensions/apollo/helpers/entity_watchdog'
59
+ tick_results = { content: 'Jane Doe at LegionIO/lex-mesh', tick_id: 'tick-2' }
60
+ entities = Legion::Extensions::Apollo::Helpers::EntityWatchdog.detect_entities(text: tick_results[:content])
61
+ result = Legion::Extensions::Apollo::Helpers::EntityWatchdog.link_or_create(
62
+ entities: entities, source_context: tick_results[:tick_id]
63
+ )
64
+ expect(result[:success]).to be true
65
+ end
66
+
67
+ it 'entry point has entity watchdog registration block' do
68
+ entry_point = File.read(File.expand_path('../../../../lib/legion/extensions/apollo.rb', __dir__))
69
+ expect(entry_point).to include('entity_watchdog')
70
+ expect(entry_point).to include('PhaseWiring.register_handler(:post_tick_reflection)')
71
+ end
72
+ end
48
73
  end
@@ -17,8 +17,8 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Confidence do
17
17
  expect(described_class::RETRIEVAL_BOOST).to eq(0.02)
18
18
  end
19
19
 
20
- it 'defines HOURLY_DECAY_FACTOR' do
21
- expect(described_class::HOURLY_DECAY_FACTOR).to eq(0.998)
20
+ it 'defines POWER_LAW_ALPHA' do
21
+ expect(described_class::POWER_LAW_ALPHA).to eq(0.1)
22
22
  end
23
23
 
24
24
  it 'defines DECAY_THRESHOLD' do
@@ -43,20 +43,28 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Confidence do
43
43
  end
44
44
 
45
45
  describe '.apply_decay' do
46
- it 'multiplies confidence by HOURLY_DECAY_FACTOR' do
46
+ it 'applies power-law decay with default alpha when no age given' do
47
47
  result = described_class.apply_decay(confidence: 1.0)
48
- expect(result).to eq(0.998)
48
+ expected = 1.0 / (1.0 + 0.1) # ~0.909091
49
+ expect(result).to be_within(0.0001).of(expected)
49
50
  end
50
51
 
51
- it 'accepts a custom factor' do
52
- result = described_class.apply_decay(confidence: 1.0, factor: 0.5)
53
- expect(result).to eq(0.5)
52
+ it 'applies age-based power-law decay when age_hours is provided' do
53
+ result = described_class.apply_decay(confidence: 1.0, age_hours: 10)
54
+ expect(result).to be > 0.0
55
+ expect(result).to be < 1.0
54
56
  end
55
57
 
56
58
  it 'clamps to 0.0 minimum' do
57
- result = described_class.apply_decay(confidence: 0.001, factor: 0.001)
59
+ result = described_class.apply_decay(confidence: 0.001)
58
60
  expect(result).to be >= 0.0
59
61
  end
62
+
63
+ it 'accepts a custom alpha' do
64
+ result = described_class.apply_decay(confidence: 1.0, alpha: 0.5)
65
+ expected = 1.0 / (1.0 + 0.5) # ~0.6667
66
+ expect(result).to be_within(0.0001).of(expected)
67
+ end
60
68
  end
61
69
 
62
70
  describe '.apply_retrieval_boost' do
@@ -81,6 +89,11 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Confidence do
81
89
  result = described_class.apply_corroboration_boost(confidence: 0.9)
82
90
  expect(result).to eq(1.0)
83
91
  end
92
+
93
+ it 'applies half weight for same-source corroboration' do
94
+ result = described_class.apply_corroboration_boost(confidence: 0.5, weight: 0.5)
95
+ expect(result).to eq(0.65)
96
+ end
84
97
  end
85
98
 
86
99
  describe '.decayed?' do
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'legion/extensions/apollo/helpers/entity_watchdog'
5
+
6
+ RSpec.describe Legion::Extensions::Apollo::Helpers::EntityWatchdog do
7
+ describe '.detect_entities' do
8
+ it 'detects person names (capitalized multi-word)' do
9
+ entities = described_class.detect_entities(text: 'Talked to Jane Doe about the project')
10
+ person = entities.find { |e| e[:type] == :person }
11
+ expect(person).not_to be_nil
12
+ expect(person[:value]).to eq('Jane Doe')
13
+ end
14
+
15
+ it 'detects service URLs' do
16
+ entities = described_class.detect_entities(text: 'Deployed to https://api.example.com/v1')
17
+ service = entities.find { |e| e[:type] == :service }
18
+ expect(service).not_to be_nil
19
+ expect(service[:value]).to include('example.com')
20
+ end
21
+
22
+ it 'detects repo references' do
23
+ entities = described_class.detect_entities(text: 'Check LegionIO/lex-mesh for the code')
24
+ repo = entities.find { |e| e[:type] == :repo }
25
+ expect(repo).not_to be_nil
26
+ expect(repo[:value]).to eq('LegionIO/lex-mesh')
27
+ end
28
+
29
+ it 'detects concept keywords from settings' do
30
+ allow(described_class).to receive(:concept_pattern).and_return(/\b(?:kubernetes|terraform)\b/i)
31
+ entities = described_class.detect_entities(text: 'Using Terraform to deploy Kubernetes')
32
+ concepts = entities.select { |e| e[:type] == :concept }
33
+ expect(concepts.size).to eq(2)
34
+ end
35
+
36
+ it 'deduplicates entities by type and lowercase value' do
37
+ entities = described_class.detect_entities(text: 'Jane Doe met Jane Doe again')
38
+ persons = entities.select { |e| e[:type] == :person }
39
+ expect(persons.size).to eq(1)
40
+ end
41
+
42
+ it 'returns empty array for text with no entities' do
43
+ entities = described_class.detect_entities(text: 'nothing special here')
44
+ expect(entities).to be_empty
45
+ end
46
+
47
+ it 'filters by specified types' do
48
+ entities = described_class.detect_entities(
49
+ text: 'Jane Doe at https://example.com with LegionIO/lex-mesh',
50
+ types: [:person]
51
+ )
52
+ expect(entities.all? { |e| e[:type] == :person }).to be true
53
+ end
54
+ end
55
+
56
+ describe '.link_or_create' do
57
+ it 'returns counts for empty entities' do
58
+ result = described_class.link_or_create(entities: [])
59
+ expect(result[:success]).to be true
60
+ expect(result[:linked]).to eq(0)
61
+ expect(result[:created]).to eq(0)
62
+ end
63
+ end
64
+ end
@@ -13,8 +13,9 @@ RSpec.describe 'Apollo Decay Cycle' do
13
13
  end
14
14
 
15
15
  describe '#decay_rate' do
16
- it 'returns default rate when settings unavailable' do
17
- expect(maintenance.send(:decay_rate)).to eq(0.998)
16
+ it 'returns power-law derived rate when settings unavailable' do
17
+ expected = 1.0 / (1.0 + 0.1) # ~0.909091
18
+ expect(maintenance.send(:decay_rate)).to be_within(0.0001).of(expected)
18
19
  end
19
20
  end
20
21
 
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ require 'legion/extensions/apollo/helpers/confidence'
6
+ require 'legion/extensions/apollo/helpers/similarity'
7
+ require 'legion/extensions/apollo/helpers/graph_query'
8
+ require 'legion/extensions/apollo/runners/entity_extractor'
9
+
10
+ RSpec.describe Legion::Extensions::Apollo::Runners::EntityExtractor do
11
+ let(:runner) do
12
+ obj = Object.new
13
+ obj.extend(described_class)
14
+ obj
15
+ end
16
+
17
+ describe '#extract_entities' do
18
+ context 'when Legion::LLM is not available' do
19
+ before { hide_const('Legion::LLM') if defined?(Legion::LLM) }
20
+
21
+ it 'returns an empty entity list' do
22
+ result = runner.extract_entities(text: 'Jane works on lex-synapse')
23
+ expect(result[:success]).to be true
24
+ expect(result[:entities]).to eq([])
25
+ expect(result[:source]).to eq(:unavailable)
26
+ end
27
+ end
28
+
29
+ context 'when Legion::LLM is available' do
30
+ let(:llm_result) do
31
+ {
32
+ data: {
33
+ entities: [
34
+ { name: 'lex-synapse', type: 'repository', confidence: 0.9 },
35
+ { name: 'Jane Doe', type: 'person', confidence: 0.8 }
36
+ ]
37
+ }
38
+ }
39
+ end
40
+
41
+ before do
42
+ stub_const('Legion::LLM', Module.new do
43
+ def self.started? = true
44
+
45
+ def self.structured(**_opts) = { data: { entities: [] } }
46
+ end)
47
+ allow(Legion::LLM).to receive(:structured).and_return(llm_result)
48
+ end
49
+
50
+ it 'returns extracted entities' do
51
+ result = runner.extract_entities(text: 'Jane works on lex-synapse')
52
+ expect(result[:success]).to be true
53
+ expect(result[:entities].size).to eq(2)
54
+ expect(result[:source]).to eq(:llm)
55
+ end
56
+
57
+ it 'filters to configured entity types' do
58
+ result = runner.extract_entities(
59
+ text: 'Jane works on lex-synapse',
60
+ entity_types: ['repository']
61
+ )
62
+ expect(result[:entities].all? { |e| e[:type] == 'repository' }).to be true
63
+ end
64
+
65
+ it 'applies minimum confidence filter' do
66
+ result = runner.extract_entities(
67
+ text: 'Jane works on lex-synapse',
68
+ min_confidence: 0.85
69
+ )
70
+ expect(result[:entities].size).to eq(1)
71
+ expect(result[:entities].first[:name]).to eq('lex-synapse')
72
+ end
73
+ end
74
+
75
+ context 'when LLM raises' do
76
+ before do
77
+ stub_const('Legion::LLM', Module.new do
78
+ def self.started? = true
79
+
80
+ def self.structured(**_opts) = raise(StandardError, 'timeout')
81
+ end)
82
+ end
83
+
84
+ it 'returns success false with error message' do
85
+ result = runner.extract_entities(text: 'anything')
86
+ expect(result[:success]).to be false
87
+ expect(result[:error]).to include('timeout')
88
+ end
89
+ end
90
+
91
+ context 'with empty text' do
92
+ it 'returns early with empty list' do
93
+ result = runner.extract_entities(text: '')
94
+ expect(result[:success]).to be true
95
+ expect(result[:entities]).to eq([])
96
+ end
97
+
98
+ it 'handles nil text' do
99
+ result = runner.extract_entities(text: nil)
100
+ expect(result[:success]).to be true
101
+ expect(result[:entities]).to eq([])
102
+ end
103
+ end
104
+ end
105
+
106
+ describe '#entity_extraction_prompt' do
107
+ it 'returns a non-empty string' do
108
+ prompt = runner.entity_extraction_prompt(
109
+ text: 'test text', entity_types: %w[person service]
110
+ )
111
+ expect(prompt).to be_a(String)
112
+ expect(prompt).to include('person')
113
+ expect(prompt).to include('service')
114
+ end
115
+ end
116
+
117
+ describe '#entity_schema' do
118
+ it 'returns a JSON Schema hash' do
119
+ schema = runner.entity_schema
120
+ expect(schema[:type]).to eq('object')
121
+ expect(schema[:properties]).to have_key(:entities)
122
+ end
123
+ end
124
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -50,6 +50,7 @@ files:
50
50
  - lib/legion/extensions/apollo.rb
51
51
  - lib/legion/extensions/apollo/actors/corroboration_checker.rb
52
52
  - lib/legion/extensions/apollo/actors/decay.rb
53
+ - lib/legion/extensions/apollo/actors/entity_watchdog.rb
53
54
  - lib/legion/extensions/apollo/actors/expertise_aggregator.rb
54
55
  - lib/legion/extensions/apollo/actors/ingest.rb
55
56
  - lib/legion/extensions/apollo/actors/query_responder.rb
@@ -57,8 +58,10 @@ files:
57
58
  - lib/legion/extensions/apollo/gaia_integration.rb
58
59
  - lib/legion/extensions/apollo/helpers/confidence.rb
59
60
  - lib/legion/extensions/apollo/helpers/embedding.rb
61
+ - lib/legion/extensions/apollo/helpers/entity_watchdog.rb
60
62
  - lib/legion/extensions/apollo/helpers/graph_query.rb
61
63
  - lib/legion/extensions/apollo/helpers/similarity.rb
64
+ - lib/legion/extensions/apollo/runners/entity_extractor.rb
62
65
  - lib/legion/extensions/apollo/runners/expertise.rb
63
66
  - lib/legion/extensions/apollo/runners/knowledge.rb
64
67
  - lib/legion/extensions/apollo/runners/maintenance.rb
@@ -70,6 +73,7 @@ files:
70
73
  - lib/legion/extensions/apollo/transport/queues/query.rb
71
74
  - lib/legion/extensions/apollo/version.rb
72
75
  - spec/legion/extensions/apollo/actors/decay_spec.rb
76
+ - spec/legion/extensions/apollo/actors/entity_watchdog_spec.rb
73
77
  - spec/legion/extensions/apollo/actors/expertise_aggregator_spec.rb
74
78
  - spec/legion/extensions/apollo/actors/ingest_spec.rb
75
79
  - spec/legion/extensions/apollo/client_spec.rb
@@ -77,9 +81,11 @@ files:
77
81
  - spec/legion/extensions/apollo/gaia_integration_spec.rb
78
82
  - spec/legion/extensions/apollo/helpers/confidence_spec.rb
79
83
  - spec/legion/extensions/apollo/helpers/embedding_spec.rb
84
+ - spec/legion/extensions/apollo/helpers/entity_watchdog_spec.rb
80
85
  - spec/legion/extensions/apollo/helpers/graph_query_spec.rb
81
86
  - spec/legion/extensions/apollo/helpers/similarity_spec.rb
82
87
  - spec/legion/extensions/apollo/runners/decay_cycle_spec.rb
88
+ - spec/legion/extensions/apollo/runners/entity_extractor_spec.rb
83
89
  - spec/legion/extensions/apollo/runners/expertise_spec.rb
84
90
  - spec/legion/extensions/apollo/runners/knowledge_spec.rb
85
91
  - spec/legion/extensions/apollo/runners/maintenance_spec.rb