lex-apollo 0.4.18 → 0.4.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/lib/legion/extensions/apollo/actors/entity_watchdog.rb +8 -4
- data/lib/legion/extensions/apollo/actors/writeback_vectorize.rb +6 -0
- data/lib/legion/extensions/apollo/api.rb +5 -3
- data/lib/legion/extensions/apollo/gaia_integration.rb +1 -1
- data/lib/legion/extensions/apollo/runners/entity_extractor.rb +14 -3
- data/lib/legion/extensions/apollo/runners/expertise.rb +41 -28
- data/lib/legion/extensions/apollo/runners/gas.rb +65 -25
- data/lib/legion/extensions/apollo/runners/knowledge.rb +192 -48
- data/lib/legion/extensions/apollo/runners/maintenance.rb +14 -2
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/spec/legion/extensions/apollo/contradiction_spec.rb +21 -0
- data/spec/legion/extensions/apollo/gaia_integration_spec.rb +14 -0
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +130 -4
- data/spec/spec_helper.rb +1 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f197e46e616eb71f939175480496d17775c67985bf70d631ac8d089724c7ac7d
|
|
4
|
+
data.tar.gz: c8f6ee951339eda218647c3bee95bfda3f32c6ee62a44abcaad8940b40214bd9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5b4822965e47e806bf21b1e07e3a675fff365bdea474514278ab61efc1be2e3fc557756f2ad8ab32e63b2d1eb24286dfe25c44ceacbba723bd877d1402d7a5ab
|
|
7
|
+
data.tar.gz: b67b970d6cf8734abd96c5de5e993e802ba924fd966307e60fd7911f7879559e3835d262ed653a269b740bbffd2574df9e156afe4e4c9dfd6a15b70afce6d658
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.20] - 2026-04-25
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- `Knowledge#handle_ingest` now sanitizes stored content for PostgreSQL by scrubbing invalid UTF-8 and removing null bytes before inserts.
|
|
7
|
+
- `Knowledge#handle_ingest` now truncates short metadata fields to schema-safe lengths before writing entries.
|
|
8
|
+
- `Knowledge#handle_query` now treats very short queries as browse/list requests, returning recent non-archived entries without embedding noise while preserving explicit status filters.
|
|
9
|
+
- `Knowledge` Sequel rescue paths now log exception class, message, and a short backtrace before returning structured errors.
|
|
10
|
+
- Apollo runners and actors now emit debug/info lifecycle logs around ingest, query, browse, maintenance, expertise, entity extraction, writeback vectorization, entity watchdog, and GAS pipeline paths so debug-level logging can trace workflows end to end.
|
|
11
|
+
|
|
12
|
+
## [0.4.19] - 2026-04-24
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- `store_knowledge` no longer rejects LLM-provided content_type values — normalizes free-form strings (`"reasoning"`, `"text"`, `"text/plain"`, `":fact"`, `"inference"`) to valid symbols via alias map with `:observation` fallback
|
|
16
|
+
- `GaiaIntegration.publish_insight` now passes `:observation` instead of the domain string as content_type (was sending `"general"` or domain names which failed validation)
|
|
17
|
+
- `llm_detects_conflict?` truncates content to 4000 chars before sending to LLM to prevent context overflow errors (was passing full entry content, hitting 65536-token limit)
|
|
18
|
+
|
|
3
19
|
## [0.4.18] - 2026-04-24
|
|
4
20
|
|
|
5
21
|
### Fixed
|
|
@@ -34,6 +34,7 @@ module Legion
|
|
|
34
34
|
|
|
35
35
|
def scan_and_ingest
|
|
36
36
|
texts = recent_task_log_texts
|
|
37
|
+
log.debug("EntityWatchdog scan_and_ingest log_texts=#{texts.size}")
|
|
37
38
|
return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
|
|
38
39
|
|
|
39
40
|
ingested = 0
|
|
@@ -53,10 +54,10 @@ module Legion
|
|
|
53
54
|
end
|
|
54
55
|
end
|
|
55
56
|
|
|
56
|
-
log.
|
|
57
|
+
log.info("EntityWatchdog ingested=#{ingested} logs_scanned=#{texts.size}")
|
|
57
58
|
{ success: true, ingested: ingested, logs_scanned: texts.size }
|
|
58
59
|
rescue StandardError => e
|
|
59
|
-
|
|
60
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.scan_and_ingest')
|
|
60
61
|
{ success: false, error: e.message }
|
|
61
62
|
end
|
|
62
63
|
|
|
@@ -71,7 +72,9 @@ module Legion
|
|
|
71
72
|
.order(Sequel.desc(:created_at))
|
|
72
73
|
.limit(log_limit)
|
|
73
74
|
.select_map(:message)
|
|
74
|
-
logs.map(&:to_s).reject(&:empty?).uniq
|
|
75
|
+
texts = logs.map(&:to_s).reject(&:empty?).uniq
|
|
76
|
+
log.debug("EntityWatchdog recent_task_log_texts lookback=#{lookback} limit=#{log_limit} raw=#{logs.size} unique=#{texts.size}")
|
|
77
|
+
texts
|
|
75
78
|
rescue StandardError => e
|
|
76
79
|
log.warn("EntityWatchdog recent_task_log_texts failed: #{e.message}")
|
|
77
80
|
[]
|
|
@@ -104,8 +107,9 @@ module Legion
|
|
|
104
107
|
source_agent: 'lex-apollo:entity_watchdog',
|
|
105
108
|
context: { entity_type: entity[:type], original_name: entity[:name] }
|
|
106
109
|
).publish
|
|
110
|
+
log.info("EntityWatchdog published entity type=#{entity[:type]} name=#{entity[:name]}")
|
|
107
111
|
rescue StandardError => e
|
|
108
|
-
|
|
112
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.publish_entity_ingest')
|
|
109
113
|
end
|
|
110
114
|
|
|
111
115
|
def entity_types
|
|
@@ -14,21 +14,27 @@ module Legion
|
|
|
14
14
|
|
|
15
15
|
def handle_vectorize(payload)
|
|
16
16
|
payload = symbolize(payload)
|
|
17
|
+
log.debug("WritebackVectorize handle_vectorize content_length=#{payload[:content].to_s.length} content_type=#{payload[:content_type] || 'nil'}")
|
|
17
18
|
result = Legion::LLM::Embeddings.generate(text: payload[:content])
|
|
18
19
|
vector = result.is_a?(Hash) ? result[:vector] : result
|
|
19
20
|
embedding = vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
|
|
21
|
+
log.debug("WritebackVectorize embedding_dimensions=#{embedding.length} vector_generated=#{vector.is_a?(Array) && vector.any?}")
|
|
20
22
|
enriched = payload.merge(embedding: embedding)
|
|
21
23
|
|
|
22
24
|
if Helpers::Capability.can_write?
|
|
25
|
+
log.debug('WritebackVectorize route=direct_ingest')
|
|
23
26
|
Runners::Knowledge.handle_ingest(**enriched)
|
|
24
27
|
else
|
|
28
|
+
log.debug('WritebackVectorize route=transport_writeback')
|
|
25
29
|
Transport::Messages::Writeback.new(
|
|
26
30
|
**enriched, has_embedding: true
|
|
27
31
|
).publish
|
|
28
32
|
end
|
|
29
33
|
|
|
34
|
+
log.info('WritebackVectorize completed action=vectorized')
|
|
30
35
|
{ success: true, action: :vectorized }
|
|
31
36
|
rescue StandardError => e
|
|
37
|
+
handle_exception(e, level: :error, operation: 'apollo.writeback_vectorize.handle_vectorize')
|
|
32
38
|
{ success: false, error: e.message }
|
|
33
39
|
end
|
|
34
40
|
|
|
@@ -51,15 +51,17 @@ module Legion
|
|
|
51
51
|
req = json_body
|
|
52
52
|
halt 400, { error: 'query is required' }.to_json unless req[:query]
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
query_options = {
|
|
55
55
|
query: req[:query],
|
|
56
56
|
limit: req[:limit] || 10,
|
|
57
57
|
min_confidence: req[:min_confidence] || 0.3,
|
|
58
|
-
status: req[:status] || [:confirmed],
|
|
59
58
|
tags: req[:tags],
|
|
60
59
|
domain: req[:domain],
|
|
61
60
|
agent_id: req[:agent_id] || 'api'
|
|
62
|
-
|
|
61
|
+
}
|
|
62
|
+
query_options[:status] = req[:status] if req.key?(:status)
|
|
63
|
+
|
|
64
|
+
result = runner.handle_query(**query_options)
|
|
63
65
|
status result[:success] ? 200 : 500
|
|
64
66
|
result.to_json
|
|
65
67
|
end
|
|
@@ -15,7 +15,7 @@ module Legion
|
|
|
15
15
|
client = Legion::Extensions::Apollo::Client.new(agent_id: agent_id)
|
|
16
16
|
client.store_knowledge(
|
|
17
17
|
content: insight[:content],
|
|
18
|
-
content_type:
|
|
18
|
+
content_type: :observation,
|
|
19
19
|
source_agent: agent_id,
|
|
20
20
|
tags: Array(insight[:tags])
|
|
21
21
|
)
|
|
@@ -4,17 +4,24 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Apollo
|
|
6
6
|
module Runners
|
|
7
|
-
module EntityExtractor
|
|
7
|
+
module EntityExtractor
|
|
8
8
|
DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
|
|
9
9
|
DEFAULT_MIN_CONFIDENCE = 0.7
|
|
10
10
|
|
|
11
11
|
def extract_entities(text:, entity_types: nil, min_confidence: Helpers::Confidence.apollo_setting(:entity_extractor, :min_confidence, default: DEFAULT_MIN_CONFIDENCE), **) # rubocop:disable Layout/LineLength
|
|
12
|
-
|
|
12
|
+
if text.to_s.strip.empty?
|
|
13
|
+
log.debug('Apollo EntityExtractor.extract_entities skipped reason=empty_text')
|
|
14
|
+
return { success: true, entities: [], source: :empty }
|
|
15
|
+
end
|
|
13
16
|
|
|
14
|
-
|
|
17
|
+
unless defined?(Legion::LLM) && Legion::LLM.started?
|
|
18
|
+
log.debug('Apollo EntityExtractor.extract_entities skipped reason=llm_unavailable')
|
|
19
|
+
return { success: true, entities: [], source: :unavailable }
|
|
20
|
+
end
|
|
15
21
|
|
|
16
22
|
types = Array(entity_types).map(&:to_s)
|
|
17
23
|
types = DEFAULT_ENTITY_TYPES if types.empty?
|
|
24
|
+
log.debug("Apollo EntityExtractor.extract_entities text_length=#{text.to_s.length} types=#{types.join(',')} min_confidence=#{min_confidence}")
|
|
18
25
|
|
|
19
26
|
result = Legion::LLM.structured(
|
|
20
27
|
messages: [
|
|
@@ -29,9 +36,11 @@ module Legion
|
|
|
29
36
|
(entity[:confidence] || 0.0) >= min_confidence &&
|
|
30
37
|
(types.empty? || types.include?(entity[:type].to_s))
|
|
31
38
|
end
|
|
39
|
+
log.info("Apollo EntityExtractor.extract_entities raw=#{raw_entities.size} filtered=#{filtered.size}")
|
|
32
40
|
|
|
33
41
|
{ success: true, entities: filtered, source: :llm }
|
|
34
42
|
rescue StandardError => e
|
|
43
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_extractor.extract_entities')
|
|
35
44
|
{ success: false, entities: [], error: e.message, source: :error }
|
|
36
45
|
end
|
|
37
46
|
|
|
@@ -70,6 +79,8 @@ module Legion
|
|
|
70
79
|
required: ['entities']
|
|
71
80
|
}
|
|
72
81
|
end
|
|
82
|
+
|
|
83
|
+
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
73
84
|
end
|
|
74
85
|
end
|
|
75
86
|
end
|
|
@@ -18,52 +18,65 @@ module Legion
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def aggregate(**)
|
|
21
|
-
|
|
21
|
+
unless defined?(Legion::Data::Model::ApolloEntry)
|
|
22
|
+
log.warn('Apollo Expertise.aggregate skipped: apollo_data_not_available')
|
|
23
|
+
return { success: false, error: 'apollo_data_not_available' }
|
|
24
|
+
end
|
|
22
25
|
|
|
23
26
|
entries = Legion::Data::Model::ApolloEntry
|
|
24
27
|
.select(:source_agent, :tags, :confidence)
|
|
25
28
|
.exclude(source_agent: nil)
|
|
26
29
|
.all
|
|
27
|
-
|
|
28
|
-
groups = {}
|
|
29
|
-
entries.each do |entry|
|
|
30
|
-
agent = entry.source_agent
|
|
31
|
-
domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
|
|
32
|
-
key = "#{agent}:#{domain}"
|
|
33
|
-
groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
|
|
34
|
-
groups[key][:confidences] << entry.confidence.to_f
|
|
35
|
-
end
|
|
30
|
+
log.debug("Apollo Expertise.aggregate entries=#{entries.size}")
|
|
36
31
|
|
|
37
32
|
agent_set = Set.new
|
|
38
33
|
domain_set = Set.new
|
|
39
34
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
count = group[:confidences].size
|
|
43
|
-
cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
|
|
44
|
-
proficiency = [avg * Math.log2(count + 1), cap].min
|
|
45
|
-
|
|
46
|
-
existing = Legion::Data::Model::ApolloExpertise
|
|
47
|
-
.where(agent_id: group[:agent_id], domain: group[:domain]).first
|
|
48
|
-
|
|
49
|
-
if existing
|
|
50
|
-
existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
|
|
51
|
-
else
|
|
52
|
-
Legion::Data::Model::ApolloExpertise.create(
|
|
53
|
-
agent_id: group[:agent_id], domain: group[:domain],
|
|
54
|
-
proficiency: proficiency, entry_count: count, last_active_at: Time.now
|
|
55
|
-
)
|
|
56
|
-
end
|
|
57
|
-
|
|
35
|
+
expertise_groups(entries).each_value do |group|
|
|
36
|
+
upsert_expertise_group(group)
|
|
58
37
|
agent_set << group[:agent_id]
|
|
59
38
|
domain_set << group[:domain]
|
|
60
39
|
end
|
|
61
40
|
|
|
62
41
|
{ success: true, agents: agent_set.size, domains: domain_set.size }
|
|
42
|
+
.tap { |result| log.info("Apollo Expertise.aggregate agents=#{result[:agents]} domains=#{result[:domains]}") }
|
|
63
43
|
rescue Sequel::Error => e
|
|
44
|
+
handle_exception(e, level: :error, operation: 'apollo.expertise.aggregate')
|
|
64
45
|
{ success: false, error: e.message }
|
|
65
46
|
end
|
|
66
47
|
|
|
48
|
+
def expertise_groups(entries)
|
|
49
|
+
entries.each_with_object({}) do |entry, groups|
|
|
50
|
+
agent = entry.source_agent
|
|
51
|
+
domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
|
|
52
|
+
key = "#{agent}:#{domain}"
|
|
53
|
+
groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
|
|
54
|
+
groups[key][:confidences] << entry.confidence.to_f
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def upsert_expertise_group(group)
|
|
59
|
+
count = group[:confidences].size
|
|
60
|
+
proficiency = expertise_proficiency(group[:confidences])
|
|
61
|
+
existing = Legion::Data::Model::ApolloExpertise
|
|
62
|
+
.where(agent_id: group[:agent_id], domain: group[:domain]).first
|
|
63
|
+
|
|
64
|
+
if existing
|
|
65
|
+
existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
|
|
66
|
+
else
|
|
67
|
+
Legion::Data::Model::ApolloExpertise.create(
|
|
68
|
+
agent_id: group[:agent_id], domain: group[:domain],
|
|
69
|
+
proficiency: proficiency, entry_count: count, last_active_at: Time.now
|
|
70
|
+
)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def expertise_proficiency(confidences)
|
|
75
|
+
avg = confidences.sum / confidences.size
|
|
76
|
+
cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
|
|
77
|
+
[avg * Math.log2(confidences.size + 1), cap].min
|
|
78
|
+
end
|
|
79
|
+
|
|
67
80
|
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
68
81
|
end
|
|
69
82
|
end
|
|
@@ -4,7 +4,10 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Apollo
|
|
6
6
|
module Runners
|
|
7
|
-
module Gas
|
|
7
|
+
module Gas
|
|
8
|
+
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
9
|
+
extend Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
10
|
+
|
|
8
11
|
RELATION_TYPES = %w[
|
|
9
12
|
similar_to contradicts depends_on causes
|
|
10
13
|
part_of supersedes supports_by extends
|
|
@@ -16,10 +19,6 @@ module Legion
|
|
|
16
19
|
|
|
17
20
|
module_function
|
|
18
21
|
|
|
19
|
-
def log
|
|
20
|
-
Legion::Logging
|
|
21
|
-
end
|
|
22
|
-
|
|
23
22
|
def json_load(str)
|
|
24
23
|
::JSON.parse(str, symbolize_names: true)
|
|
25
24
|
end
|
|
@@ -31,7 +30,12 @@ module Legion
|
|
|
31
30
|
def fallback_confidence = Helpers::Confidence.apollo_setting(:gas, :fallback_confidence, default: 0.5)
|
|
32
31
|
|
|
33
32
|
def process(audit_event)
|
|
34
|
-
|
|
33
|
+
unless processable?(audit_event)
|
|
34
|
+
log.debug('GAS process skipped reason=no_content')
|
|
35
|
+
return { phases_completed: 0, reason: 'no content' }
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
log.debug("GAS process start request_id=#{audit_event[:request_id] || 'nil'} messages=#{Array(audit_event[:messages]).size} response_length=#{audit_event[:response_content].to_s.length}") # rubocop:disable Layout/LineLength
|
|
35
39
|
|
|
36
40
|
facts = phase_comprehend(audit_event)
|
|
37
41
|
entities = phase_extract(audit_event, facts)
|
|
@@ -40,7 +44,7 @@ module Legion
|
|
|
40
44
|
deposit_result = phase_deposit(facts, entities, relations, synthesis, audit_event)
|
|
41
45
|
anticipations = phase_anticipate(facts, synthesis)
|
|
42
46
|
|
|
43
|
-
{
|
|
47
|
+
result = {
|
|
44
48
|
phases_completed: 6,
|
|
45
49
|
facts: facts.length,
|
|
46
50
|
entities: entities.length,
|
|
@@ -49,8 +53,10 @@ module Legion
|
|
|
49
53
|
deposited: deposit_result,
|
|
50
54
|
anticipations: anticipations.length
|
|
51
55
|
}
|
|
56
|
+
log.info("GAS process complete facts=#{result[:facts]} entities=#{result[:entities]} relations=#{result[:relations]} synthesis=#{result[:synthesis]} anticipations=#{result[:anticipations]}") # rubocop:disable Layout/LineLength
|
|
57
|
+
result
|
|
52
58
|
rescue StandardError => e
|
|
53
|
-
log.
|
|
59
|
+
log.error("GAS pipeline error: #{e.message}")
|
|
54
60
|
{ phases_completed: 0, error: e.message }
|
|
55
61
|
end
|
|
56
62
|
|
|
@@ -63,19 +69,24 @@ module Legion
|
|
|
63
69
|
messages = audit_event[:messages]
|
|
64
70
|
response = audit_event[:response_content]
|
|
65
71
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
72
|
+
mode = llm_available? ? :llm : :mechanical
|
|
73
|
+
log.debug("GAS phase_comprehend mode=#{mode} messages=#{Array(messages).size} response_length=#{response.to_s.length}")
|
|
74
|
+
facts = mode == :llm ? llm_comprehend(messages, response) : mechanical_comprehend(messages, response)
|
|
75
|
+
log.debug("GAS phase_comprehend facts=#{facts.size}")
|
|
76
|
+
facts
|
|
71
77
|
end
|
|
72
78
|
|
|
73
79
|
# Phase 2: Extract - entity extraction (delegates to existing EntityExtractor)
|
|
74
80
|
def phase_extract(audit_event, _facts)
|
|
75
|
-
|
|
81
|
+
unless defined?(Runners::EntityExtractor)
|
|
82
|
+
log.debug('GAS phase_extract skipped reason=entity_extractor_unavailable')
|
|
83
|
+
return []
|
|
84
|
+
end
|
|
76
85
|
|
|
77
86
|
result = Runners::EntityExtractor.extract_entities(text: audit_event[:response_content])
|
|
78
|
-
result[:success] ? (result[:entities] || []) : []
|
|
87
|
+
entities = result[:success] ? (result[:entities] || []) : []
|
|
88
|
+
log.debug("GAS phase_extract success=#{result[:success]} entities=#{entities.size}")
|
|
89
|
+
entities
|
|
79
90
|
rescue StandardError => e
|
|
80
91
|
log.warn("GAS phase_extract failed: #{e.message}")
|
|
81
92
|
[]
|
|
@@ -83,10 +94,16 @@ module Legion
|
|
|
83
94
|
|
|
84
95
|
# Phase 3: Relate - classify relationships between new and existing entries
|
|
85
96
|
def phase_relate(facts, _entities)
|
|
86
|
-
|
|
97
|
+
unless defined?(Runners::Knowledge)
|
|
98
|
+
log.debug('GAS phase_relate skipped reason=knowledge_runner_unavailable')
|
|
99
|
+
return []
|
|
100
|
+
end
|
|
87
101
|
|
|
88
102
|
existing = fetch_similar_entries(facts)
|
|
89
|
-
|
|
103
|
+
if existing.empty?
|
|
104
|
+
log.debug("GAS phase_relate skipped reason=no_existing_entries facts=#{facts.size}")
|
|
105
|
+
return []
|
|
106
|
+
end
|
|
90
107
|
|
|
91
108
|
relations = []
|
|
92
109
|
facts.each do |fact|
|
|
@@ -95,15 +112,24 @@ module Legion
|
|
|
95
112
|
relations << relation if relation
|
|
96
113
|
end
|
|
97
114
|
end
|
|
115
|
+
log.debug("GAS phase_relate facts=#{facts.size} existing=#{existing.size} relations=#{relations.size}")
|
|
98
116
|
relations
|
|
99
117
|
end
|
|
100
118
|
|
|
101
119
|
# Phase 4: Synthesize - generate derivative knowledge
|
|
102
120
|
def phase_synthesize(facts, _relations)
|
|
103
|
-
|
|
104
|
-
|
|
121
|
+
if facts.length < 2
|
|
122
|
+
log.debug("GAS phase_synthesize skipped reason=insufficient_facts facts=#{facts.length}")
|
|
123
|
+
return []
|
|
124
|
+
end
|
|
125
|
+
unless llm_available?
|
|
126
|
+
log.debug('GAS phase_synthesize skipped reason=llm_unavailable')
|
|
127
|
+
return []
|
|
128
|
+
end
|
|
105
129
|
|
|
106
|
-
llm_synthesize(facts)
|
|
130
|
+
synthesis = llm_synthesize(facts)
|
|
131
|
+
log.debug("GAS phase_synthesize synthesis=#{synthesis.size}")
|
|
132
|
+
synthesis
|
|
107
133
|
rescue StandardError => e
|
|
108
134
|
log.warn("GAS phase_synthesize failed: #{e.message}")
|
|
109
135
|
[]
|
|
@@ -111,7 +137,10 @@ module Legion
|
|
|
111
137
|
|
|
112
138
|
# Phase 5: Deposit - atomic write to Apollo
|
|
113
139
|
def phase_deposit(facts, _entities, _relations, _synthesis, audit_event)
|
|
114
|
-
|
|
140
|
+
unless defined?(Runners::Knowledge)
|
|
141
|
+
log.debug('GAS phase_deposit skipped reason=knowledge_runner_unavailable')
|
|
142
|
+
return { deposited: 0 }
|
|
143
|
+
end
|
|
115
144
|
|
|
116
145
|
deposited = 0
|
|
117
146
|
facts.each do |fact|
|
|
@@ -128,15 +157,24 @@ module Legion
|
|
|
128
157
|
rescue StandardError => e
|
|
129
158
|
log.warn("GAS deposit error: #{e.message}")
|
|
130
159
|
end
|
|
160
|
+
log.info("GAS phase_deposit deposited=#{deposited} facts=#{facts.size}")
|
|
131
161
|
{ deposited: deposited }
|
|
132
162
|
end
|
|
133
163
|
|
|
134
164
|
# Phase 6: Anticipate - pre-cache likely follow-up questions
|
|
135
165
|
def phase_anticipate(facts, _synthesis)
|
|
136
|
-
|
|
137
|
-
|
|
166
|
+
if facts.empty?
|
|
167
|
+
log.debug('GAS phase_anticipate skipped reason=no_facts')
|
|
168
|
+
return []
|
|
169
|
+
end
|
|
170
|
+
unless llm_available?
|
|
171
|
+
log.debug('GAS phase_anticipate skipped reason=llm_unavailable')
|
|
172
|
+
return []
|
|
173
|
+
end
|
|
138
174
|
|
|
139
|
-
llm_anticipate(facts)
|
|
175
|
+
anticipations = llm_anticipate(facts)
|
|
176
|
+
log.debug("GAS phase_anticipate anticipations=#{anticipations.size}")
|
|
177
|
+
anticipations
|
|
140
178
|
rescue StandardError => e
|
|
141
179
|
log.warn("GAS phase_anticipate failed: #{e.message}")
|
|
142
180
|
[]
|
|
@@ -153,7 +191,9 @@ module Legion
|
|
|
153
191
|
log.warn("GAS fetch_similar_entries failed for fact: #{e.message}")
|
|
154
192
|
next
|
|
155
193
|
end
|
|
156
|
-
entries.uniq { |e| e[:id] }
|
|
194
|
+
unique = entries.uniq { |e| e[:id] }
|
|
195
|
+
log.debug("GAS fetch_similar_entries facts=#{facts.size} entries=#{unique.size}")
|
|
196
|
+
unique
|
|
157
197
|
end
|
|
158
198
|
|
|
159
199
|
def classify_relation(fact, entry)
|
|
@@ -14,11 +14,19 @@ module Legion
|
|
|
14
14
|
'general' => :all
|
|
15
15
|
}.freeze
|
|
16
16
|
|
|
17
|
+
CONTENT_TYPE_ALIASES = {
|
|
18
|
+
reasoning: :concept, analysis: :concept, explanation: :concept,
|
|
19
|
+
text: :observation, general: :observation, note: :observation, summary: :observation,
|
|
20
|
+
rule: :procedure, step: :procedure, instruction: :procedure,
|
|
21
|
+
link: :association, relation: :association, connection: :association,
|
|
22
|
+
inference: :association, implication: :association
|
|
23
|
+
}.freeze
|
|
24
|
+
DEFAULT_QUERY_STATUS = [:confirmed].freeze
|
|
25
|
+
UNSET = Object.new.freeze
|
|
26
|
+
|
|
17
27
|
def store_knowledge(content:, content_type:, tags: [], source_agent: nil, context: {}, **)
|
|
18
|
-
content_type = content_type
|
|
19
|
-
|
|
20
|
-
raise ArgumentError, "invalid content_type: #{content_type}. Must be one of #{Helpers::Confidence::CONTENT_TYPES}"
|
|
21
|
-
end
|
|
28
|
+
content_type = normalize_content_type(content_type)
|
|
29
|
+
log.debug("Apollo Knowledge.store_knowledge content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent || 'nil'} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
22
30
|
|
|
23
31
|
if defined?(Legion::Data::Model::ApolloEntry)
|
|
24
32
|
return handle_ingest(content: content, content_type: content_type,
|
|
@@ -36,6 +44,7 @@ module Legion
|
|
|
36
44
|
end
|
|
37
45
|
|
|
38
46
|
def query_knowledge(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: %i[confirmed candidate], tags: nil, **) # rubocop:disable Layout/LineLength
|
|
47
|
+
log.debug("Apollo Knowledge.query_knowledge query_length=#{query.to_s.length} limit=#{limit} statuses=#{Array(status).join(',')} tags=#{Array(tags).size} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
39
48
|
if defined?(Legion::Data::Model::ApolloEntry)
|
|
40
49
|
return handle_query(query: query, limit: limit, min_confidence: min_confidence,
|
|
41
50
|
status: status, tags: tags, **)
|
|
@@ -52,6 +61,7 @@ module Legion
|
|
|
52
61
|
end
|
|
53
62
|
|
|
54
63
|
def related_entries(entry_id:, relation_types: nil, depth: Helpers::GraphQuery.default_depth, **)
|
|
64
|
+
log.debug("Apollo Knowledge.related_entries entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
55
65
|
return handle_traverse(entry_id: entry_id, depth: depth, relation_types: relation_types, **) if defined?(Legion::Data::Model::ApolloEntry)
|
|
56
66
|
|
|
57
67
|
{
|
|
@@ -70,74 +80,73 @@ module Legion
|
|
|
70
80
|
}
|
|
71
81
|
end
|
|
72
82
|
|
|
73
|
-
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
83
|
+
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
74
84
|
return { status: :skipped } if skip
|
|
75
|
-
|
|
85
|
+
|
|
86
|
+
content = normalize_text_input(content)
|
|
87
|
+
log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
|
|
88
|
+
return { success: false, error: 'content is required' } if content.strip.empty?
|
|
76
89
|
return { success: false, error: 'content_type is required' } if content_type.nil?
|
|
77
90
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
78
91
|
|
|
79
|
-
# Content hash dedup
|
|
80
92
|
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
.first
|
|
86
|
-
if existing
|
|
87
|
-
existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
88
|
-
return { success: true, entry_id: existing.id, deduped: true }
|
|
89
|
-
end
|
|
93
|
+
existing = active_duplicate_for_hash(hash)
|
|
94
|
+
if existing
|
|
95
|
+
log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
|
|
96
|
+
return { success: true, entry_id: existing.id, deduped: true }
|
|
90
97
|
end
|
|
91
98
|
|
|
92
99
|
embedding = embed_text(content)
|
|
93
100
|
content_type_sym = content_type.to_s
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
tags: Sequel.pg_array(tag_array),
|
|
109
|
-
status: 'candidate',
|
|
110
|
-
knowledge_domain: domain,
|
|
111
|
-
submitted_by: submitted_by,
|
|
112
|
-
submitted_from: submitted_from,
|
|
113
|
-
content_hash: hash,
|
|
114
|
-
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
101
|
+
metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
|
|
102
|
+
source_provider: source_provider, source_channel: source_channel,
|
|
103
|
+
submitted_by: submitted_by, submitted_from: submitted_from)
|
|
104
|
+
|
|
105
|
+
corroborated, existing_id = find_corroboration(
|
|
106
|
+
embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
if corroborated
|
|
110
|
+
log.info("Apollo Knowledge.handle_ingest corroborated entry_id=#{existing_id} source_agent=#{metadata[:source_agent]}")
|
|
111
|
+
else
|
|
112
|
+
existing_id = create_candidate_entry(
|
|
113
|
+
content: content, content_type: content_type_sym, context: context,
|
|
114
|
+
metadata: metadata, content_hash: hash, embedding: embedding
|
|
115
115
|
)
|
|
116
|
-
existing_id = new_entry.id
|
|
117
116
|
end
|
|
118
117
|
|
|
119
|
-
upsert_expertise(source_agent: source_agent, domain: domain)
|
|
118
|
+
upsert_expertise(source_agent: metadata[:source_agent], domain: metadata[:domain])
|
|
120
119
|
|
|
121
120
|
Legion::Data::Model::ApolloAccessLog.create(
|
|
122
|
-
entry_id: existing_id, agent_id: source_agent, action: 'ingest'
|
|
121
|
+
entry_id: existing_id, agent_id: metadata[:source_agent], action: 'ingest'
|
|
123
122
|
)
|
|
124
123
|
|
|
125
124
|
contradictions = detect_contradictions(existing_id, embedding, content)
|
|
125
|
+
log.debug("Apollo Knowledge.handle_ingest complete entry_id=#{existing_id} corroborated=#{corroborated} contradictions=#{contradictions.size}")
|
|
126
126
|
|
|
127
127
|
{ success: true, entry_id: existing_id, status: corroborated ? 'corroborated' : 'candidate',
|
|
128
128
|
corroborated: corroborated, contradictions: contradictions }
|
|
129
129
|
rescue Sequel::Error => e
|
|
130
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_ingest')
|
|
130
131
|
{ success: false, error: e.message }
|
|
131
132
|
end
|
|
132
133
|
|
|
133
|
-
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status:
|
|
134
|
+
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
|
|
134
135
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
135
136
|
|
|
136
137
|
query = normalize_text_input(query)
|
|
138
|
+
status_defaulted = status.equal?(UNSET)
|
|
139
|
+
requested_status = status_defaulted ? DEFAULT_QUERY_STATUS : status
|
|
140
|
+
log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
141
|
+
if browse_query?(query)
|
|
142
|
+
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
143
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain)
|
|
144
|
+
end
|
|
145
|
+
|
|
137
146
|
embedding = embed_text(query)
|
|
138
147
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
139
148
|
limit: limit, min_confidence: min_confidence,
|
|
140
|
-
statuses: Array(
|
|
149
|
+
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
|
|
141
150
|
)
|
|
142
151
|
|
|
143
152
|
db = Legion::Data::Model::ApolloEntry.db
|
|
@@ -168,14 +177,17 @@ module Legion
|
|
|
168
177
|
knowledge_domain: entry[:knowledge_domain] }
|
|
169
178
|
end
|
|
170
179
|
|
|
180
|
+
log.info("Apollo Knowledge.handle_query results=#{formatted.size} mode=semantic agent_id=#{agent_id}")
|
|
171
181
|
{ success: true, entries: formatted, count: formatted.size }
|
|
172
182
|
rescue Sequel::Error => e
|
|
183
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_query')
|
|
173
184
|
{ success: false, error: e.message }
|
|
174
185
|
end
|
|
175
186
|
|
|
176
187
|
def handle_traverse(entry_id:, depth: Helpers::GraphQuery.default_depth, relation_types: nil, agent_id: 'unknown', **)
|
|
177
188
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
178
189
|
|
|
190
|
+
log.debug("Apollo Knowledge.handle_traverse entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
179
191
|
# Whitelist relation_types to prevent SQL injection (they are string-interpolated in build_traversal_sql)
|
|
180
192
|
if relation_types
|
|
181
193
|
allowed = Helpers::Confidence::RELATION_TYPES
|
|
@@ -198,14 +210,17 @@ module Legion
|
|
|
198
210
|
depth: entry[:depth], activation: entry[:activation] }
|
|
199
211
|
end
|
|
200
212
|
|
|
213
|
+
log.info("Apollo Knowledge.handle_traverse results=#{formatted.size} entry_id=#{entry_id}")
|
|
201
214
|
{ success: true, entries: formatted, count: formatted.size }
|
|
202
215
|
rescue Sequel::Error => e
|
|
216
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_traverse')
|
|
203
217
|
{ success: false, error: e.message }
|
|
204
218
|
end
|
|
205
219
|
|
|
206
220
|
def redistribute_knowledge(agent_id:, min_confidence: Helpers::Confidence.apollo_setting(:query, :redistribute_min_confidence, default: 0.5), **)
|
|
207
221
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
208
222
|
|
|
223
|
+
log.debug("Apollo Knowledge.redistribute_knowledge agent_id=#{agent_id} min_confidence=#{min_confidence}")
|
|
209
224
|
entries = Legion::Data::Model::ApolloEntry
|
|
210
225
|
.where(source_agent: agent_id, status: 'confirmed')
|
|
211
226
|
.where { confidence > min_confidence }
|
|
@@ -233,6 +248,7 @@ module Legion
|
|
|
233
248
|
log.info("[apollo] redistributed #{redistributed} entries from departing agent=#{agent_id}")
|
|
234
249
|
{ success: true, redistributed: redistributed, agent_id: agent_id }
|
|
235
250
|
rescue Sequel::Error => e
|
|
251
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.redistribute_knowledge')
|
|
236
252
|
{ success: false, error: e.message }
|
|
237
253
|
end
|
|
238
254
|
|
|
@@ -242,6 +258,7 @@ module Legion
|
|
|
242
258
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
243
259
|
|
|
244
260
|
query = normalize_text_input(query)
|
|
261
|
+
log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
245
262
|
return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
|
|
246
263
|
|
|
247
264
|
embedding = embed_text(query)
|
|
@@ -268,8 +285,10 @@ module Legion
|
|
|
268
285
|
knowledge_domain: entry[:knowledge_domain] }
|
|
269
286
|
end
|
|
270
287
|
|
|
288
|
+
log.info("Apollo Knowledge.retrieve_relevant results=#{formatted.size} limit=#{limit}")
|
|
271
289
|
{ success: true, entries: formatted, count: formatted.size }
|
|
272
290
|
rescue Sequel::Error => e
|
|
291
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.retrieve_relevant')
|
|
273
292
|
{ success: false, error: e.message }
|
|
274
293
|
end
|
|
275
294
|
|
|
@@ -278,6 +297,7 @@ module Legion
|
|
|
278
297
|
return { success: false, error: 'apollo_data_not_available' }
|
|
279
298
|
end
|
|
280
299
|
|
|
300
|
+
log.debug("Apollo Knowledge.prepare_mesh_export target_domain=#{target_domain} min_confidence=#{min_confidence} limit=#{limit}")
|
|
281
301
|
conn = Legion::Data.connection
|
|
282
302
|
allowed = allowed_domains_for(target_domain)
|
|
283
303
|
|
|
@@ -297,7 +317,9 @@ module Legion
|
|
|
297
317
|
end
|
|
298
318
|
|
|
299
319
|
{ success: true, entries: formatted, count: formatted.size, target_domain: target_domain }
|
|
320
|
+
.tap { |result| log.info("Apollo Knowledge.prepare_mesh_export results=#{result[:count]} target_domain=#{target_domain}") }
|
|
300
321
|
rescue Sequel::Error => e
|
|
322
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.prepare_mesh_export')
|
|
301
323
|
{ success: false, error: e.message }
|
|
302
324
|
end
|
|
303
325
|
|
|
@@ -306,6 +328,7 @@ module Legion
|
|
|
306
328
|
return { deleted: 0, redacted: 0, error: 'apollo_data_not_available' }
|
|
307
329
|
end
|
|
308
330
|
|
|
331
|
+
log.warn("Apollo Knowledge.handle_erasure_request agent_id=#{agent_id}")
|
|
309
332
|
conn = Legion::Data.connection
|
|
310
333
|
|
|
311
334
|
# Delete entries solely from dead agent (not confirmed by others)
|
|
@@ -320,29 +343,142 @@ module Legion
|
|
|
320
343
|
.update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
|
|
321
344
|
|
|
322
345
|
{ deleted: deleted, redacted: redacted, agent_id: agent_id }
|
|
346
|
+
.tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
|
|
323
347
|
rescue Sequel::Error => e
|
|
348
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_erasure_request')
|
|
324
349
|
{ deleted: 0, redacted: 0, error: e.message }
|
|
325
350
|
end
|
|
326
351
|
|
|
352
|
+
CONFLICT_CHECK_MAX_CHARS = 4000
|
|
353
|
+
|
|
327
354
|
private
|
|
328
355
|
|
|
356
|
+
# Coerces a caller- or LLM-supplied content type into a supported symbol.
#
# The raw value is stringified, trimmed, relieved of a leading ':' and
# lower-cased; every '/' or whitespace character left inside the token becomes
# '_'. The token is then mapped through CONTENT_TYPE_ALIASES and accepted only
# when Helpers::Confidence::CONTENT_TYPES includes it.
#
# @param raw [#to_s] content type as received (String, Symbol, nil, ...)
# @return [Symbol] a member of CONTENT_TYPES, or :observation when unrecognized
def normalize_content_type(raw)
  # Trim BEFORE substituting: once whitespace has been rewritten to '_' a later
  # strip has nothing left to remove, so a padded value such as ' fact ' would
  # become :_fact_ and silently fall through to the :observation default.
  token = raw.to_s.strip.delete_prefix(':').strip.downcase
  sym = token.gsub(%r{[/\s]}, '_').to_sym
  sym = CONTENT_TYPE_ALIASES.fetch(sym, sym)
  Helpers::Confidence::CONTENT_TYPES.include?(sym) ? sym : :observation
end
|
|
361
|
+
|
|
329
362
|
# Produces an embedding vector for +text+ via Legion::LLM::Embeddings.
#
# The generator may answer with either a Hash carrying :vector or the vector
# itself. When no non-empty Array comes back, or when anything raises, a
# 1024-element zero vector is returned instead so callers always get an Array.
#
# @param text [Object] input, normalized through normalize_text_input first
# @return [Array] the generated vector, or Array.new(1024, 0.0) as fallback
def embed_text(text)
  normalized = normalize_text_input(text)
  log.debug("Apollo Knowledge.embed_text text_length=#{normalized.length}")

  response = Legion::LLM::Embeddings.generate(text: normalized)
  candidate = response.is_a?(Hash) ? response[:vector] : response

  unless candidate.is_a?(Array) && candidate.any?
    log.warn('Apollo Knowledge.embed_text returned no vector; using zero-vector fallback')
    return Array.new(1024, 0.0)
  end

  log.debug("Apollo Knowledge.embed_text vector_dimensions=#{candidate.length}")
  candidate
rescue StandardError => e
  log.warn("Apollo Knowledge.embed_text failed: #{e.message}")
  Array.new(1024, 0.0)
end
|
|
338
378
|
|
|
339
379
|
# Normalizes arbitrary input into sanitized text.
#
# Delegates to Legion::Apollo.normalize_text_input when that module is loaded
# and responds to it (private methods included); otherwise the value is used
# as-is. The outcome is always coerced with #to_s before sanitizing, so this
# method returns a String on every path: callers in this file immediately
# invoke String methods (#length, #[]) on the result, and the rescue branch
# already answers with ''.
#
# @param value [Object] raw text-like input (may be nil)
# @return [String] sanitized text; '' when normalization raises
def normalize_text_input(value)
  normalized = if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
                 Legion::Apollo.send(:normalize_text_input, value)
               else
                 value
               end

  # Guard against a nil/non-String result from the external normalizer;
  # sanitize_for_postgres would hand such a value back unchanged.
  sanitize_for_postgres(normalized.to_s)
rescue StandardError => e
  log.warn("Apollo Knowledge.normalize_text_input failed: #{e.message}")
  ''
end
|
|
391
|
+
|
|
392
|
+
# Strips NUL bytes and invalid UTF-8 byte sequences from a String.
#
# Non-String values are handed back untouched. Strings are copied, tagged as
# UTF-8 when they carry another encoding (bytes are reinterpreted, not
# transcoded), scrubbed of invalid sequences and cleared of "\x00". A debug
# line with before/after byte sizes is logged only when something was removed.
#
# @param value [Object] candidate text
# @return [Object] a cleaned copy for Strings, +value+ itself otherwise
def sanitize_for_postgres(value)
  return value unless value.is_a?(String)

  utf8 = value.dup
  utf8.force_encoding(Encoding::UTF_8) unless value.encoding == Encoding::UTF_8

  has_nul = utf8.include?("\x00")
  malformed = !utf8.valid_encoding?
  utf8 = utf8.scrub('') if malformed

  cleaned = utf8.delete("\x00")
  if has_nul || malformed
    log.debug("Apollo Knowledge.sanitize_for_postgres sanitized original_length=#{value.bytesize} sanitized_length=#{cleaned.bytesize}")
  end
  cleaned
end
|
|
402
|
+
|
|
403
|
+
# Normalizes +value+ and keeps at most +max_length+ leading characters.
#
# @param value [Object, nil] raw value; nil is passed through as nil
# @param max_length [Integer] maximum number of characters to keep
# @return [String, nil] the shortened text, or nil when +value+ is nil
def truncate_for_column(value, max_length)
  normalize_text_input(value).slice(0, max_length) unless value.nil?
end
|
|
408
|
+
|
|
409
|
+
# Looks up a non-archived entry that already carries the given content hash.
#
# When one is found its confidence is raised by
# Helpers::Confidence.retrieval_boost (capped at 1.0) and the match is logged.
#
# @param hash [String, nil] content hash to look for; falsy skips the lookup
# @return [Object, nil] the matching entry, or nil when none exists
def active_duplicate_for_hash(hash)
  return nil unless hash

  match = Legion::Data::Model::ApolloEntry
          .where(content_hash: hash)
          .exclude(status: 'archived')
          .first
  unless match.nil?
    boosted = [match.confidence + Helpers::Confidence.retrieval_boost, 1.0].min
    match.update(confidence: boosted)
  end
  log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{match.id}") if match
  match
end
|
|
420
|
+
|
|
421
|
+
# Assembles the normalized, length-limited metadata stored with an ingested entry.
#
# Tags go through Helpers::TagNormalizer when it is defined, otherwise they are
# wrapped with Array(). The domain falls back to the first tag and then to
# 'general'; the agent falls back to 'unknown'; the provider falls back to
# derive_provider_from_agent. Text fields are cut to 50 (domain, agent,
# provider), 100 (channel) and 255 (submitted_by/submitted_from) characters.
#
# @return [Hash] with keys :tags, :domain, :source_agent, :source_provider,
#   :source_channel, :submitted_by and :submitted_from
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, submitted_by:, submitted_from:)
  normalized_tags = if defined?(Helpers::TagNormalizer)
                      Helpers::TagNormalizer.normalize_all(tags)
                    else
                      Array(tags)
                    end
  agent_name = truncate_for_column(source_agent, 50) || 'unknown'
  domain_name = truncate_for_column(knowledge_domain || normalized_tags.first || 'general', 50)
  provider_name = truncate_for_column(source_provider || derive_provider_from_agent(agent_name), 50)

  {
    tags: normalized_tags,
    domain: domain_name,
    source_agent: agent_name,
    source_provider: provider_name,
    source_channel: truncate_for_column(source_channel, 100),
    submitted_by: truncate_for_column(submitted_by, 255),
    submitted_from: truncate_for_column(submitted_from, 255)
  }
end
|
|
433
|
+
|
|
434
|
+
# Persists a new ApolloEntry in 'candidate' status and returns its id.
#
# Confidence starts at Helpers::Confidence.initial_confidence. +context+ is
# serialized as JSON ({} when it is not a Hash), tags are wrapped with
# Sequel.pg_array and the embedding is written as a literal '[...]'::vector
# expression built by joining the embedding values with commas.
#
# @param metadata [Hash] reads :source_agent, :source_provider, :source_channel,
#   :tags, :domain, :submitted_by and :submitted_from
# @param embedding [Array] values joined into the vector literal
# @return [Object] id of the created entry
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
  attributes = {
    content: content,
    content_type: content_type,
    confidence: Helpers::Confidence.initial_confidence,
    source_agent: metadata[:source_agent],
    source_provider: metadata[:source_provider],
    source_channel: metadata[:source_channel],
    source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
    tags: Sequel.pg_array(metadata[:tags]),
    status: 'candidate',
    knowledge_domain: metadata[:domain],
    submitted_by: metadata[:submitted_by],
    submitted_from: metadata[:submitted_from],
    content_hash: content_hash,
    embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
  }
  record = Legion::Data::Model::ApolloEntry.create(**attributes)
  log.info("Apollo Knowledge.handle_ingest created entry_id=#{record.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
  record.id
end
|
|
454
|
+
|
|
455
|
+
# Tells whether a query should be served as a browse listing: true when the
# stringified query, ignoring surrounding whitespace, is under 3 characters.
#
# @param query [Object] query text (nil counts as empty)
# @return [Boolean]
def browse_query?(query)
  trimmed = query.to_s.strip
  trimmed.size <= 2
end
|
|
458
|
+
|
|
459
|
+
# Lists non-archived entries, newest first, without any semantic search.
#
# The status filter is applied only when the caller supplied one explicitly
# (+status_defaulted+ false) and it contains at least one non-blank value.
# Tag (array overlap) and domain filters are applied when given.
#
# @param query [String] echoed back in the result
# @param limit [Integer] maximum number of rows
# @param status [Object] one status or a list of statuses
# @param status_defaulted [Boolean] true when +status+ was not set by the caller
# @param tags [Array, nil] tags to overlap with
# @param domain [String, nil] knowledge domain to match
# @return [Hash] { success: true, mode: :browse, query:, entries:, count: } or
#   { success: false, error: } when a Sequel::Error is raised
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
  log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength

  scope = Legion::Data::Model::ApolloEntry.exclude(status: 'archived')

  wanted_statuses = Array(status).map(&:to_s).reject(&:empty?)
  scope = scope.where(status: wanted_statuses) if !status_defaulted && !wanted_statuses.empty?

  if tags && !Array(tags).empty?
    overlap = Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))
    scope = scope.where(overlap)
  end

  scope = scope.where(knowledge_domain: domain) if domain && !domain.to_s.empty?

  rows = scope.order(Sequel.desc(:created_at)).limit(limit).all
  entries = rows.map { |row| format_entry(row.is_a?(Hash) ? row : row.values) }

  log.info("Apollo Knowledge.list_entries_chronologically results=#{entries.size}")
  { success: true, mode: :browse, query: query, entries: entries, count: entries.size }
rescue Sequel::Error => e
  handle_exception(e, level: :error, operation: 'apollo.knowledge.list_entries_chronologically')
  { success: false, error: e.message }
end
|
|
476
|
+
|
|
477
|
+
# Projects an entry row onto the fields returned to callers.
#
# @param entry [#[]] row addressable by symbol keys
# @return [Hash] :id, :content, :content_type, :confidence, :distance (as a
#   Float, nil when absent), :tags, :source_agent and :knowledge_domain
def format_entry(entry)
  formatted = %i[id content content_type confidence].to_h { |key| [key, entry[key]] }
  formatted[:distance] = entry[:distance]&.to_f
  %i[tags source_agent knowledge_domain].each { |key| formatted[key] = entry[key] }
  formatted
end
|
|
347
483
|
|
|
348
484
|
def allowed_domains_for(target_domain)
|
|
@@ -366,6 +502,7 @@ module Legion
|
|
|
366
502
|
rel_weight = Helpers::Confidence.apollo_setting(:contradiction, :relation_weight, default: 0.8)
|
|
367
503
|
|
|
368
504
|
db = Legion::Data::Model::ApolloEntry.db
|
|
505
|
+
log.debug("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} similar_limit=#{sim_limit} threshold=#{sim_threshold}")
|
|
369
506
|
similar = db.fetch(
|
|
370
507
|
"SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
|
|
371
508
|
entry_id: entry_id,
|
|
@@ -390,19 +527,22 @@ module Legion
|
|
|
390
527
|
Legion::Data::Model::ApolloEntry.where(id: [entry_id, existing[:id]]).update(status: 'disputed')
|
|
391
528
|
contradictions << existing[:id]
|
|
392
529
|
end
|
|
530
|
+
log.info("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} contradictions=#{contradictions.size}") if contradictions.any?
|
|
393
531
|
contradictions
|
|
394
532
|
rescue Sequel::Error => e
|
|
395
|
-
|
|
533
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.detect_contradictions')
|
|
396
534
|
[]
|
|
397
535
|
end
|
|
398
536
|
|
|
399
537
|
def llm_detects_conflict?(content_a, content_b)
|
|
400
538
|
return false unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:structured)
|
|
401
539
|
|
|
540
|
+
a = content_a.to_s[0, CONFLICT_CHECK_MAX_CHARS]
|
|
541
|
+
b = content_b.to_s[0, CONFLICT_CHECK_MAX_CHARS]
|
|
402
542
|
result = Legion::LLM.structured(
|
|
403
543
|
messages: [
|
|
404
544
|
{ role: 'system', content: 'Do these two statements contradict each other? Return JSON.' },
|
|
405
|
-
{ role: 'user', content: "A: #{
|
|
545
|
+
{ role: 'user', content: "A: #{a}\n\nB: #{b}" }
|
|
406
546
|
],
|
|
407
547
|
schema: { type: 'object', properties: { contradicts: { type: 'boolean' } } },
|
|
408
548
|
caller: { extension: 'lex-apollo', runner: 'knowledge' }
|
|
@@ -415,6 +555,7 @@ module Legion
|
|
|
415
555
|
|
|
416
556
|
def find_corroboration(embedding, content_type_sym, source_agent, source_channel = nil)
|
|
417
557
|
scan_limit = Helpers::Confidence.apollo_setting(:corroboration, :scan_limit, default: 50)
|
|
558
|
+
log.debug("Apollo Knowledge.find_corroboration content_type=#{content_type_sym} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'} scan_limit=#{scan_limit}") # rubocop:disable Layout/LineLength
|
|
418
559
|
existing = Legion::Data::Model::ApolloEntry
|
|
419
560
|
.where(content_type: content_type_sym)
|
|
420
561
|
.exclude(embedding: nil)
|
|
@@ -447,9 +588,11 @@ module Legion
|
|
|
447
588
|
source_agent: source_agent,
|
|
448
589
|
weight: sim
|
|
449
590
|
)
|
|
591
|
+
log.info("Apollo Knowledge.find_corroboration matched entry_id=#{entry.id} source_agent=#{source_agent} similarity=#{sim}")
|
|
450
592
|
return [true, entry.id]
|
|
451
593
|
end
|
|
452
594
|
|
|
595
|
+
log.debug("Apollo Knowledge.find_corroboration no_match source_agent=#{source_agent}")
|
|
453
596
|
[false, nil]
|
|
454
597
|
end
|
|
455
598
|
|
|
@@ -468,6 +611,7 @@ module Legion
|
|
|
468
611
|
end
|
|
469
612
|
|
|
470
613
|
def upsert_expertise(source_agent:, domain:)
|
|
614
|
+
log.debug("Apollo Knowledge.upsert_expertise source_agent=#{source_agent} domain=#{domain}")
|
|
471
615
|
expertise = Legion::Data::Model::ApolloExpertise
|
|
472
616
|
.where(agent_id: source_agent, domain: domain).first
|
|
473
617
|
if expertise
|
|
@@ -25,7 +25,11 @@ module Legion
|
|
|
25
25
|
min_confidence ||= Helpers::Confidence.decay_threshold
|
|
26
26
|
min_age_hours = Helpers::Confidence.decay_min_age_hours
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
log.debug("Apollo Maintenance.run_decay_cycle alpha=#{alpha} min_confidence=#{min_confidence} min_age_hours=#{min_age_hours}")
|
|
29
|
+
unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
|
|
30
|
+
log.warn('Apollo Maintenance.run_decay_cycle skipped: apollo_data_not_available')
|
|
31
|
+
return { decayed: 0, archived: 0 }
|
|
32
|
+
end
|
|
29
33
|
|
|
30
34
|
conn = Legion::Data.connection
|
|
31
35
|
|
|
@@ -54,15 +58,21 @@ module Legion
|
|
|
54
58
|
|
|
55
59
|
{ decayed: decayed, archived: archived, alpha: alpha, threshold: min_confidence,
|
|
56
60
|
min_age_hours: min_age_hours }
|
|
61
|
+
.tap { |result| log.info("Apollo Maintenance.run_decay_cycle decayed=#{result[:decayed]} archived=#{result[:archived]} alpha=#{alpha} threshold=#{min_confidence}") } # rubocop:disable Layout/LineLength
|
|
57
62
|
rescue Sequel::Error => e
|
|
63
|
+
handle_exception(e, level: :error, operation: 'apollo.maintenance.run_decay_cycle')
|
|
58
64
|
{ decayed: 0, archived: 0, error: e.message }
|
|
59
65
|
end
|
|
60
66
|
|
|
61
67
|
def check_corroboration(**) # rubocop:disable Metrics/CyclomaticComplexity
|
|
62
|
-
|
|
68
|
+
unless defined?(Legion::Data::Model::ApolloEntry)
|
|
69
|
+
log.warn('Apollo Maintenance.check_corroboration skipped: apollo_data_not_available')
|
|
70
|
+
return { success: false, error: 'apollo_data_not_available' }
|
|
71
|
+
end
|
|
63
72
|
|
|
64
73
|
candidates = Legion::Data::Model::ApolloEntry.where(status: 'candidate').exclude(embedding: nil).all
|
|
65
74
|
confirmed = Legion::Data::Model::ApolloEntry.where(status: 'confirmed').exclude(embedding: nil).all
|
|
75
|
+
log.debug("Apollo Maintenance.check_corroboration candidates=#{candidates.size} confirmed=#{confirmed.size}")
|
|
66
76
|
|
|
67
77
|
promoted = 0
|
|
68
78
|
|
|
@@ -106,7 +116,9 @@ module Legion
|
|
|
106
116
|
end
|
|
107
117
|
|
|
108
118
|
{ success: true, promoted: promoted, scanned: candidates.size }
|
|
119
|
+
.tap { |result| log.info("Apollo Maintenance.check_corroboration scanned=#{result[:scanned]} promoted=#{result[:promoted]}") }
|
|
109
120
|
rescue Sequel::Error => e
|
|
121
|
+
handle_exception(e, level: :error, operation: 'apollo.maintenance.check_corroboration')
|
|
110
122
|
{ success: false, error: e.message }
|
|
111
123
|
end
|
|
112
124
|
|
|
@@ -9,6 +9,27 @@ RSpec.describe 'Apollo Contradiction Detection' do
|
|
|
9
9
|
it 'returns false when LLM unavailable' do
|
|
10
10
|
expect(knowledge.send(:llm_detects_conflict?, 'sky is blue', 'sky is red')).to be false
|
|
11
11
|
end
|
|
12
|
+
|
|
13
|
+
context 'when LLM is available' do
|
|
14
|
+
let(:llm_mod) do
|
|
15
|
+
Module.new do
|
|
16
|
+
def self.respond_to?(*) = true
|
|
17
|
+
def self.structured(**) = { data: { contradicts: true } }
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
before { stub_const('Legion::LLM', llm_mod) }
|
|
22
|
+
|
|
23
|
+
it 'truncates content longer than CONFLICT_CHECK_MAX_CHARS' do
|
|
24
|
+
long_text = 'x' * 10_000
|
|
25
|
+
allow(llm_mod).to receive(:structured).and_return({ data: { contradicts: false } })
|
|
26
|
+
knowledge.send(:llm_detects_conflict?, long_text, long_text)
|
|
27
|
+
expect(llm_mod).to have_received(:structured) do |**kwargs|
|
|
28
|
+
user_msg = kwargs[:messages].find { |m| m[:role] == 'user' }[:content]
|
|
29
|
+
expect(user_msg.length).to be < 10_000
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
12
33
|
end
|
|
13
34
|
|
|
14
35
|
describe '#detect_contradictions' do
|
|
@@ -44,6 +44,20 @@ RSpec.describe Legion::Extensions::Apollo::GaiaIntegration do
|
|
|
44
44
|
)
|
|
45
45
|
expect(result).to eq({ success: true })
|
|
46
46
|
end
|
|
47
|
+
|
|
48
|
+
it 'passes :observation as content_type regardless of domain' do
|
|
49
|
+
client_double = instance_double(Legion::Extensions::Apollo::Client)
|
|
50
|
+
allow(Legion::Extensions::Apollo::Client).to receive(:new).and_return(client_double)
|
|
51
|
+
allow(client_double).to receive(:store_knowledge).and_return({ success: true })
|
|
52
|
+
|
|
53
|
+
described_class.publish_insight(
|
|
54
|
+
{ confidence: 0.9, novelty: 0.5, content: 'insight', domain: 'clinical' },
|
|
55
|
+
agent_id: 'test-agent'
|
|
56
|
+
)
|
|
57
|
+
expect(client_double).to have_received(:store_knowledge).with(
|
|
58
|
+
hash_including(content_type: :observation)
|
|
59
|
+
)
|
|
60
|
+
end
|
|
47
61
|
end
|
|
48
62
|
|
|
49
63
|
describe 'entity watchdog phase handler' do
|
|
@@ -45,10 +45,41 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
45
45
|
expect(result[:source_agent]).to eq('worker-1')
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
-
it '
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
48
|
+
it 'falls back to :observation for unrecognized content_type' do
|
|
49
|
+
result = runner.store_knowledge(content: 'test', content_type: 'invalid_type')
|
|
50
|
+
expect(result[:content_type]).to eq(:observation)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it 'normalizes LLM-provided content_type "reasoning" to :concept' do
|
|
54
|
+
result = runner.store_knowledge(content: 'test', content_type: 'reasoning')
|
|
55
|
+
expect(result[:content_type]).to eq(:concept)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
it 'normalizes "text" to :observation' do
|
|
59
|
+
result = runner.store_knowledge(content: 'test', content_type: 'text')
|
|
60
|
+
expect(result[:content_type]).to eq(:observation)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
it 'normalizes "text/plain" to :observation' do
|
|
64
|
+
result = runner.store_knowledge(content: 'test', content_type: 'text/plain')
|
|
65
|
+
expect(result[:content_type]).to eq(:observation)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it 'strips leading colon from ":fact"' do
|
|
69
|
+
result = runner.store_knowledge(content: 'test', content_type: ':fact')
|
|
70
|
+
expect(result[:content_type]).to eq(:fact)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it 'normalizes "inference" to :association' do
|
|
74
|
+
result = runner.store_knowledge(content: 'test', content_type: 'inference')
|
|
75
|
+
expect(result[:content_type]).to eq(:association)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it 'accepts all valid CONTENT_TYPES unchanged' do
|
|
79
|
+
%i[fact concept procedure association observation].each do |ct|
|
|
80
|
+
result = runner.store_knowledge(content: 'test', content_type: ct)
|
|
81
|
+
expect(result[:content_type]).to eq(ct)
|
|
82
|
+
end
|
|
52
83
|
end
|
|
53
84
|
end
|
|
54
85
|
|
|
@@ -232,6 +263,36 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
232
263
|
tags: ['RabbitMQ'], source_agent: 'agent-1')
|
|
233
264
|
end
|
|
234
265
|
|
|
266
|
+
it 'sanitizes null bytes before storing content' do
|
|
267
|
+
expect(mock_entry_class).to receive(:create).with(
|
|
268
|
+
hash_including(content: 'helloworld')
|
|
269
|
+
).and_return(mock_entry)
|
|
270
|
+
host.handle_ingest(content: "hello\x00world", content_type: 'fact', source_agent: 'agent-1')
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it 'truncates short varchar metadata fields at the database boundary' do
|
|
274
|
+
expect(mock_entry_class).to receive(:create).with(
|
|
275
|
+
hash_including(
|
|
276
|
+
source_agent: 'a' * 50,
|
|
277
|
+
source_provider: 'p' * 50,
|
|
278
|
+
source_channel: 'c' * 100,
|
|
279
|
+
knowledge_domain: 'd' * 50,
|
|
280
|
+
submitted_by: 'u' * 255,
|
|
281
|
+
submitted_from: 'n' * 255
|
|
282
|
+
)
|
|
283
|
+
).and_return(mock_entry)
|
|
284
|
+
host.handle_ingest(
|
|
285
|
+
content: 'test',
|
|
286
|
+
content_type: 'fact',
|
|
287
|
+
source_agent: 'a' * 60,
|
|
288
|
+
source_provider: 'p' * 60,
|
|
289
|
+
source_channel: 'c' * 120,
|
|
290
|
+
knowledge_domain: 'd' * 60,
|
|
291
|
+
submitted_by: 'u' * 300,
|
|
292
|
+
submitted_from: 'n' * 300
|
|
293
|
+
)
|
|
294
|
+
end
|
|
295
|
+
|
|
235
296
|
context 'content hash dedup' do
|
|
236
297
|
let(:existing_entry) do
|
|
237
298
|
double('existing', id: 'uuid-existing', confidence: 0.6,
|
|
@@ -264,9 +325,16 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
264
325
|
end
|
|
265
326
|
|
|
266
327
|
it 'returns a structured error' do
|
|
328
|
+
allow(host).to receive(:handle_exception)
|
|
329
|
+
|
|
267
330
|
result = host.handle_ingest(content: 'test', content_type: 'fact', source_agent: 'a')
|
|
268
331
|
expect(result[:success]).to be false
|
|
269
332
|
expect(result[:error]).to eq('connection lost')
|
|
333
|
+
expect(host).to have_received(:handle_exception).with(
|
|
334
|
+
instance_of(Sequel::Error),
|
|
335
|
+
level: :error,
|
|
336
|
+
operation: 'apollo.knowledge.handle_ingest'
|
|
337
|
+
)
|
|
270
338
|
end
|
|
271
339
|
end
|
|
272
340
|
end
|
|
@@ -345,6 +413,64 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
345
413
|
expect(result[:count]).to eq(0)
|
|
346
414
|
end
|
|
347
415
|
end
|
|
416
|
+
|
|
417
|
+
context 'when query is browse-shaped' do
|
|
418
|
+
let(:mock_entry_class) { double('ApolloEntry') }
|
|
419
|
+
let(:dataset) { double('dataset') }
|
|
420
|
+
let(:entries) do
|
|
421
|
+
[{ id: 'uuid-1', content: 'Candidate fact', content_type: 'fact',
|
|
422
|
+
confidence: 0.7, tags: ['ruby'], source_agent: 'agent-1',
|
|
423
|
+
knowledge_domain: 'general' }]
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
before do
|
|
427
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
|
|
428
|
+
allow(mock_entry_class).to receive(:exclude).with(status: 'archived').and_return(dataset)
|
|
429
|
+
allow(dataset).to receive(:where).and_return(dataset)
|
|
430
|
+
allow(dataset).to receive(:order).with(:created_at).and_return(dataset)
|
|
431
|
+
allow(dataset).to receive(:limit).with(50).and_return(dataset)
|
|
432
|
+
allow(dataset).to receive(:all).and_return(entries)
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
it 'lists recent non-archived entries without generating an embedding' do
|
|
436
|
+
expect(Legion::LLM::Embeddings).not_to receive(:generate)
|
|
437
|
+
|
|
438
|
+
result = host.handle_query(query: 'x', limit: 50)
|
|
439
|
+
|
|
440
|
+
expect(result[:success]).to be true
|
|
441
|
+
expect(result[:mode]).to eq(:browse)
|
|
442
|
+
expect(result[:count]).to eq(1)
|
|
443
|
+
expect(result[:entries].first[:content]).to eq('Candidate fact')
|
|
444
|
+
expect(dataset).not_to have_received(:where).with(status: ['confirmed'])
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
it 'respects an explicit confirmed status filter' do
|
|
448
|
+
host.handle_query(query: 'x', limit: 50, status: [:confirmed])
|
|
449
|
+
|
|
450
|
+
expect(dataset).to have_received(:where).with(status: ['confirmed'])
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
it 'applies tags and domain filters when provided' do
|
|
454
|
+
host.handle_query(query: 'x', limit: 50, tags: ['ruby'], domain: 'general')
|
|
455
|
+
|
|
456
|
+
expect(dataset).to have_received(:where).with('tags && ?')
|
|
457
|
+
expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
|
|
458
|
+
end
|
|
459
|
+
end
|
|
460
|
+
end
|
|
461
|
+
|
|
462
|
+
describe '#normalize_text_input' do
|
|
463
|
+
let(:host) { Object.new.extend(described_class) }
|
|
464
|
+
|
|
465
|
+
it 'strips null bytes in the local fallback path' do
|
|
466
|
+
expect(host.send(:normalize_text_input, "hello\x00world")).to eq('helloworld')
|
|
467
|
+
end
|
|
468
|
+
|
|
469
|
+
it 'scrubs invalid UTF-8 in the local fallback path' do
|
|
470
|
+
invalid = "hello\xC3(world".dup.force_encoding(Encoding::UTF_8)
|
|
471
|
+
|
|
472
|
+
expect(host.send(:normalize_text_input, invalid)).to eq('hello(world')
|
|
473
|
+
end
|
|
348
474
|
end
|
|
349
475
|
|
|
350
476
|
describe '#retrieve_relevant' do
|
data/spec/spec_helper.rb
CHANGED