lex-apollo 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9a1c9042626e5d9e09a783b8c703b11f261573fe1658498a4fa48ad023306baf
4
- data.tar.gz: 2f3e561b6851825d9d5c67325007802eafea63c10c93a00e37a40170202a0bdc
3
+ metadata.gz: 4ed5f6ac36b031c41850cae2f4e4c6768666c4151c730b7557f7d5f21ac24df7
4
+ data.tar.gz: 870ffad7838f068a01aa2b03af51ba2ae7df1cfe30debda9a8d3275705ef775a
5
5
  SHA512:
6
- metadata.gz: d2dae8784e591deeb1608671dbabbd313813a6f94214b77de0539d2316d6ef3a34e21d5002eb28a262e98c4c4d57773793be4ba006d185e0fd3afcf6816902c5
7
- data.tar.gz: 41e11ae62a001fab8e79b514d30c3c28374d38d1d4379926e00752df74e35ccfa79c61b3ac75e7e60341de0420287635413b265eca5fa0cd7fe4e7f02a5fad0f
6
+ metadata.gz: '0583ae3da3f19c3e852929e97947bf49b94e89d37f918d65fcc2507719049ac641ab6867c1e2b7eee899c923c1cb52ec75670c3cab345b159506d34a01099463'
7
+ data.tar.gz: 4f77f0e210baa01f3e77b06808ddfe06456d4f10358b8add5e4d602b8669be9b8223e6b69c30f2b56b2d18f20e5d0eabde504b05a8426812236d9a5918631bc4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.7] - 2026-03-25
4
+
5
+ ### Added
6
+ - Knowledge capture writeback system: `Helpers::Writeback` evaluates LLM responses for Apollo ingest with echo chamber prevention
7
+ - `Helpers::TagNormalizer` for mechanical tag normalization (aliases, special chars, max 5)
8
+ - `Helpers::Capability` for detecting embed/write capabilities per node
9
+ - Writeback transport layer: `Messages::Writeback`, `Queues::WritebackStore`, `Queues::WritebackVectorize`
10
+ - Writeback subscription actors: `Actor::WritebackStore` (pre-embedded), `Actor::WritebackVectorize` (needs embedding)
11
+ - Content hash dedup in `Runners::Knowledge#handle_ingest` — collision boosts confidence instead of inserting
12
+ - Identity tracking: `submitted_by`, `submitted_from`, `content_hash` fields on knowledge entries
13
+
3
14
  ## [0.4.6] - 2026-03-25
4
15
 
5
16
  ### Added
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
4
+
5
module Legion
  module Extensions
    module Apollo
      module Actor
        # Subscription actor for writeback messages that already carry an
        # embedding. It names Runners::Knowledge#handle_ingest as the handler;
        # how the Subscription base class dispatches to it is not shown here.
        class WritebackStore < Legion::Extensions::Actors::Subscription
          # @return [String] fully-qualified name of the runner that handles messages
          def runner_class
            'Legion::Extensions::Apollo::Runners::Knowledge'
          end

          # @return [String] name of the runner method to invoke
          def runner_function
            'handle_ingest'
          end

          def check_subtask?
            false
          end

          def generate_task?
            false
          end

          # Truthy only when the Knowledge runner and Legion::Transport are both
          # loaded and the apollo_write setting is on. Any error raised while
          # checking is treated as "disabled".
          def enabled?
            runner_loaded = defined?(Legion::Extensions::Apollo::Runners::Knowledge)
            transport_loaded = defined?(Legion::Transport)

            runner_loaded && transport_loaded && Helpers::Capability.apollo_write_enabled?
          rescue StandardError
            false
          end
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
4
+
5
module Legion
  module Extensions
    module Apollo
      module Actor
        # Subscription actor for writeback messages that still need an
        # embedding. It generates the embedding, then either ingests the entry
        # locally or republishes it flagged as embedded.
        class WritebackVectorize < Legion::Extensions::Actors::Subscription
          # This actor is its own runner: the handler lives on this class.
          def runner_class
            self.class
          end

          def runner_function
            'handle_vectorize'
          end

          def check_subtask?
            false
          end

          def generate_task?
            false
          end

          # Embeds the payload's :content and forwards the enriched payload.
          #
          # @param payload [Hash] writeback payload; keys may be strings or symbols
          # @return [Hash] { success: true, action: :vectorized } on success,
          #   { success: false, error: <message> } if anything raised
          def handle_vectorize(payload)
            attrs = symbolize(payload)
            vector = Helpers::Embedding.generate(text: attrs[:content])
            with_vector = attrs.merge(embedding: vector)

            Helpers::Capability.can_write? ? ingest_locally(with_vector) : republish(with_vector)

            { success: true, action: :vectorized }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # Truthy when Legion::Transport is loaded and this node can embed;
          # errors while checking count as disabled.
          def enabled?
            transport_loaded = defined?(Legion::Transport)
            transport_loaded && Helpers::Capability.can_embed?
          rescue StandardError
            false
          end

          private

          # Writes the enriched payload through the Knowledge runner.
          def ingest_locally(enriched)
            Runners::Knowledge.handle_ingest(**enriched)
          end

          # Sends the enriched payload back over transport, marked as embedded.
          def republish(enriched)
            Transport::Messages::Writeback.new(**enriched, has_embedding: true).publish
          end

          def symbolize(hash)
            hash.transform_keys { |key| key.to_sym }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Apollo
      module Helpers
        # Detects what this node is able to do for Apollo writeback: generate
        # embeddings and/or write knowledge entries to the database. Every
        # predicate is defensive: any StandardError yields false.
        module Capability
          # Ollama model names checked when probing for a local embedding model.
          EMBEDDING_MODELS = %w[mxbai-embed-large bge-large snowflake-arctic-embed].freeze

          module_function

          # @return [Boolean] true when Legion::LLM is loaded and started and
          #   either an Ollama embedding model or a configured provider exists
          def can_embed?
            return false unless defined?(Legion::LLM) && Legion::LLM.started?

            ollama_embedding_available? || cloud_embedding_configured?
          rescue StandardError
            false
          end

          # @return [Boolean] true when the apollo_write setting is on, the data
          #   layer is connected, and the DB user holds INSERT on apollo_entries
          def can_write?
            return false unless apollo_write_enabled?
            return false unless defined?(Legion::Data) && Legion::Data.connected?

            check_db_write_privilege
          rescue StandardError
            false
          end

          # Strict check: only a literal `true` under data.apollo_write enables writes.
          def apollo_write_enabled?
            Legion::Settings.dig(:data, :apollo_write) == true
          rescue StandardError
            false
          end

          # @return [Boolean] whether Ollama discovery reports any EMBEDDING_MODELS entry
          def ollama_embedding_available?
            return false unless defined?(Legion::LLM::Discovery::Ollama)

            EMBEDDING_MODELS.any? { |m| Legion::LLM::Discovery::Ollama.model_available?(m) }
          rescue StandardError
            false
          end

          # @return [Boolean] true when both apollo.embedding.provider and
          #   apollo.embedding.model are set
          def cloud_embedding_configured?
            provider = Legion::Settings.dig(:apollo, :embedding, :provider)
            model = Legion::Settings.dig(:apollo, :embedding, :model)
            !provider.nil? && !model.nil?
          rescue StandardError
            false
          end

          # Asks the database whether the current user may INSERT into
          # apollo_entries. A definitive answer (true or false) is memoized
          # until reset! is called.
          #
          # A failed probe returns false for this call only and is NOT
          # memoized, so a transient connection error cannot disable direct
          # writes for the rest of the process lifetime.
          def check_db_write_privilege
            return @apollo_write_privilege unless @apollo_write_privilege.nil?

            @apollo_write_privilege = Legion::Data.connection
                                                  .fetch("SELECT has_table_privilege(current_user, 'apollo_entries', 'INSERT') AS can_insert")
                                                  .first[:can_insert] == true
          rescue StandardError
            false
          end

          # Clears the memoized privilege so the next can_write? re-queries the database.
          def reset!
            @apollo_write_privilege = nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Apollo
      module Helpers
        # Mechanical tag clean-up: lowercase, alias well-known names, strip
        # punctuation, and join words with single hyphens.
        module TagNormalizer
          # Exact (lowercased) spellings that map to a punctuation-free tag.
          ALIASES = {
            'c#' => 'csharp', '.net' => 'dotnet', 'c++' => 'cplusplus',
            'node.js' => 'nodejs', 'vue.js' => 'vuejs', 'react.js' => 'reactjs'
          }.freeze

          module_function

          # Normalizes one tag.
          #
          # @param raw [#to_s] the tag as supplied
          # @return [String, nil] the cleaned tag, or nil when nothing is left
          def normalize(raw)
            lowered = raw.to_s.strip.downcase
            candidate = ALIASES.fetch(lowered, lowered)

            # Drop everything except lowercase letters, digits, hyphens and spaces.
            cleaned = candidate.gsub(/[^a-z0-9\- ]/, '')
            # Spaces become hyphens; runs of hyphens collapse to one.
            hyphenated = cleaned.gsub(/\s+/, '-').gsub(/-+/, '-')
            trimmed = hyphenated.sub(/^-/, '').sub(/-$/, '')

            trimmed unless trimmed.empty?
          end

          # Normalizes a list of tags, dropping empties and duplicates.
          #
          # @param tags [Array, Object, nil] anything Array() can coerce
          # @param max [Integer] maximum number of tags returned
          # @return [Array<String>] at most +max+ unique tags, in input order
          def normalize_all(tags, max: 5)
            cleaned = Array(tags).map { |tag| normalize(tag) }.compact
            cleaned.uniq.take(max)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'socket'
5
+
6
module Legion
  module Extensions
    module Apollo
      module Helpers
        # Decides whether an LLM response should be captured as Apollo
        # knowledge, builds the ingest payload, and routes it either to a
        # direct write or to the writeback transport, depending on what this
        # node is capable of.
        module Writeback
          # Tool names that count as the LLM having done its own research.
          RESEARCH_TOOLS = %w[read_file search_files search_content run_command].freeze
          MAX_CONTENT_LENGTH = 4000
          MIN_CONTENT_LENGTH = 50
          # Words ignored when deriving tags from the user's query.
          STOP_WORDS = %w[a an the is are was were be been being have has had do does did will would shall
                          should may might can could of in to for on with at by from as into about between
                          how what when where why who which this that these those it its and or but not].freeze

          module_function

          # Entry point: capture the response if writeback is enabled and the
          # response qualifies. Never raises; failures are logged as warnings
          # when Legion::Logging is available.
          #
          # @param request [Object] the LLM request (read via #messages / #caller)
          # @param response [Object] the LLM response (read via #message / #model / #tool_calls)
          # @param enrichments [Hash] pipeline enrichments; 'tool_calls' is consulted
          def evaluate_and_route(request:, response:, enrichments: {})
            return unless writeback_enabled?
            return unless should_capture?(request, response, enrichments)

            payload = build_payload(request: request, response: response)
            route_payload(payload)
          rescue StandardError => e
            Legion::Logging.warn("apollo writeback failed: #{e.message}") if defined?(Legion::Logging)
          end

          # A response is captured only when it is long enough AND the LLM used
          # at least one research tool. Requiring research is what prevents the
          # echo chamber: a response built purely from existing Apollo results
          # involves no research calls and is rejected here.
          #
          # @return [Boolean]
          def should_capture?(_request, response, enrichments)
            content = response_content(response)
            return false if content.nil? || content.length < min_content_length

            extract_tool_calls(response, enrichments).any? do |tc|
              RESEARCH_TOOLS.include?(tc[:name] || tc['name'])
            end
          end

          # Builds the ingest payload for a captured response.
          #
          # The content hash is taken over the truncated content that is
          # actually stored, so it matches what a receiver would compute from
          # the payload's :content.
          #
          # @param source_channel [String, nil] channel name; defaults to 'pipeline'
          # @return [Hash] keyword arguments suitable for the ingest path
          def build_payload(request:, response:, source_channel: nil)
            stored_content = response_content(response)[0...MAX_CONTENT_LENGTH]
            caller_identity = extract_identity(request)
            tags = derive_tags(extract_user_query(request))

            {
              content: stored_content,
              content_type: 'observation',
              tags: Helpers::TagNormalizer.normalize_all(tags),
              source_agent: response.respond_to?(:model) ? response.model : 'unknown',
              source_channel: "#{source_channel || 'pipeline'}_synthesis",
              submitted_by: caller_identity,
              submitted_from: Socket.gethostname,
              knowledge_domain: nil,
              content_hash: content_hash(stored_content)
            }
          end

          # Embeds the payload when this node can, then writes directly when it
          # can both embed and write; otherwise publishes to transport. The
          # caller's hash is left untouched.
          def route_payload(payload)
            can_embed = Helpers::Capability.can_embed?
            can_write = Helpers::Capability.can_write?

            payload = payload.merge(embedding: Helpers::Embedding.generate(text: payload[:content])) if can_embed

            if can_write && can_embed
              write_directly(payload)
            else
              # Flag by the embedding actually present, matching write_directly's fallback.
              publish_to_transport(payload, has_embedding: !payload[:embedding].nil?)
            end
          end

          # Ingests through the Knowledge runner; if that raises, falls back to
          # publishing the payload over transport.
          def write_directly(payload)
            Runners::Knowledge.handle_ingest(**payload)
          rescue StandardError => e
            Legion::Logging.warn("apollo direct write failed, falling back to transport: #{e.message}") if defined?(Legion::Logging)
            publish_to_transport(payload, has_embedding: !payload[:embedding].nil?)
          end

          # Publishes the payload as a writeback message. No-op when
          # Legion::Transport is not loaded; publish errors are logged, not raised.
          def publish_to_transport(payload, has_embedding: false)
            return unless defined?(Legion::Transport)

            Transport::Messages::Writeback.new(
              **payload, has_embedding: has_embedding
            ).publish
          rescue StandardError => e
            Legion::Logging.warn("apollo writeback publish failed: #{e.message}") if defined?(Legion::Logging)
          end

          # Writeback is on unless apollo.writeback.enabled is explicitly false;
          # it also stays on if settings cannot be read.
          def writeback_enabled?
            Legion::Settings.dig(:apollo, :writeback, :enabled) != false
          rescue StandardError
            true
          end

          # @return [Integer] configured minimum length, or MIN_CONTENT_LENGTH
          def min_content_length
            Legion::Settings.dig(:apollo, :writeback, :min_content_length) || MIN_CONTENT_LENGTH
          rescue StandardError
            MIN_CONTENT_LENGTH
          end

          # MD5 hex digest of the content after trimming, lowercasing and
          # collapsing whitespace runs to a single space.
          def content_hash(content)
            normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
            Digest::MD5.hexdigest(normalized)
          end

          # @return [Object, nil] the :content / 'content' of response.message,
          #   or nil when the response has no Hash message
          def response_content(response)
            msg = response.respond_to?(:message) ? response.message : nil
            return nil unless msg.is_a?(Hash)

            msg[:content] || msg['content']
          end

          # @return [String] caller[:requested_by][:identity], or 'unknown'
          def extract_identity(request)
            return 'unknown' unless request.respond_to?(:caller) && request.caller.is_a?(Hash)

            request.caller.dig(:requested_by, :identity) || 'unknown'
          rescue StandardError
            'unknown'
          end

          # Content of the last user message; accepts symbol- or string-keyed
          # messages for both the role and the content.
          #
          # @return [String] the query, or '' when none is found
          def extract_user_query(request)
            return '' unless request.respond_to?(:messages)

            user_msgs = Array(request.messages).select { |m| m[:role] == 'user' || m['role'] == 'user' }
            last_msg = user_msgs.last || {}
            last_msg[:content] || last_msg['content'] || ''
          rescue StandardError
            ''
          end

          # Tool calls from the response and from enrichments['tool_calls'],
          # de-duplicated by tool name.
          def extract_tool_calls(response, enrichments)
            calls = []
            calls += Array(response.tool_calls) if response.respond_to?(:tool_calls)
            calls += Array(enrichments['tool_calls']) if enrichments['tool_calls']
            calls.uniq { |tc| tc[:name] || tc['name'] }
          end

          # Up to five distinct query words, skipping stop words and words
          # shorter than three characters.
          def derive_tags(query)
            words = query.to_s.downcase.gsub(/[^a-z0-9\s]/, '').split
            words.reject { |w| STOP_WORDS.include?(w) || w.length < 3 }
                 .uniq
                 .first(5)
          end
        end
      end
    end
  end
end
@@ -59,12 +59,25 @@ module Legion
59
59
  }
60
60
  end
61
61
 
62
- def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
62
+ def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
63
63
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
64
64
 
65
+ # Content hash dedup
66
+ hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
67
+ if hash
68
+ existing = Legion::Data::Model::ApolloEntry
69
+ .where(content_hash: hash)
70
+ .exclude(status: 'archived')
71
+ .first
72
+ if existing
73
+ existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
74
+ return { success: true, entry_id: existing.id, deduped: true }
75
+ end
76
+ end
77
+
65
78
  embedding = Helpers::Embedding.generate(text: content)
66
79
  content_type_sym = content_type.to_s
67
- tag_array = Array(tags)
80
+ tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
68
81
  domain = knowledge_domain || tag_array.first || 'general'
69
82
 
70
83
  corroborated, existing_id = find_corroboration(embedding, content_type_sym, source_agent, source_channel)
@@ -81,6 +94,9 @@ module Legion
81
94
  tags: Sequel.pg_array(tag_array),
82
95
  status: 'candidate',
83
96
  knowledge_domain: domain,
97
+ submitted_by: submitted_by,
98
+ submitted_from: submitted_from,
99
+ content_hash: hash,
84
100
  embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
85
101
  )
86
102
  existing_id = new_entry.id
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/transport/message' if defined?(Legion::Transport)
4
+
5
module Legion
  module Extensions
    module Apollo
      module Transport
        module Messages
          # Transport message carrying one writeback payload. The routing key
          # depends on whether the payload already has an embedding.
          class Writeback < Legion::Transport::Message
            def exchange
              Exchanges::Apollo
            end

            # Embedded payloads go to the store route; the rest go to vectorize.
            def routing_key
              @options[:has_embedding] ? 'apollo.writeback.store' : 'apollo.writeback.vectorize'
            end

            def type
              'apollo_writeback'
            end

            # Message body: the payload fields with nil values removed.
            # content_hash is forwarded so the hash computed at capture time
            # reaches the ingest path along with the identity fields.
            #
            # @return [Hash]
            def message
              {
                content: @options[:content],
                content_type: @options[:content_type],
                tags: @options[:tags],
                source_agent: @options[:source_agent],
                source_channel: @options[:source_channel],
                submitted_by: @options[:submitted_by],
                submitted_from: @options[:submitted_from],
                embedding: @options[:embedding],
                knowledge_domain: @options[:knowledge_domain],
                content_hash: @options[:content_hash],
                context: @options[:context] || {}
              }.compact
            end

            # @raise [TypeError] when :content is not a String
            def validate
              raise TypeError, 'content is required' unless @options[:content].is_a?(String)

              @valid = true
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/transport/queue' if defined?(Legion::Transport)
4
+
5
module Legion
  module Extensions
    module Apollo
      module Transport
        module Queues
          # Queue definition for writeback messages that already carry an embedding.
          class WritebackStore < Legion::Transport::Queue
            # @return [String] the queue name
            def queue_name = 'apollo.writeback.store'

            # Durable, manually acknowledged queue with a dead-letter exchange argument.
            #
            # @return [Hash] a fresh options hash on every call
            def queue_options
              dead_letter = { 'x-dead-letter-exchange': 'apollo.dlx' }
              { manual_ack: true, durable: true, arguments: dead_letter }
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/transport/queue' if defined?(Legion::Transport)
4
+
5
module Legion
  module Extensions
    module Apollo
      module Transport
        module Queues
          # Queue definition for writeback messages that still need an embedding.
          class WritebackVectorize < Legion::Transport::Queue
            # @return [String] the queue name
            def queue_name = 'apollo.writeback.vectorize'

            # Durable, manually acknowledged queue with a dead-letter exchange argument.
            #
            # @return [Hash] a fresh options hash on every call
            def queue_options
              dead_letter = { 'x-dead-letter-exchange': 'apollo.dlx' }
              { manual_ack: true, durable: true, arguments: dead_letter }
            end
          end
        end
      end
    end
  end
end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.4.6'
6
+ VERSION = '0.4.7'
7
7
  end
8
8
  end
9
9
  end
@@ -4,6 +4,9 @@ require 'legion/extensions/apollo/version'
4
4
  require 'legion/extensions/apollo/helpers/confidence'
5
5
  require 'legion/extensions/apollo/helpers/similarity'
6
6
  require 'legion/extensions/apollo/helpers/graph_query'
7
+ require 'legion/extensions/apollo/helpers/tag_normalizer'
8
+ require 'legion/extensions/apollo/helpers/capability'
9
+ require 'legion/extensions/apollo/helpers/writeback'
7
10
  require 'legion/extensions/apollo/runners/knowledge'
8
11
  require 'legion/extensions/apollo/runners/expertise'
9
12
  require 'legion/extensions/apollo/runners/maintenance'
@@ -21,6 +24,9 @@ if defined?(Legion::Transport)
21
24
  require 'legion/extensions/apollo/transport/queues/gas'
22
25
  require 'legion/extensions/apollo/transport/messages/ingest'
23
26
  require 'legion/extensions/apollo/transport/messages/query'
27
+ require 'legion/extensions/apollo/transport/messages/writeback'
28
+ require 'legion/extensions/apollo/transport/queues/writeback_store'
29
+ require 'legion/extensions/apollo/transport/queues/writeback_vectorize'
24
30
  end
25
31
 
26
32
  module Legion
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Supply a bare Subscription base class when the actor framework is not
# loaded, so the actor file under test can be required on its own.
unless defined?(Legion::Extensions::Actors::Subscription)
  module Legion
    module Extensions
      module Actors
        class Subscription; end # rubocop:disable Lint/EmptyClass
      end
    end
  end
end
# Register the framework file as already loaded (presumably so a later
# require of it is skipped).
subscription_feature = 'legion/extensions/actors/subscription'
$LOADED_FEATURES << subscription_feature unless $LOADED_FEATURES.include?(subscription_feature)

require 'legion/extensions/apollo/helpers/capability'
require 'legion/extensions/apollo/runners/knowledge'
require 'legion/extensions/apollo/actors/writeback_store'

RSpec.describe Legion::Extensions::Apollo::Actor::WritebackStore do
  subject(:actor) { described_class.new }

  describe '#runner_class' do
    it 'returns Knowledge runner string' do
      runner = actor.runner_class
      expect(runner).to eq('Legion::Extensions::Apollo::Runners::Knowledge')
    end
  end

  describe '#runner_function' do
    it 'returns handle_ingest' do
      function = actor.runner_function
      expect(function).to eq('handle_ingest')
    end
  end

  describe '#check_subtask?' do
    it do
      expect(actor.check_subtask?).to be false
    end
  end

  describe '#generate_task?' do
    it do
      expect(actor.generate_task?).to be false
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Supply a bare Subscription base class when the actor framework is not loaded.
unless defined?(Legion::Extensions::Actors::Subscription)
  module Legion
    module Extensions
      module Actors
        class Subscription; end # rubocop:disable Lint/EmptyClass
      end
    end
  end
end
subscription_feature = 'legion/extensions/actors/subscription'
$LOADED_FEATURES << subscription_feature unless $LOADED_FEATURES.include?(subscription_feature)

# Minimal transport doubles used when the real transport library is absent:
# a Message that records its options and a no-op publish, plus an Exchange.
unless defined?(Legion::Transport::Message)
  module Legion
    module Transport
      class Message
        attr_reader :options

        def initialize(**opts)
          @options = opts
        end

        def publish = { published: true }
      end

      class Exchange
        def exchange_name = 'mock'
      end
    end
  end
  %w[legion/transport/message legion/transport/exchange].each do |feature|
    $LOADED_FEATURES << feature unless $LOADED_FEATURES.include?(feature)
  end
end

require 'legion/extensions/apollo/helpers/embedding'
require 'legion/extensions/apollo/helpers/capability'
require 'legion/extensions/apollo/transport/exchanges/apollo'
require 'legion/extensions/apollo/transport/messages/writeback'
require 'legion/extensions/apollo/actors/writeback_vectorize'

RSpec.describe Legion::Extensions::Apollo::Actor::WritebackVectorize do
  subject(:actor) { described_class.new }

  let(:embedding_helper) { Legion::Extensions::Apollo::Helpers::Embedding }
  let(:capability) { Legion::Extensions::Apollo::Helpers::Capability }
  let(:writeback_message) { Legion::Extensions::Apollo::Transport::Messages::Writeback }
  let(:knowledge_runner) { Legion::Extensions::Apollo::Runners::Knowledge }

  describe '#runner_function' do
    it 'returns handle_vectorize' do
      expect(actor.runner_function).to eq('handle_vectorize')
    end
  end

  describe '#handle_vectorize' do
    let(:payload) { { content: 'test content', content_type: 'observation', tags: %w[test] } }

    # Default scenario: embedding succeeds and this node cannot write.
    before do
      allow(embedding_helper).to receive(:generate).and_return(Array.new(1024, 0.1))
      allow(capability).to receive(:can_write?).and_return(false)
    end

    it 'generates embedding and re-publishes when cannot write' do
      msg = instance_double(writeback_message)
      allow(writeback_message).to receive(:new).and_return(msg)
      allow(msg).to receive(:publish)

      outcome = actor.handle_vectorize(payload)

      expect(outcome[:success]).to be true
      expect(outcome[:action]).to eq(:vectorized)
      expect(msg).to have_received(:publish)
    end

    it 'writes directly when can_write? is true' do
      allow(capability).to receive(:can_write?).and_return(true)
      allow(knowledge_runner).to receive(:handle_ingest).and_return({ success: true })

      outcome = actor.handle_vectorize(payload)

      expect(outcome[:success]).to be true
      expect(knowledge_runner).to have_received(:handle_ingest)
    end

    it 'returns error hash on failure' do
      allow(embedding_helper).to receive(:generate).and_raise(RuntimeError, 'boom')

      outcome = actor.handle_vectorize(payload)

      expect(outcome[:success]).to be false
      expect(outcome[:error]).to eq('boom')
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'legion/extensions/apollo/helpers/capability'
5
+
6
# Stand-in for Legion::LLM when the real library is not loaded; reports "not started".
unless defined?(Legion::LLM)
  module Legion
    module LLM
      def self.started?
        false
      end
    end
  end
end

RSpec.describe Legion::Extensions::Apollo::Helpers::Capability do
  # Clear the memoized DB privilege so examples do not leak state into each other.
  before { described_class.instance_variable_set(:@apollo_write_privilege, nil) }

  describe '.can_embed?' do
    it 'returns true when LLM is started and Ollama has a model' do
      allow(Legion::LLM).to receive(:started?).and_return(true)
      allow(described_class).to receive(:ollama_embedding_available?).and_return(true)

      expect(described_class.can_embed?).to be true
    end

    it 'returns false when LLM is not started' do
      allow(Legion::LLM).to receive(:started?).and_return(false) if defined?(Legion::LLM)

      expect(described_class.can_embed?).to be false
    end
  end

  describe '.can_write?' do
    it 'returns false when apollo_write setting is false' do
      allow(Legion::Settings).to receive(:dig).with(:data, :apollo_write).and_return(false)

      expect(described_class.can_write?).to be false
    end

    it 'returns false when Data is not connected' do
      allow(Legion::Settings).to receive(:dig).with(:data, :apollo_write).and_return(true)
      allow(Legion::Data).to receive(:connected?).and_return(false) if defined?(Legion::Data)

      expect(described_class.can_write?).to be false
    end
  end

  describe '.apollo_write_enabled?' do
    it 'reads from settings' do
      allow(Legion::Settings).to receive(:dig).with(:data, :apollo_write).and_return(true)

      expect(described_class.apollo_write_enabled?).to be true
    end

    it 'defaults to false' do
      allow(Legion::Settings).to receive(:dig).with(:data, :apollo_write).and_return(nil)

      expect(described_class.apollo_write_enabled?).to be false
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'legion/extensions/apollo/helpers/tag_normalizer'
5
+
6
RSpec.describe Legion::Extensions::Apollo::Helpers::TagNormalizer do
  # Shorthand wrappers around the module under test.
  def normalized(tag)
    described_class.normalize(tag)
  end

  def normalized_list(tags, **opts)
    described_class.normalize_all(tags, **opts)
  end

  describe '.normalize' do
    it 'lowercases tags' do
      expect(normalized('RabbitMQ')).to eq('rabbitmq')
    end

    it 'strips leading/trailing whitespace' do
      expect(normalized(' hello ')).to eq('hello')
    end

    it 'replaces spaces with hyphens' do
      expect(normalized('message broker')).to eq('message-broker')
    end

    it 'strips special characters except hyphens' do
      expect(normalized('c#')).to eq('csharp')
      expect(normalized('hello!')).to eq('hello')
      expect(normalized('key=value')).to eq('keyvalue')
    end

    it 'collapses multiple hyphens' do
      expect(normalized('a--b---c')).to eq('a-b-c')
    end

    it 'applies known aliases' do
      expect(normalized('C++')).to eq('cplusplus')
      expect(normalized('.NET')).to eq('dotnet')
      expect(normalized('node.js')).to eq('nodejs')
    end

    it 'returns nil for empty results' do
      expect(normalized('!!!')).to be_nil
      expect(normalized('')).to be_nil
    end
  end

  describe '.normalize_all' do
    it 'normalizes, deduplicates, and caps at max' do
      input = %w[RabbitMQ rabbitmq AMQP message-broker extra-tag sixth-tag]
      expected = %w[rabbitmq amqp message-broker extra-tag sixth-tag]

      expect(normalized_list(input, max: 5)).to eq(expected)
    end

    it 'filters out nil results' do
      expect(normalized_list(['!!!', 'valid'])).to eq(['valid'])
    end

    it 'handles nil input' do
      expect(normalized_list(nil)).to eq([])
    end

    it 'defaults max to 5' do
      seven = %w[a b c d e f g]
      expect(normalized_list(seven).length).to eq(5)
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+ require 'legion/extensions/apollo/helpers/tag_normalizer'
5
+ require 'legion/extensions/apollo/helpers/writeback'
6
+
7
RSpec.describe Legion::Extensions::Apollo::Helpers::Writeback do
  # A request whose last user message is a RabbitMQ question from a known caller.
  let(:base_request) do
    double('Request',
           messages: [{ role: 'user', content: 'How does RabbitMQ clustering work?' }],
           caller: { requested_by: { identity: 'user@example.com', type: :human } })
  end

  # A long response that used no tools.
  let(:base_response) do
    double('Response',
           message: { content: 'RabbitMQ clustering works by...' * 20 },
           model: 'claude-sonnet-4-6',
           tool_calls: [])
  end

  describe '.should_capture?' do
    it 'returns false for short responses' do
      short = double('Response', message: { content: 'yes' }, tool_calls: [])
      expect(described_class.should_capture?(base_request, short, {})).to be false
    end

    it 'returns false when no research tools were used' do
      expect(described_class.should_capture?(base_request, base_response, {})).to be false
    end

    it 'returns true when research tools were used' do
      enrichments = { 'tool_calls' => [{ name: 'read_file' }] }
      long_response = double('Response',
                             message: { content: 'x' * 100 },
                             model: 'claude-sonnet-4-6',
                             tool_calls: [{ name: 'read_file' }])
      expect(described_class.should_capture?(base_request, long_response, enrichments)).to be true
    end

    it 'returns false for echo chamber (apollo had results, no additional research)' do
      enrichments = {
        'rag_context:apollo_results' => { count: 3 },
        'tool_calls' => []
      }
      long_response = double('Response', message: { content: 'x' * 100 }, tool_calls: [])
      expect(described_class.should_capture?(base_request, long_response, enrichments)).to be false
    end
  end

  describe '.build_payload' do
    it 'builds payload with identity context' do
      payload = described_class.build_payload(
        request: base_request,
        response: base_response,
        source_channel: 'chat'
      )
      expect(payload[:submitted_by]).to eq('user@example.com')
      expect(payload[:source_agent]).to eq('claude-sonnet-4-6')
      expect(payload[:content_type]).to eq('observation')
      expect(payload[:source_channel]).to eq('chat_synthesis')
    end

    it 'truncates content to max length' do
      long_response = double('Response',
                             message: { content: 'x' * 10_000 },
                             model: 'test')
      payload = described_class.build_payload(request: base_request, response: long_response)
      expect(payload[:content].length).to be <= 4000
    end

    it 'includes content_hash' do
      payload = described_class.build_payload(request: base_request, response: base_response)
      expect(payload[:content_hash]).to be_a(String)
      expect(payload[:content_hash].length).to eq(32)
    end

    it 'normalizes tags' do
      payload = described_class.build_payload(request: base_request, response: base_response)
      expect(payload[:tags]).to all(match(/\A[a-z0-9-]+\z/))
    end
  end

  describe '.content_hash' do
    it 'produces consistent hashes for same content' do
      hash1 = described_class.content_hash('hello world')
      hash2 = described_class.content_hash('hello world')
      expect(hash1).to eq(hash2)
    end

    it 'normalizes whitespace before hashing' do
      # The two inputs must differ in whitespace, otherwise this example only
      # repeats the consistency check above.
      hash1 = described_class.content_hash("  hello \t  world\n")
      hash2 = described_class.content_hash('hello world')
      expect(hash1).to eq(hash2)
    end
  end
end
@@ -5,6 +5,8 @@ require 'legion/extensions/apollo/helpers/confidence'
5
5
  require 'legion/extensions/apollo/helpers/similarity'
6
6
  require 'legion/extensions/apollo/helpers/embedding'
7
7
  require 'legion/extensions/apollo/helpers/graph_query'
8
+ require 'legion/extensions/apollo/helpers/tag_normalizer'
9
+ require 'legion/extensions/apollo/helpers/writeback'
8
10
  require 'legion/extensions/apollo/runners/knowledge'
9
11
 
10
12
  RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
@@ -120,7 +122,12 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
120
122
  allow(Legion::Extensions::Apollo::Helpers::Embedding).to receive(:generate)
121
123
  .and_return(Array.new(1536, 0.0))
122
124
 
125
+ # Corroboration lookup chain
123
126
  allow(mock_entry_class).to receive(:where).and_return(double(exclude: double(limit: empty_dataset)))
127
+ # Content hash dedup chain — no match by default
128
+ dedup_chain = double('dedup_chain')
129
+ allow(mock_entry_class).to receive(:where).with(content_hash: anything).and_return(dedup_chain)
130
+ allow(dedup_chain).to receive(:exclude).with(status: 'archived').and_return(double(first: nil))
124
131
  allow(mock_entry_class).to receive(:db).and_return(mock_db)
125
132
  allow(mock_db).to receive(:fetch).and_return(double(all: []))
126
133
  allow(mock_entry_class).to receive(:create).and_return(mock_entry)
@@ -190,6 +197,53 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
190
197
  ).and_return(mock_entry)
191
198
  host.handle_ingest(content: 'test', content_type: 'fact', source_agent: 'agent-1')
192
199
  end
200
+
201
+ it 'passes submitted_by and submitted_from to create' do
202
+ expect(mock_entry_class).to receive(:create).with(
203
+ hash_including(submitted_by: 'user@example.com', submitted_from: 'node-1')
204
+ ).and_return(mock_entry)
205
+ host.handle_ingest(content: 'test', content_type: 'fact',
206
+ source_agent: 'agent-1',
207
+ submitted_by: 'user@example.com',
208
+ submitted_from: 'node-1')
209
+ end
210
+
211
+ it 'passes content_hash to create' do
212
+ expect(mock_entry_class).to receive(:create).with(
213
+ hash_including(content_hash: 'abc123')
214
+ ).and_return(mock_entry)
215
+ host.handle_ingest(content: 'test', content_type: 'fact',
216
+ source_agent: 'agent-1', content_hash: 'abc123')
217
+ end
218
+
219
+ it 'normalizes tags before storage' do
220
+ expect(mock_entry_class).to receive(:create).with(
221
+ hash_including(tags: Sequel.pg_array(%w[rabbitmq]))
222
+ ).and_return(mock_entry)
223
+ host.handle_ingest(content: 'test', content_type: 'fact',
224
+ tags: ['RabbitMQ'], source_agent: 'agent-1')
225
+ end
226
+
227
+ context 'content hash dedup' do
228
+ let(:existing_entry) do
229
+ double('existing', id: 'uuid-existing', confidence: 0.6,
230
+ update: true)
231
+ end
232
+
233
+ it 'returns deduped result on hash collision with active entry' do
234
+ dedup_dataset = double('dataset')
235
+ allow(mock_entry_class).to receive(:where).with(content_hash: anything).and_return(dedup_dataset)
236
+ allow(dedup_dataset).to receive(:exclude).with(status: 'archived').and_return(dedup_dataset)
237
+ allow(dedup_dataset).to receive(:first).and_return(existing_entry)
238
+
239
+ result = host.handle_ingest(content: 'test', content_type: 'fact',
240
+ source_agent: 'agent-1',
241
+ content_hash: 'deadbeef12345678deadbeef12345678')
242
+ expect(result[:success]).to be true
243
+ expect(result[:deduped]).to be true
244
+ expect(result[:entry_id]).to eq('uuid-existing')
245
+ end
246
+ end
193
247
  end
194
248
 
195
249
  context 'when Sequel raises an error' do
@@ -138,7 +138,7 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Request do
138
138
  stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class)
139
139
  allow(Legion::Extensions::Apollo::Helpers::Embedding).to receive(:generate)
140
140
  .and_return(Array.new(1536, 0.0))
141
- allow(mock_entry_class).to receive(:where).and_return(double(exclude: double(limit: double(each: nil))))
141
+ allow(mock_entry_class).to receive(:where).and_return(double(exclude: double(limit: double(each: nil), first: nil)))
142
142
  allow(mock_entry_class).to receive(:exclude)
143
143
  .and_return(double(exclude: double(limit: double(all: []))))
144
144
  allow(mock_entry_class).to receive(:db).and_return(double(fetch: double(all: [])))
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ unless defined?(Legion::Transport::Message)
6
+ module Legion
7
+ module Transport
8
+ class Message
9
+ attr_reader :options
10
+
11
+ def initialize(**opts)
12
+ @options = opts
13
+ end
14
+
15
+ def publish
16
+ { published: true }
17
+ end
18
+ end
19
+
20
+ class Exchange
21
+ def exchange_name
22
+ 'mock'
23
+ end
24
+ end
25
+ end
26
+ end
27
+ $LOADED_FEATURES << 'legion/transport/message' unless $LOADED_FEATURES.include?('legion/transport/message')
28
+ $LOADED_FEATURES << 'legion/transport/exchange' unless $LOADED_FEATURES.include?('legion/transport/exchange')
29
+ end
30
+
31
+ require 'legion/extensions/apollo/transport/exchanges/apollo'
32
+ require 'legion/extensions/apollo/transport/messages/writeback'
33
+
34
+ RSpec.describe Legion::Extensions::Apollo::Transport::Messages::Writeback do
35
+ let(:base_opts) do
36
+ { content: 'test knowledge', content_type: 'observation',
37
+ tags: %w[test], source_agent: 'claude-sonnet-4-6',
38
+ submitted_by: 'user@example.com', submitted_from: 'node-1' }
39
+ end
40
+
41
+ describe '#routing_key' do
42
+ it 'routes to store when embedding present' do
43
+ msg = described_class.new(**base_opts, has_embedding: true, embedding: [0.1] * 1024)
44
+ expect(msg.routing_key).to eq('apollo.writeback.store')
45
+ end
46
+
47
+ it 'routes to vectorize when no embedding' do
48
+ msg = described_class.new(**base_opts, has_embedding: false)
49
+ expect(msg.routing_key).to eq('apollo.writeback.vectorize')
50
+ end
51
+ end
52
+
53
+ describe '#message' do
54
+ it 'includes identity fields' do
55
+ msg = described_class.new(**base_opts)
56
+ payload = msg.message
57
+ expect(payload[:submitted_by]).to eq('user@example.com')
58
+ expect(payload[:submitted_from]).to eq('node-1')
59
+ expect(payload[:source_agent]).to eq('claude-sonnet-4-6')
60
+ end
61
+
62
+ it 'compacts nil values' do
63
+ msg = described_class.new(**base_opts, embedding: nil, knowledge_domain: nil)
64
+ expect(msg.message).not_to have_key(:embedding)
65
+ expect(msg.message).not_to have_key(:knowledge_domain)
66
+ end
67
+ end
68
+
69
+ describe '#type' do
70
+ it 'returns apollo_writeback' do
71
+ msg = described_class.new(**base_opts)
72
+ expect(msg.type).to eq('apollo_writeback')
73
+ end
74
+ end
75
+
76
+ describe '#validate' do
77
+ it 'raises on missing content' do
78
+ expect { described_class.new(**base_opts, content: nil) }.to raise_error(TypeError)
79
+ end
80
+
81
+ it 'passes with valid content' do
82
+ msg = described_class.new(**base_opts)
83
+ msg.validate
84
+ expect(msg.instance_variable_get(:@valid)).to be true
85
+ end
86
+ end
87
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.6
4
+ version: 0.4.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -153,14 +153,19 @@ files:
153
153
  - lib/legion/extensions/apollo/actors/gas_subscriber.rb
154
154
  - lib/legion/extensions/apollo/actors/ingest.rb
155
155
  - lib/legion/extensions/apollo/actors/query_responder.rb
156
+ - lib/legion/extensions/apollo/actors/writeback_store.rb
157
+ - lib/legion/extensions/apollo/actors/writeback_vectorize.rb
156
158
  - lib/legion/extensions/apollo/api.rb
157
159
  - lib/legion/extensions/apollo/client.rb
158
160
  - lib/legion/extensions/apollo/gaia_integration.rb
161
+ - lib/legion/extensions/apollo/helpers/capability.rb
159
162
  - lib/legion/extensions/apollo/helpers/confidence.rb
160
163
  - lib/legion/extensions/apollo/helpers/embedding.rb
161
164
  - lib/legion/extensions/apollo/helpers/entity_watchdog.rb
162
165
  - lib/legion/extensions/apollo/helpers/graph_query.rb
163
166
  - lib/legion/extensions/apollo/helpers/similarity.rb
167
+ - lib/legion/extensions/apollo/helpers/tag_normalizer.rb
168
+ - lib/legion/extensions/apollo/helpers/writeback.rb
164
169
  - lib/legion/extensions/apollo/runners/entity_extractor.rb
165
170
  - lib/legion/extensions/apollo/runners/expertise.rb
166
171
  - lib/legion/extensions/apollo/runners/gas.rb
@@ -172,24 +177,32 @@ files:
172
177
  - lib/legion/extensions/apollo/transport/exchanges/llm_audit.rb
173
178
  - lib/legion/extensions/apollo/transport/messages/ingest.rb
174
179
  - lib/legion/extensions/apollo/transport/messages/query.rb
180
+ - lib/legion/extensions/apollo/transport/messages/writeback.rb
175
181
  - lib/legion/extensions/apollo/transport/queues/gas.rb
176
182
  - lib/legion/extensions/apollo/transport/queues/ingest.rb
177
183
  - lib/legion/extensions/apollo/transport/queues/query.rb
184
+ - lib/legion/extensions/apollo/transport/queues/writeback_store.rb
185
+ - lib/legion/extensions/apollo/transport/queues/writeback_vectorize.rb
178
186
  - lib/legion/extensions/apollo/version.rb
179
187
  - spec/legion/extensions/apollo/actors/decay_spec.rb
180
188
  - spec/legion/extensions/apollo/actors/entity_watchdog_spec.rb
181
189
  - spec/legion/extensions/apollo/actors/expertise_aggregator_spec.rb
182
190
  - spec/legion/extensions/apollo/actors/gas_subscriber_spec.rb
183
191
  - spec/legion/extensions/apollo/actors/ingest_spec.rb
192
+ - spec/legion/extensions/apollo/actors/writeback_store_spec.rb
193
+ - spec/legion/extensions/apollo/actors/writeback_vectorize_spec.rb
184
194
  - spec/legion/extensions/apollo/api_spec.rb
185
195
  - spec/legion/extensions/apollo/client_spec.rb
186
196
  - spec/legion/extensions/apollo/contradiction_spec.rb
187
197
  - spec/legion/extensions/apollo/gaia_integration_spec.rb
198
+ - spec/legion/extensions/apollo/helpers/capability_spec.rb
188
199
  - spec/legion/extensions/apollo/helpers/confidence_spec.rb
189
200
  - spec/legion/extensions/apollo/helpers/embedding_spec.rb
190
201
  - spec/legion/extensions/apollo/helpers/entity_watchdog_spec.rb
191
202
  - spec/legion/extensions/apollo/helpers/graph_query_spec.rb
192
203
  - spec/legion/extensions/apollo/helpers/similarity_spec.rb
204
+ - spec/legion/extensions/apollo/helpers/tag_normalizer_spec.rb
205
+ - spec/legion/extensions/apollo/helpers/writeback_spec.rb
193
206
  - spec/legion/extensions/apollo/runners/decay_cycle_spec.rb
194
207
  - spec/legion/extensions/apollo/runners/entity_extractor_spec.rb
195
208
  - spec/legion/extensions/apollo/runners/expertise_spec.rb
@@ -202,6 +215,7 @@ files:
202
215
  - spec/legion/extensions/apollo/runners/request_spec.rb
203
216
  - spec/legion/extensions/apollo/transport/messages/ingest_spec.rb
204
217
  - spec/legion/extensions/apollo/transport/messages/query_spec.rb
218
+ - spec/legion/extensions/apollo/transport/messages/writeback_spec.rb
205
219
  - spec/spec_helper.rb
206
220
  homepage: https://github.com/LegionIO/lex-apollo
207
221
  licenses: