lex-apollo 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -0
  3. data/lib/legion/extensions/apollo/actors/writeback_store.rb +26 -0
  4. data/lib/legion/extensions/apollo/actors/writeback_vectorize.rb +48 -0
  5. data/lib/legion/extensions/apollo/helpers/capability.rb +68 -0
  6. data/lib/legion/extensions/apollo/helpers/embedding.rb +59 -3
  7. data/lib/legion/extensions/apollo/helpers/graph_query.rb +3 -3
  8. data/lib/legion/extensions/apollo/helpers/similarity.rb +13 -0
  9. data/lib/legion/extensions/apollo/helpers/tag_normalizer.rb +36 -0
  10. data/lib/legion/extensions/apollo/helpers/writeback.rb +156 -0
  11. data/lib/legion/extensions/apollo/runners/knowledge.rb +24 -5
  12. data/lib/legion/extensions/apollo/transport/messages/writeback.rb +48 -0
  13. data/lib/legion/extensions/apollo/transport/queues/writeback_store.rb +23 -0
  14. data/lib/legion/extensions/apollo/transport/queues/writeback_vectorize.rb +23 -0
  15. data/lib/legion/extensions/apollo/version.rb +1 -1
  16. data/lib/legion/extensions/apollo.rb +6 -0
  17. data/spec/legion/extensions/apollo/actors/writeback_store_spec.rb +42 -0
  18. data/spec/legion/extensions/apollo/actors/writeback_vectorize_spec.rb +93 -0
  19. data/spec/legion/extensions/apollo/contradiction_spec.rb +1 -1
  20. data/spec/legion/extensions/apollo/helpers/capability_spec.rb +56 -0
  21. data/spec/legion/extensions/apollo/helpers/embedding_spec.rb +6 -6
  22. data/spec/legion/extensions/apollo/helpers/graph_query_spec.rb +2 -2
  23. data/spec/legion/extensions/apollo/helpers/tag_normalizer_spec.rb +62 -0
  24. data/spec/legion/extensions/apollo/helpers/writeback_spec.rb +96 -0
  25. data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +54 -0
  26. data/spec/legion/extensions/apollo/runners/request_spec.rb +1 -1
  27. data/spec/legion/extensions/apollo/transport/messages/writeback_spec.rb +87 -0
  28. metadata +15 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 692cd508a98259d83ceaf2c6b752c7c87ddeb219145291bd568fa76430e41577
4
- data.tar.gz: b02962423b3dff2950af8b51b0cf5035a8db72f6c2ef80c4f1e54f9f0989e25c
3
+ metadata.gz: 4ed5f6ac36b031c41850cae2f4e4c6768666c4151c730b7557f7d5f21ac24df7
4
+ data.tar.gz: 870ffad7838f068a01aa2b03af51ba2ae7df1cfe30debda9a8d3275705ef775a
5
5
  SHA512:
6
- metadata.gz: 7c4989d252540ae09177a73f389a1f4d1a09c51aee09acdf88bdfbb9ffb3e5d7c6522e8cf0953117518011f199f97f1e76dacb6d88aa2f10a48ecfe1da064497
7
- data.tar.gz: a806f1485d4f3ef587f2fbd015180c027934ac87b080c4e0264ccb12a478708d89d0aa313ac3c9b2f20ed3c64ec779e2468ec7b457967c7cb6c1ce780324a884
6
+ metadata.gz: '0583ae3da3f19c3e852929e97947bf49b94e89d37f918d65fcc2507719049ac641ab6867c1e2b7eee899c923c1cb52ec75670c3cab345b159506d34a01099463'
7
+ data.tar.gz: 4f77f0e210baa01f3e77b06808ddfe06456d4f10358b8add5e4d602b8669be9b8223e6b69c30f2b56b2d18f20e5d0eabde504b05a8426812236d9a5918631bc4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.7] - 2026-03-25
4
+
5
+ ### Added
6
+ - Knowledge capture writeback system: `Helpers::Writeback` evaluates LLM responses for Apollo ingest with echo chamber prevention
7
+ - `Helpers::TagNormalizer` for mechanical tag normalization (aliases, special chars, max 5)
8
+ - `Helpers::Capability` for detecting embed/write capabilities per node
9
+ - Writeback transport layer: `Messages::Writeback`, `Queues::WritebackStore`, `Queues::WritebackVectorize`
10
+ - Writeback subscription actors: `Actor::WritebackStore` (pre-embedded), `Actor::WritebackVectorize` (needs embedding)
11
+ - Content hash dedup in `Runners::Knowledge#handle_ingest` — collision boosts confidence instead of inserting
12
+ - Identity tracking: `submitted_by`, `submitted_from`, `content_hash` fields on knowledge entries
13
+
14
+ ## [0.4.6] - 2026-03-25
15
+
16
+ ### Added
17
+ - Apollo-specific embedding provider/model settings: `apollo.embedding.provider` and `apollo.embedding.model` override LLM defaults
18
+ - `embedding_opts` helper reads Apollo settings and passes `provider:`/`model:` to `Legion::LLM.embed`
19
+ - Local-first embedding: `detect_local_model` checks Ollama for pulled 1024-dim models (`mxbai-embed-large`, `bge-large`, `snowflake-arctic-embed`) before falling back to cloud provider
20
+
21
+ ### Changed
22
+ - `DEFAULT_DIMENSION` changed from 1536 to 1024 for cross-provider compatibility (Bedrock Titan v2, OpenAI with dimensions:, Ollama models)
23
+ - `Helpers::Embedding.generate` now passes provider/model from Apollo settings, falling back to LLM defaults when not configured
24
+
3
25
  ## [0.4.5] - 2026-03-25
4
26
 
5
27
  ### Added
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
4
+
5
+ module Legion
6
+ module Extensions
7
+ module Apollo
8
+ module Actor
9
+ class WritebackStore < Legion::Extensions::Actors::Subscription
10
+ def runner_class = 'Legion::Extensions::Apollo::Runners::Knowledge'
11
+ def runner_function = 'handle_ingest'
12
+ def check_subtask? = false
13
+ def generate_task? = false
14
+
15
+ def enabled?
16
+ defined?(Legion::Extensions::Apollo::Runners::Knowledge) &&
17
+ defined?(Legion::Transport) &&
18
+ Helpers::Capability.apollo_write_enabled?
19
+ rescue StandardError
20
+ false
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
4
+
5
+ module Legion
6
+ module Extensions
7
+ module Apollo
8
+ module Actor
9
+ class WritebackVectorize < Legion::Extensions::Actors::Subscription
10
+ def runner_class = self.class
11
+ def runner_function = 'handle_vectorize'
12
+ def check_subtask? = false
13
+ def generate_task? = false
14
+
15
+ def handle_vectorize(payload)
16
+ payload = symbolize(payload)
17
+ embedding = Helpers::Embedding.generate(text: payload[:content])
18
+ enriched = payload.merge(embedding: embedding)
19
+
20
+ if Helpers::Capability.can_write?
21
+ Runners::Knowledge.handle_ingest(**enriched)
22
+ else
23
+ Transport::Messages::Writeback.new(
24
+ **enriched, has_embedding: true
25
+ ).publish
26
+ end
27
+
28
+ { success: true, action: :vectorized }
29
+ rescue StandardError => e
30
+ { success: false, error: e.message }
31
+ end
32
+
33
+ def enabled?
34
+ defined?(Legion::Transport) && Helpers::Capability.can_embed?
35
+ rescue StandardError
36
+ false
37
+ end
38
+
39
+ private
40
+
41
+ def symbolize(hash)
42
+ hash.transform_keys(&:to_sym)
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Apollo
6
+ module Helpers
7
+ module Capability
8
+ EMBEDDING_MODELS = %w[mxbai-embed-large bge-large snowflake-arctic-embed].freeze
9
+
10
+ module_function
11
+
12
+ def can_embed?
13
+ return false unless defined?(Legion::LLM) && Legion::LLM.started?
14
+
15
+ ollama_embedding_available? || cloud_embedding_configured?
16
+ rescue StandardError
17
+ false
18
+ end
19
+
20
+ def can_write?
21
+ return false unless apollo_write_enabled?
22
+ return false unless defined?(Legion::Data) && Legion::Data.connected?
23
+
24
+ check_db_write_privilege
25
+ rescue StandardError
26
+ false
27
+ end
28
+
29
+ def apollo_write_enabled?
30
+ Legion::Settings.dig(:data, :apollo_write) == true
31
+ rescue StandardError
32
+ false
33
+ end
34
+
35
+ def ollama_embedding_available?
36
+ return false unless defined?(Legion::LLM::Discovery::Ollama)
37
+
38
+ EMBEDDING_MODELS.any? { |m| Legion::LLM::Discovery::Ollama.model_available?(m) }
39
+ rescue StandardError
40
+ false
41
+ end
42
+
43
+ def cloud_embedding_configured?
44
+ provider = Legion::Settings.dig(:apollo, :embedding, :provider)
45
+ model = Legion::Settings.dig(:apollo, :embedding, :model)
46
+ !provider.nil? && !model.nil?
47
+ rescue StandardError
48
+ false
49
+ end
50
+
51
+ def check_db_write_privilege
52
+ return @apollo_write_privilege unless @apollo_write_privilege.nil?
53
+
54
+ @apollo_write_privilege = Legion::Data.connection
55
+ .fetch("SELECT has_table_privilege(current_user, 'apollo_entries', 'INSERT') AS can_insert")
56
+ .first[:can_insert] == true
57
+ rescue StandardError
58
+ @apollo_write_privilege = false
59
+ end
60
+
61
+ def reset!
62
+ @apollo_write_privilege = nil
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
@@ -5,7 +5,9 @@ module Legion
5
5
  module Apollo
6
6
  module Helpers
7
7
  module Embedding
8
- DEFAULT_DIMENSION = 1536
8
+ DEFAULT_DIMENSION = 1024
9
+
10
+ LOCAL_EMBEDDING_MODELS = %w[mxbai-embed-large bge-large snowflake-arctic-embed].freeze
9
11
 
10
12
  module_function
11
13
 
@@ -15,8 +17,15 @@ module Legion
15
17
  return zero_vector
16
18
  end
17
19
 
18
- result = Legion::LLM.embed(text)
19
- vector = result.is_a?(Hash) ? result[:vector] : result
20
+ local_model = detect_local_model
21
+ vector = if local_model
22
+ ollama_embed(text, local_model)
23
+ else
24
+ opts = cloud_embedding_opts
25
+ result = Legion::LLM.embed(text, **opts)
26
+ result.is_a?(Hash) ? result[:vector] : result
27
+ end
28
+
20
29
  if vector.is_a?(Array) && vector.any?
21
30
  @dimension = vector.size
22
31
  vector
@@ -38,6 +47,53 @@ module Legion
38
47
  DEFAULT_DIMENSION
39
48
  end
40
49
 
50
+ def ollama_embed(text, model)
51
+ require 'faraday'
52
+ base_url = ollama_base_url
53
+ Legion::Logging.debug("[apollo] embedding via local Ollama: #{model}") if defined?(Legion::Logging)
54
+ conn = Faraday.new(url: base_url) { |f| f.options.timeout = 10 }
55
+ response = conn.post('/api/embed', { model: model, input: text }.to_json,
56
+ 'Content-Type' => 'application/json')
57
+ return nil unless response.success?
58
+
59
+ parsed = ::JSON.parse(response.body)
60
+ parsed['embeddings']&.first
61
+ rescue StandardError => e
62
+ Legion::Logging.warn("[apollo] local Ollama embed failed: #{e.message}") if defined?(Legion::Logging)
63
+ nil
64
+ end
65
+
66
+ def ollama_base_url
67
+ return 'http://localhost:11434' unless defined?(Legion::Settings)
68
+
69
+ Legion::Settings[:llm].dig(:providers, :ollama, :base_url) || 'http://localhost:11434'
70
+ rescue StandardError
71
+ 'http://localhost:11434'
72
+ end
73
+
74
+ def cloud_embedding_opts
75
+ return {} unless defined?(Legion::Settings) && !Legion::Settings[:apollo].nil?
76
+
77
+ embedding = Legion::Settings[:apollo][:embedding] || {}
78
+ opts = {}
79
+ opts[:provider] = embedding[:provider].to_sym if embedding[:provider]
80
+ opts[:model] = embedding[:model] if embedding[:model]
81
+ opts
82
+ rescue StandardError
83
+ {}
84
+ end
85
+
86
+ def detect_local_model
87
+ return nil unless defined?(Legion::LLM::Discovery::Ollama)
88
+
89
+ LOCAL_EMBEDDING_MODELS.find do |m|
90
+ Legion::LLM::Discovery::Ollama.model_available?(m) ||
91
+ Legion::LLM::Discovery::Ollama.model_available?("#{m}:latest")
92
+ end
93
+ rescue StandardError
94
+ nil
95
+ end
96
+
41
97
  def zero_vector
42
98
  Array.new(dimension, 0.0)
43
99
  end
@@ -32,7 +32,7 @@ module Legion
32
32
  SELECT e.id, e.content, e.content_type, e.confidence, e.tags, e.source_agent,
33
33
  0 AS depth, 1.0::float AS activation
34
34
  FROM apollo_entries e
35
- WHERE e.id = $entry_id
35
+ WHERE e.id = :entry_id
36
36
 
37
37
  UNION ALL
38
38
 
@@ -72,11 +72,11 @@ module Legion
72
72
  <<~SQL
73
73
  SELECT e.id, e.content, e.content_type, e.confidence, e.tags, e.source_agent,
74
74
  e.access_count, e.created_at, e.knowledge_domain,
75
- (e.embedding <=> $embedding) AS distance
75
+ (e.embedding <=> :embedding) AS distance
76
76
  FROM apollo_entries e
77
77
  WHERE #{where_clause}
78
78
  AND e.embedding IS NOT NULL
79
- ORDER BY e.embedding <=> $embedding
79
+ ORDER BY e.embedding <=> :embedding
80
80
  LIMIT #{limit}
81
81
  SQL
82
82
  end
@@ -10,6 +10,10 @@ module Legion
10
10
  module_function
11
11
 
12
12
  def cosine_similarity(vec_a:, vec_b:, **)
13
+ vec_a = parse_vector(vec_a)
14
+ vec_b = parse_vector(vec_b)
15
+ return 0.0 unless vec_a.is_a?(Array) && vec_b.is_a?(Array)
16
+
13
17
  dot = vec_a.zip(vec_b).sum { |x, y| x * y }
14
18
  mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
15
19
  mag_b = Math.sqrt(vec_b.sum { |x| x**2 })
@@ -18,6 +22,15 @@ module Legion
18
22
  dot / (mag_a * mag_b)
19
23
  end
20
24
 
25
+ def parse_vector(vec)
26
+ return vec if vec.is_a?(Array)
27
+ return nil unless vec.is_a?(String)
28
+
29
+ ::JSON.parse(vec)
30
+ rescue StandardError
31
+ nil
32
+ end
33
+
21
34
  def above_corroboration_threshold?(similarity:, **)
22
35
  similarity >= Confidence::CORROBORATION_SIMILARITY_THRESHOLD
23
36
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Apollo
6
+ module Helpers
7
+ module TagNormalizer
8
+ ALIASES = {
9
+ 'c#' => 'csharp', '.net' => 'dotnet', 'c++' => 'cplusplus',
10
+ 'node.js' => 'nodejs', 'vue.js' => 'vuejs', 'react.js' => 'reactjs'
11
+ }.freeze
12
+
13
+ module_function
14
+
15
+ def normalize(raw)
16
+ tag = raw.to_s.strip.downcase
17
+ tag = ALIASES[tag] if ALIASES.key?(tag)
18
+ tag = tag.gsub(/[^a-z0-9\- ]/, '')
19
+ .gsub(/\s+/, '-')
20
+ .gsub(/-+/, '-')
21
+ .sub(/^-/, '')
22
+ .sub(/-$/, '')
23
+ tag.empty? ? nil : tag
24
+ end
25
+
26
+ def normalize_all(tags, max: 5)
27
+ Array(tags)
28
+ .filter_map { |t| normalize(t) }
29
+ .uniq
30
+ .first(max)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'socket'
5
+
6
+ module Legion
7
+ module Extensions
8
+ module Apollo
9
+ module Helpers
10
+ module Writeback
11
+ RESEARCH_TOOLS = %w[read_file search_files search_content run_command].freeze
12
+ MAX_CONTENT_LENGTH = 4000
13
+ MIN_CONTENT_LENGTH = 50
14
+
15
+ module_function
16
+
17
+ def evaluate_and_route(request:, response:, enrichments: {})
18
+ return unless writeback_enabled?
19
+ return unless should_capture?(request, response, enrichments)
20
+
21
+ payload = build_payload(request: request, response: response)
22
+ route_payload(payload)
23
+ rescue StandardError => e
24
+ Legion::Logging.warn("apollo writeback failed: #{e.message}") if defined?(Legion::Logging)
25
+ end
26
+
27
+ def should_capture?(_request, response, enrichments)
28
+ content = response_content(response)
29
+ return false if content.nil? || content.length < min_content_length
30
+
31
+ tool_calls = extract_tool_calls(response, enrichments)
32
+ research_calls = tool_calls.select { |tc| RESEARCH_TOOLS.include?(tc[:name] || tc['name']) }
33
+
34
+ return false if research_calls.empty?
35
+
36
+ apollo_results = enrichments['rag_context:apollo_results']
37
+ return true if apollo_results.nil? || (apollo_results[:count] || 0).zero?
38
+
39
+ # Apollo had results — only capture if LLM also did additional research
40
+ research_calls.any?
41
+ end
42
+
43
+ def build_payload(request:, response:, source_channel: nil)
44
+ content = response_content(response)
45
+ caller_identity = extract_identity(request)
46
+ user_query = extract_user_query(request)
47
+ tags = derive_tags(user_query)
48
+
49
+ {
50
+ content: content[0...MAX_CONTENT_LENGTH],
51
+ content_type: 'observation',
52
+ tags: Helpers::TagNormalizer.normalize_all(tags),
53
+ source_agent: response.respond_to?(:model) ? response.model : 'unknown',
54
+ source_channel: "#{source_channel || 'pipeline'}_synthesis",
55
+ submitted_by: caller_identity,
56
+ submitted_from: Socket.gethostname,
57
+ knowledge_domain: nil,
58
+ content_hash: content_hash(content)
59
+ }
60
+ end
61
+
62
+ def route_payload(payload)
63
+ can_embed = Helpers::Capability.can_embed?
64
+ can_write = Helpers::Capability.can_write?
65
+
66
+ if can_embed
67
+ embedding = Helpers::Embedding.generate(text: payload[:content])
68
+ payload[:embedding] = embedding
69
+ end
70
+
71
+ if can_write && can_embed
72
+ write_directly(payload)
73
+ else
74
+ publish_to_transport(payload, has_embedding: can_embed)
75
+ end
76
+ end
77
+
78
+ def write_directly(payload)
79
+ Runners::Knowledge.handle_ingest(**payload)
80
+ rescue StandardError => e
81
+ Legion::Logging.warn("apollo direct write failed, falling back to transport: #{e.message}") if defined?(Legion::Logging)
82
+ publish_to_transport(payload, has_embedding: !payload[:embedding].nil?)
83
+ end
84
+
85
+ def publish_to_transport(payload, has_embedding: false)
86
+ return unless defined?(Legion::Transport)
87
+
88
+ Transport::Messages::Writeback.new(
89
+ **payload, has_embedding: has_embedding
90
+ ).publish
91
+ rescue StandardError => e
92
+ Legion::Logging.warn("apollo writeback publish failed: #{e.message}") if defined?(Legion::Logging)
93
+ end
94
+
95
+ def writeback_enabled?
96
+ Legion::Settings.dig(:apollo, :writeback, :enabled) != false
97
+ rescue StandardError
98
+ true
99
+ end
100
+
101
+ def min_content_length
102
+ Legion::Settings.dig(:apollo, :writeback, :min_content_length) || MIN_CONTENT_LENGTH
103
+ rescue StandardError
104
+ MIN_CONTENT_LENGTH
105
+ end
106
+
107
+ def content_hash(content)
108
+ normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
109
+ Digest::MD5.hexdigest(normalized)
110
+ end
111
+
112
+ def response_content(response)
113
+ msg = response.respond_to?(:message) ? response.message : nil
114
+ return nil unless msg.is_a?(Hash)
115
+
116
+ msg[:content] || msg['content']
117
+ end
118
+
119
+ def extract_identity(request)
120
+ return 'unknown' unless request.respond_to?(:caller) && request.caller.is_a?(Hash)
121
+
122
+ request.caller.dig(:requested_by, :identity) || 'unknown'
123
+ rescue StandardError
124
+ 'unknown'
125
+ end
126
+
127
+ def extract_user_query(request)
128
+ return '' unless request.respond_to?(:messages)
129
+
130
+ user_msgs = Array(request.messages).select { |m| m[:role] == 'user' || m['role'] == 'user' }
131
+ (user_msgs.last || {})[:content] || ''
132
+ rescue StandardError
133
+ ''
134
+ end
135
+
136
+ def extract_tool_calls(response, enrichments)
137
+ calls = []
138
+ calls += Array(response.tool_calls) if response.respond_to?(:tool_calls)
139
+ calls += Array(enrichments['tool_calls']) if enrichments['tool_calls']
140
+ calls.uniq { |tc| tc[:name] || tc['name'] }
141
+ end
142
+
143
+ def derive_tags(query)
144
+ stop_words = %w[a an the is are was were be been being have has had do does did will would shall
145
+ should may might can could of in to for on with at by from as into about between
146
+ how what when where why who which this that these those it its and or but not]
147
+ words = query.to_s.downcase.gsub(/[^a-z0-9\s]/, '').split
148
+ words.reject { |w| stop_words.include?(w) || w.length < 3 }
149
+ .uniq
150
+ .first(5)
151
+ end
152
+ end
153
+ end
154
+ end
155
+ end
156
+ end
@@ -59,12 +59,25 @@ module Legion
59
59
  }
60
60
  end
61
61
 
62
- def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
62
+ def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
63
63
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
64
64
 
65
+ # Content hash dedup
66
+ hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
67
+ if hash
68
+ existing = Legion::Data::Model::ApolloEntry
69
+ .where(content_hash: hash)
70
+ .exclude(status: 'archived')
71
+ .first
72
+ if existing
73
+ existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
74
+ return { success: true, entry_id: existing.id, deduped: true }
75
+ end
76
+ end
77
+
65
78
  embedding = Helpers::Embedding.generate(text: content)
66
79
  content_type_sym = content_type.to_s
67
- tag_array = Array(tags)
80
+ tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
68
81
  domain = knowledge_domain || tag_array.first || 'general'
69
82
 
70
83
  corroborated, existing_id = find_corroboration(embedding, content_type_sym, source_agent, source_channel)
@@ -81,6 +94,9 @@ module Legion
81
94
  tags: Sequel.pg_array(tag_array),
82
95
  status: 'candidate',
83
96
  knowledge_domain: domain,
97
+ submitted_by: submitted_by,
98
+ submitted_from: submitted_from,
99
+ content_hash: hash,
84
100
  embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
85
101
  )
86
102
  existing_id = new_entry.id
@@ -112,6 +128,8 @@ module Legion
112
128
  db = Legion::Data::Model::ApolloEntry.db
113
129
  entries = db.fetch(sql, embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")).all
114
130
 
131
+ entries = entries.reject { |e| e[:distance].respond_to?(:nan?) && e[:distance].nan? }
132
+
115
133
  entries.each do |entry|
116
134
  Legion::Data::Model::ApolloEntry.where(id: entry[:id]).update(
117
135
  access_count: Sequel.expr(:access_count) + 1,
@@ -130,7 +148,7 @@ module Legion
130
148
 
131
149
  formatted = entries.map do |entry|
132
150
  { id: entry[:id], content: entry[:content], content_type: entry[:content_type],
133
- confidence: entry[:confidence], distance: entry[:distance],
151
+ confidence: entry[:confidence], distance: entry[:distance]&.to_f,
134
152
  tags: entry[:tags], source_agent: entry[:source_agent],
135
153
  knowledge_domain: entry[:knowledge_domain] }
136
154
  end
@@ -218,6 +236,7 @@ module Legion
218
236
 
219
237
  db = Legion::Data::Model::ApolloEntry.db
220
238
  entries = db.fetch(sql, embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")).all
239
+ entries = entries.reject { |e| e[:distance].respond_to?(:nan?) && e[:distance].nan? }
221
240
 
222
241
  entries.each do |entry|
223
242
  Legion::Data::Model::ApolloEntry.where(id: entry[:id]).update(
@@ -228,7 +247,7 @@ module Legion
228
247
 
229
248
  formatted = entries.map do |entry|
230
249
  { id: entry[:id], content: entry[:content], content_type: entry[:content_type],
231
- confidence: entry[:confidence], distance: entry[:distance],
250
+ confidence: entry[:confidence], distance: entry[:distance]&.to_f,
232
251
  tags: entry[:tags], source_agent: entry[:source_agent],
233
252
  knowledge_domain: entry[:knowledge_domain] }
234
253
  end
@@ -313,7 +332,7 @@ module Legion
313
332
 
314
333
  db = Legion::Data::Model::ApolloEntry.db
315
334
  similar = db.fetch(
316
- "SELECT id, content, embedding FROM apollo_entries WHERE id != $entry_id AND embedding IS NOT NULL ORDER BY embedding <=> $embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
335
+ "SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
317
336
  entry_id: entry_id,
318
337
  embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
319
338
  ).all
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/transport/message' if defined?(Legion::Transport)
4
+
5
+ module Legion
6
+ module Extensions
7
+ module Apollo
8
+ module Transport
9
+ module Messages
10
+ class Writeback < Legion::Transport::Message
11
+ def exchange
12
+ Exchanges::Apollo
13
+ end
14
+
15
+ def routing_key
16
+ @options[:has_embedding] ? 'apollo.writeback.store' : 'apollo.writeback.vectorize'
17
+ end
18
+
19
+ def type
20
+ 'apollo_writeback'
21
+ end
22
+
23
+ def message
24
+ {
25
+ content: @options[:content],
26
+ content_type: @options[:content_type],
27
+ tags: @options[:tags],
28
+ source_agent: @options[:source_agent],
29
+ source_channel: @options[:source_channel],
30
+ submitted_by: @options[:submitted_by],
31
+ submitted_from: @options[:submitted_from],
32
+ embedding: @options[:embedding],
33
+ knowledge_domain: @options[:knowledge_domain],
34
+ context: @options[:context] || {}
35
+ }.compact
36
+ end
37
+
38
+ def validate
39
+ raise TypeError, 'content is required' unless @options[:content].is_a?(String)
40
+
41
+ @valid = true
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/transport/queue' if defined?(Legion::Transport)
4
+
5
+ module Legion
6
+ module Extensions
7
+ module Apollo
8
+ module Transport
9
+ module Queues
10
+ class WritebackStore < Legion::Transport::Queue
11
+ def queue_name
12
+ 'apollo.writeback.store'
13
+ end
14
+
15
+ def queue_options
16
+ { manual_ack: true, durable: true, arguments: { 'x-dead-letter-exchange': 'apollo.dlx' } }
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end