lex-apollo 0.4.26 → 0.4.28

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe40d4ba4efef56b2bd1cec166f525ca3c165989453e455bdacc14a2a5377d16
4
- data.tar.gz: e4f7f0daabec002d031c00a6936308df68d75dd2200b1f49f7264de6f2833153
3
+ metadata.gz: d25745968940593d7d1ac3fba3f1ccd1b5d7ce553cca28c1a1fdfa4d3104af11
4
+ data.tar.gz: 96ecdae2a460625d95cbe0e4605f2c60778d035ea3c3f93e19d6560df0a40e70
5
5
  SHA512:
6
- metadata.gz: 7d0995197fb2c7191a81a91bba476792570baaf5e750df8b0464de096beabc82a4fbec8fabb8b09fda967d5c834c7bf682041d8775ce24c602d1790e332692c7
7
- data.tar.gz: 6c0277a64242c3c37795dde7229db4d2f0c27d191a7ad5d252266db1c4b433538ec5741f57624a3e262d1ad1b38ca9abf9c0ec1b2463a7bd69b9fac2126cb190
6
+ metadata.gz: c788a0ab4bd58e70ef2be2b7be88101bcccf4880fecd0ee4b8cd7a1be05cbe9b9e157c99fc7f5724a62a4d6fcdacf6278e8c902368fcc60324d966b204698b48
7
+ data.tar.gz: 1a51dc48ec715adc969325e82f1c7bd81e23b35f716c0ee6c3ded05a9d38eb0b57261121892658fb8715a969f7ab22f1a06664af17aa990dc0e708272229039e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.27] - 2026-05-15
4
+
5
+ ### Added
6
+ - `handle_ingest` now accepts and persists `access_scope`, `identity_principal_id`, `identity_id`, and `identity_canonical_name` to `apollo_entries`. These kwargs are injected automatically by `legion-apollo`'s identity middleware; no callers need updating. Defaults to `access_scope: 'global'` for backward compatibility.
7
+ - `build_semantic_search_sql` accepts an optional `requesting_principal_id:` kwarg and adds an access-scope SQL filter when provided: `global` entries are always visible, `team` entries are visible when the requesting principal shares a group membership with the submitter, and `private` entries are visible only to the owning principal (checked via both `identity_principal_id` FK and an `identities` subquery for multi-provider auth).
8
+ - `handle_query` and `retrieve_relevant` both accept and forward `requesting_principal_id:` to the SQL builder — covers the GAIA path (`handle_query`) and the direct-call path (`retrieve_relevant`).
9
+ - Browse-mode fallback in `handle_query` (`list_entries_chronologically`) also applies the same access-scope filter when `requesting_principal_id:` is provided.
10
+ - Private-entry dedup guard in `active_duplicate_for_hash`: when `access_scope: 'private'` and `identity_principal_id` is set, the dedup query is scoped per-principal so two different principals writing identical content each get their own row.
11
+
12
+ ### Fixed
13
+ - `handle_erasure_request` now clears `identity_principal_id`, `identity_id`, and `identity_canonical_name` on confirmed (redacted) entries in addition to the existing `source_agent`/`source_provider`/`source_channel` redaction — GDPR right-to-erasure compliance gap.
14
+
3
15
  ## [0.4.26] - 2026-05-11
4
16
 
5
17
  ### Fixed
@@ -52,7 +52,9 @@ module Legion
52
52
  SQL
53
53
  end
54
54
 
55
- def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence, statuses: nil, tags: nil, domain: nil, **)
55
+ def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
56
+ statuses: nil, tags: nil, domain: nil,
57
+ requesting_principal_id: nil, **)
56
58
  conditions = ["e.confidence >= #{min_confidence}"]
57
59
 
58
60
  if statuses&.any?
@@ -67,6 +69,23 @@ module Legion
67
69
 
68
70
  conditions << "e.knowledge_domain = '#{domain}'" if domain
69
71
 
72
+ if requesting_principal_id
73
+ pid = requesting_principal_id.to_i
74
+ conditions << <<~SCOPE_SQL.strip
75
+ (e.access_scope = 'global'
76
+ OR (e.access_scope = 'private'
77
+ AND (e.identity_principal_id = #{pid}
78
+ OR e.identity_id IN (SELECT id FROM identities WHERE principal_id = #{pid})))
79
+ OR (e.access_scope = 'team'
80
+ AND EXISTS (
81
+ SELECT 1 FROM identity_group_memberships igm1
82
+ JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id
83
+ WHERE igm1.principal_id = #{pid}
84
+ AND igm2.principal_id = e.identity_principal_id
85
+ )))
86
+ SCOPE_SQL
87
+ end
88
+
70
89
  where_clause = conditions.join(' AND ')
71
90
 
72
91
  <<~SQL
@@ -82,9 +82,13 @@ module Legion
82
82
  }
83
83
  end
84
84
 
85
- def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
85
+ def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', # rubocop:disable Metrics/ParameterLists
86
+ source_provider: nil, source_channel: nil, knowledge_domain: nil,
87
+ submitted_by: nil, submitted_from: nil, content_hash: nil, context: {},
88
+ skip: false, access_scope: 'global', **)
86
89
  return { status: :skipped } if skip
87
90
 
91
+ identity = resolve_process_identity
88
92
  content = normalize_text_input(content)
89
93
  content_type = normalize_content_type(content_type.nil? ? :observation : content_type)
90
94
  log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
@@ -92,7 +96,8 @@ module Legion
92
96
  return early_error if early_error
93
97
 
94
98
  hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
95
- existing = active_duplicate_for_hash(hash)
99
+ existing = active_duplicate_for_hash(hash, access_scope: access_scope,
100
+ identity_principal_id: identity[:principal_id])
96
101
  if existing
97
102
  log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
98
103
  return { success: true, entry_id: existing.id, deduped: true }
@@ -102,7 +107,11 @@ module Legion
102
107
  content_type_sym = content_type.to_s
103
108
  metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
104
109
  source_provider: source_provider, source_channel: source_channel,
105
- submitted_by: submitted_by, submitted_from: submitted_from)
110
+ submitted_by: submitted_by, submitted_from: submitted_from,
111
+ access_scope: access_scope,
112
+ identity_principal_id: identity[:principal_id],
113
+ identity_id: identity[:identity_id],
114
+ identity_canonical_name: identity[:canonical_name])
106
115
 
107
116
  corroborated, existing_id = find_corroboration(
108
117
  embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
@@ -133,9 +142,13 @@ module Legion
133
142
  { success: false, error: e.message }
134
143
  end
135
144
 
136
- def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength, Metrics/CyclomaticComplexity
145
+ def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, # rubocop:disable Metrics/CyclomaticComplexity, Metrics/ParameterLists
146
+ min_confidence: Helpers::GraphQuery.default_query_min_confidence,
147
+ status: UNSET, tags: nil, domain: nil, agent_id: 'unknown',
148
+ requesting_principal_id: nil, **)
137
149
  return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
138
150
 
151
+ requesting_principal_id = resolve_requesting_principal_id(requesting_principal_id)
139
152
  entry_model = Helpers::DataModels.apollo_entry
140
153
  query = normalize_text_input(query)
141
154
  status_defaulted = status.equal?(UNSET)
@@ -143,19 +156,22 @@ module Legion
143
156
  log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
144
157
  if browse_query?(query)
145
158
  return list_entries_chronologically(query: query, limit: limit, status: requested_status,
146
- status_defaulted: status_defaulted, tags: tags, domain: domain)
159
+ status_defaulted: status_defaulted, tags: tags, domain: domain,
160
+ requesting_principal_id: requesting_principal_id)
147
161
  end
148
162
 
149
163
  embedding = embed_text(query)
150
164
  if embedding.nil?
151
165
  log.warn('Apollo Knowledge.handle_query embedding unavailable; falling back to browse query')
152
166
  return list_entries_chronologically(query: query, limit: limit, status: requested_status,
153
- status_defaulted: status_defaulted, tags: tags, domain: domain)
167
+ status_defaulted: status_defaulted, tags: tags, domain: domain,
168
+ requesting_principal_id: requesting_principal_id)
154
169
  end
155
170
 
156
171
  sql = Helpers::GraphQuery.build_semantic_search_sql(
157
172
  limit: limit, min_confidence: min_confidence,
158
- statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
173
+ statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain,
174
+ requesting_principal_id: requesting_principal_id
159
175
  )
160
176
 
161
177
  db = entry_model.db
@@ -251,11 +267,14 @@ module Legion
251
267
  { success: false, error: e.message }
252
268
  end
253
269
 
254
- def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5), min_confidence: Helpers::GraphQuery.default_query_min_confidence, tags: nil, domain: nil, skip: false, **) # rubocop:disable Layout/LineLength
270
+ def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
271
+ min_confidence: Helpers::GraphQuery.default_query_min_confidence,
272
+ tags: nil, domain: nil, skip: false, requesting_principal_id: nil, **)
255
273
  return { status: :skipped } if skip
256
274
 
257
275
  return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
258
276
 
277
+ requesting_principal_id = resolve_requesting_principal_id(requesting_principal_id)
259
278
  query = normalize_text_input(query)
260
279
  log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
261
280
  return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
@@ -268,7 +287,8 @@ module Legion
268
287
 
269
288
  sql = Helpers::GraphQuery.build_semantic_search_sql(
270
289
  limit: limit, min_confidence: min_confidence,
271
- statuses: %w[confirmed candidate], tags: tags, domain: domain
290
+ statuses: %w[confirmed candidate], tags: tags, domain: domain,
291
+ requesting_principal_id: requesting_principal_id
272
292
  )
273
293
 
274
294
  db = Helpers::DataModels.apollo_entry.db
@@ -341,10 +361,16 @@ module Legion
341
361
  .exclude(status: 'confirmed')
342
362
  .delete
343
363
 
344
- # Redact attribution on confirmed entries (corroborated, retain knowledge)
345
364
  redacted = conn[:apollo_entries]
346
365
  .where(source_agent: agent_id, status: 'confirmed')
347
- .update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
366
+ .update(
367
+ source_agent: 'redacted',
368
+ source_provider: nil,
369
+ source_channel: nil,
370
+ identity_principal_id: nil,
371
+ identity_id: nil,
372
+ identity_canonical_name: nil
373
+ )
348
374
 
349
375
  { deleted: deleted, redacted: redacted, agent_id: agent_id }
350
376
  .tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
@@ -425,49 +451,62 @@ module Legion
425
451
  normalize_text_input(value)[0, max_length]
426
452
  end
427
453
 
428
- def active_duplicate_for_hash(hash)
454
+ def active_duplicate_for_hash(hash, access_scope: nil, identity_principal_id: nil)
429
455
  return nil unless hash
430
456
 
431
- existing = Helpers::DataModels.apollo_entry
432
- .where(content_hash: hash)
433
- .exclude(status: 'archived')
434
- .first
457
+ dataset = Helpers::DataModels.apollo_entry
458
+ .where(content_hash: hash)
459
+ .exclude(status: 'archived')
460
+
461
+ dataset = dataset.where(identity_principal_id: identity_principal_id) if access_scope == 'private' && identity_principal_id
462
+
463
+ existing = dataset.first
435
464
  existing&.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
436
465
  log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{existing.id}") if existing
437
466
  existing
438
467
  end
439
468
 
440
- def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, submitted_by:, submitted_from:)
469
+ def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, # rubocop:disable Metrics/ParameterLists
470
+ submitted_by:, submitted_from:, access_scope: 'global',
471
+ identity_principal_id: nil, identity_id: nil, identity_canonical_name: nil)
441
472
  tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
442
473
  agent = truncate_for_column(source_agent, 50) || 'unknown'
443
474
 
444
- { tags: tag_array,
445
- domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
446
- source_agent: agent,
447
- source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
448
- source_channel: truncate_for_column(source_channel, 100),
449
- submitted_by: truncate_for_column(submitted_by, 255),
450
- submitted_from: truncate_for_column(submitted_from, 255) }
475
+ { tags: tag_array,
476
+ domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
477
+ source_agent: agent,
478
+ source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
479
+ source_channel: truncate_for_column(source_channel, 100),
480
+ submitted_by: truncate_for_column(submitted_by, 255),
481
+ submitted_from: truncate_for_column(submitted_from, 255),
482
+ access_scope: access_scope || 'global',
483
+ identity_principal_id: identity_principal_id.is_a?(Integer) ? identity_principal_id : nil,
484
+ identity_id: identity_id.is_a?(Integer) ? identity_id : nil,
485
+ identity_canonical_name: identity_canonical_name&.to_s&.slice(0, 255) }
451
486
  end
452
487
 
453
488
  def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
454
489
  new_entry = Helpers::DataModels.apollo_entry.create(
455
- content: content,
456
- content_type: content_type,
457
- confidence: Helpers::Confidence.initial_confidence,
458
- source_agent: metadata[:source_agent],
459
- source_provider: metadata[:source_provider],
460
- source_channel: metadata[:source_channel],
461
- source_context: json_dump(context.is_a?(Hash) ? context : {}),
462
- tags: Sequel.pg_array(metadata[:tags]),
463
- status: 'candidate',
464
- knowledge_domain: metadata[:domain],
465
- submitted_by: metadata[:submitted_by],
466
- submitted_from: metadata[:submitted_from],
467
- content_hash: content_hash,
468
- embedding: embedding ? Sequel.lit("'[#{embedding.join(',')}]'::vector") : nil
490
+ content: content,
491
+ content_type: content_type,
492
+ confidence: Helpers::Confidence.initial_confidence,
493
+ source_agent: metadata[:source_agent],
494
+ source_provider: metadata[:source_provider],
495
+ source_channel: metadata[:source_channel],
496
+ source_context: json_dump(context.is_a?(Hash) ? context : {}),
497
+ tags: Sequel.pg_array(metadata[:tags]),
498
+ status: 'candidate',
499
+ knowledge_domain: metadata[:domain],
500
+ submitted_by: metadata[:submitted_by],
501
+ submitted_from: metadata[:submitted_from],
502
+ content_hash: content_hash,
503
+ access_scope: metadata[:access_scope] || 'global',
504
+ identity_principal_id: metadata[:identity_principal_id],
505
+ identity_id: metadata[:identity_id],
506
+ identity_canonical_name: metadata[:identity_canonical_name],
507
+ embedding: embedding ? Sequel.lit("'[#{embedding.join(',')}]'::vector") : nil
469
508
  )
470
- log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
509
+ log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]} access_scope=#{metadata[:access_scope]}") # rubocop:disable Layout/LineLength
471
510
  new_entry.id
472
511
  rescue Sequel::UniqueConstraintViolation => e
473
512
  # Race condition: another thread/process inserted the same content_hash between our
@@ -491,7 +530,7 @@ module Legion
491
530
  query.to_s.strip.length < 3
492
531
  end
493
532
 
494
- def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
533
+ def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:, requesting_principal_id: nil)
495
534
  log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
496
535
  dataset = Helpers::DataModels.apollo_entry.exclude(status: 'archived')
497
536
  requested = Array(status).map(&:to_s).reject(&:empty?)
@@ -499,6 +538,18 @@ module Legion
499
538
  dataset = dataset.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if tags && !Array(tags).empty?
500
539
  dataset = dataset.where(knowledge_domain: domain) if domain && !domain.to_s.empty?
501
540
 
541
+ if requesting_principal_id
542
+ pid = requesting_principal_id.to_i
543
+ dataset = dataset.where(
544
+ Sequel.lit(
545
+ "(access_scope = 'global' " \
546
+ "OR (access_scope = 'private' AND (identity_principal_id = ? OR identity_id IN (SELECT id FROM identities WHERE principal_id = ?))) " \
547
+ "OR (access_scope = 'team' AND EXISTS (SELECT 1 FROM identity_group_memberships igm1 JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id WHERE igm1.principal_id = ? AND igm2.principal_id = identity_principal_id)))", # rubocop:disable Layout/LineLength
548
+ pid, pid, pid
549
+ )
550
+ )
551
+ end
552
+
502
553
  entries = dataset.order(Sequel.desc(:created_at)).limit(limit).all.map do |entry|
503
554
  format_entry(entry.is_a?(Hash) ? entry : entry.values)
504
555
  end
@@ -671,6 +722,29 @@ module Legion
671
722
  end
672
723
  end
673
724
 
725
+ def resolve_process_identity
726
+ return { principal_id: nil, identity_id: nil, canonical_name: nil } unless defined?(Legion::Identity::Process)
727
+
728
+ {
729
+ principal_id: Legion::Identity::Process.db_principal_id,
730
+ identity_id: Legion::Identity::Process.db_identity_id,
731
+ canonical_name: Legion::Identity::Process.canonical_name
732
+ }
733
+ rescue StandardError => e
734
+ handle_exception(e, level: :warn, operation: 'apollo.knowledge.resolve_process_identity')
735
+ { principal_id: nil, identity_id: nil, canonical_name: nil }
736
+ end
737
+
738
+ def resolve_requesting_principal_id(explicit_value)
739
+ return explicit_value if explicit_value
740
+ return nil unless defined?(Legion::Identity::Process)
741
+
742
+ Legion::Identity::Process.db_principal_id
743
+ rescue StandardError => e
744
+ handle_exception(e, level: :warn, operation: 'apollo.knowledge.resolve_requesting_principal_id')
745
+ nil
746
+ end
747
+
674
748
  include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
675
749
  include Legion::JSON::Helper
676
750
  include Legion::Settings::Helper
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.4.26'
6
+ VERSION = '0.4.28'
7
7
  end
8
8
  end
9
9
  end
@@ -65,5 +65,36 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::GraphQuery do
65
65
  expect(sql).to include("'dns'")
66
66
  expect(sql).to include('&&')
67
67
  end
68
+
69
+ context 'without requesting_principal_id' do
70
+ it 'includes no access_scope filter' do
71
+ sql = described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3)
72
+ expect(sql).not_to include('access_scope')
73
+ end
74
+ end
75
+
76
+ context 'with requesting_principal_id' do
77
+ let(:sql) { described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3, requesting_principal_id: 42) }
78
+
79
+ it 'allows global entries unconditionally' do
80
+ expect(sql).to include("access_scope = 'global'")
81
+ end
82
+
83
+ it 'allows private entries owned by the principal via principal FK' do
84
+ expect(sql).to include('e.identity_principal_id = 42')
85
+ end
86
+
87
+ it 'allows private entries owned by the principal via identity subquery (multi-provider auth)' do
88
+ expect(sql).to include('SELECT id FROM identities WHERE principal_id = 42')
89
+ end
90
+
91
+ it 'allows team entries when principal shares a group with the submitter' do
92
+ expect(sql).to include('identity_group_memberships')
93
+ end
94
+
95
+ it 'wraps the entire access_scope condition in parentheses so it does not break AND chaining' do
96
+ expect(sql).to match(/AND \(.*access_scope.*\)/m)
97
+ end
98
+ end
68
99
  end
69
100
  end
@@ -371,6 +371,97 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
371
371
  end
372
372
  end
373
373
 
374
+ context 'identity from Legion::Identity::Process' do
375
+ let(:mock_entry_class2) { double('ApolloEntry2') }
376
+ let(:mock_expertise_class2) { double('ApolloExpertise2') }
377
+ let(:mock_access_log_class2) { double('ApolloAccessLog2') }
378
+ let(:mock_entry2) { double('entry2', id: 99, embedding: nil) }
379
+
380
+ before do
381
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class2)
382
+ stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class2)
383
+ stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class2)
384
+ scoped_ds = double('scoped_ds2', first: nil, where: double('scoped_ds2b', first: nil))
385
+ allow(mock_entry_class2).to receive(:where).and_return(scoped_ds)
386
+ allow(scoped_ds).to receive(:exclude).and_return(scoped_ds)
387
+ allow(mock_expertise_class2).to receive(:where).and_return(double(first: nil))
388
+ allow(mock_expertise_class2).to receive(:create)
389
+ allow(mock_access_log_class2).to receive(:create)
390
+ allow(host).to receive(:embed_text).and_return(nil)
391
+ allow(host).to receive(:find_corroboration).and_return([false, nil])
392
+ allow(host).to receive(:detect_contradictions).and_return([])
393
+ stub_const('Legion::Identity::Process', double(
394
+ db_principal_id: 42,
395
+ db_identity_id: 7,
396
+ canonical_name: 'alice'
397
+ ))
398
+ end
399
+
400
+ it 'derives identity from Legion::Identity::Process and persists it' do
401
+ expect(mock_entry_class2).to receive(:create).with(
402
+ hash_including(identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice',
403
+ access_scope: 'private')
404
+ ).and_return(mock_entry2)
405
+
406
+ host.handle_ingest(content: 'test fact', tags: [], access_scope: 'private')
407
+ end
408
+
409
+ it 'defaults access_scope to global when not provided' do
410
+ expect(mock_entry_class2).to receive(:create).with(
411
+ hash_including(access_scope: 'global')
412
+ ).and_return(mock_entry2)
413
+
414
+ host.handle_ingest(content: 'test fact', tags: [])
415
+ end
416
+
417
+ it 'ignores identity kwargs passed by callers' do
418
+ expect(mock_entry_class2).to receive(:create).with(
419
+ hash_including(identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice')
420
+ ).and_return(mock_entry2)
421
+
422
+ host.handle_ingest(
423
+ content: 'test fact', tags: [],
424
+ identity_principal_id: 999, identity_id: 888, identity_canonical_name: 'mallory',
425
+ access_scope: 'private'
426
+ )
427
+ end
428
+ end
429
+
430
+ context 'dedup: private entries from different principals are not deduplicated' do
431
+ let(:mock_entry_class3) { double('ApolloEntry3') }
432
+ let(:mock_expertise_class3) { double('ApolloExpertise3') }
433
+ let(:mock_access_log_class3) { double('ApolloAccessLog3') }
434
+ let(:mock_entry_a) { double('entry_a', id: 7, embedding: nil) }
435
+ let(:mock_entry_b) { double('entry_b', id: 8, embedding: nil) }
436
+
437
+ before do
438
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class3)
439
+ stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class3)
440
+ stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class3)
441
+ allow(mock_expertise_class3).to receive(:where).and_return(double(first: nil))
442
+ allow(mock_expertise_class3).to receive(:create)
443
+ allow(mock_access_log_class3).to receive(:create)
444
+ allow(host).to receive(:embed_text).and_return(nil)
445
+ allow(host).to receive(:find_corroboration).and_return([false, nil])
446
+ allow(host).to receive(:detect_contradictions).and_return([])
447
+ # default dedup returns nil (no duplicate) — supports chained .where for private scope
448
+ scoped_ds3 = double('dedup_chain3', first: nil)
449
+ allow(scoped_ds3).to receive(:where).and_return(double('scoped_ds3b', first: nil))
450
+ allow(scoped_ds3).to receive(:exclude).and_return(scoped_ds3)
451
+ allow(mock_entry_class3).to receive(:where).and_return(scoped_ds3)
452
+ # two different principals each get their own entry
453
+ allow(mock_entry_class3).to receive(:create).and_return(mock_entry_a, mock_entry_b)
454
+ end
455
+
456
+ it 'does not deduplicate private entries from different principals' do
457
+ result1 = host.handle_ingest(content: 'same fact', content_type: :observation,
458
+ access_scope: 'private', identity_principal_id: 1)
459
+ result2 = host.handle_ingest(content: 'same fact', content_type: :observation,
460
+ access_scope: 'private', identity_principal_id: 2)
461
+ expect(result1[:entry_id]).not_to eq(result2[:entry_id])
462
+ end
463
+ end
464
+
374
465
  context 'early-return warn logs' do
375
466
  let(:logger) { instance_double('Logger', debug: nil, info: nil, warn: nil) }
376
467
 
@@ -521,6 +612,37 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
521
612
  expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
522
613
  end
523
614
  end
615
+
616
+ context 'access scope forwarding' do
617
+ let(:mock_entry_class) { double('ApolloEntry') }
618
+ let(:mock_db) { double('db') }
619
+
620
+ before do
621
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
622
+ allow(Legion::LLM::Call::Embeddings).to receive(:generate)
623
+ .and_return({ vector: Array.new(1024, 0.1), model: 'test', provider: :ollama, dimensions: 1024, tokens: 0 })
624
+ allow(mock_entry_class).to receive(:db).and_return(mock_db)
625
+ allow(mock_db).to receive(:fetch).and_return(double(all: []))
626
+ allow(mock_entry_class).to receive(:where).and_return(double(update: true))
627
+ end
628
+
629
+ it 'forwards requesting_principal_id to build_semantic_search_sql' do
630
+ expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
631
+ hash_including(requesting_principal_id: 42)
632
+ ).and_return('SELECT 1')
633
+ allow(mock_db).to receive(:fetch).and_return(double(all: []))
634
+
635
+ host.handle_query(query: 'test', requesting_principal_id: 42)
636
+ end
637
+
638
+ it 'passes nil requesting_principal_id when not provided (no filter)' do
639
+ expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
640
+ hash_including(requesting_principal_id: nil)
641
+ ).and_return('SELECT 1')
642
+
643
+ host.handle_query(query: 'test')
644
+ end
645
+ end
524
646
  end
525
647
 
526
648
  describe '#normalize_text_input' do
@@ -597,6 +719,14 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
597
719
  ).and_call_original
598
720
  host.retrieve_relevant(query: 'treatment', domain: 'clinical')
599
721
  end
722
+
723
+ it 'forwards requesting_principal_id to build_semantic_search_sql' do
724
+ expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
725
+ hash_including(requesting_principal_id: 7)
726
+ ).and_return('SELECT 1')
727
+
728
+ host.retrieve_relevant(query: 'test', requesting_principal_id: 7)
729
+ end
600
730
  end
601
731
  end
602
732
 
@@ -866,6 +996,18 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
866
996
  expect(result[:redacted]).to eq(2)
867
997
  expect(result[:agent_id]).to eq('agent-dead')
868
998
  end
999
+
1000
+ it 'clears identity columns on confirmed (redacted) entries' do
1001
+ expect(mock_dataset).to receive(:update).with(
1002
+ hash_including(
1003
+ identity_principal_id: nil,
1004
+ identity_id: nil,
1005
+ identity_canonical_name: nil
1006
+ )
1007
+ ).and_return(2)
1008
+
1009
+ host.handle_erasure_request(agent_id: 'agent-dead')
1010
+ end
869
1011
  end
870
1012
 
871
1013
  context 'when Sequel raises an error' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.26
4
+ version: 0.4.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity