lex-apollo 0.4.26 → 0.4.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe40d4ba4efef56b2bd1cec166f525ca3c165989453e455bdacc14a2a5377d16
4
- data.tar.gz: e4f7f0daabec002d031c00a6936308df68d75dd2200b1f49f7264de6f2833153
3
+ metadata.gz: 85cccff520bb34332f705695f5193cd55e01bb94fb680bf0a51ad81a04ec3112
4
+ data.tar.gz: b3370806ab160e71f3f213fb4401a6a45aa650778a29574b1cc553a9ad34d167
5
5
  SHA512:
6
- metadata.gz: 7d0995197fb2c7191a81a91bba476792570baaf5e750df8b0464de096beabc82a4fbec8fabb8b09fda967d5c834c7bf682041d8775ce24c602d1790e332692c7
7
- data.tar.gz: 6c0277a64242c3c37795dde7229db4d2f0c27d191a7ad5d252266db1c4b433538ec5741f57624a3e262d1ad1b38ca9abf9c0ec1b2463a7bd69b9fac2126cb190
6
+ metadata.gz: d45dd95922afbff6cf87391eca84b658359fc3b329f80cfd9404054e2e5fc7cff5b398d2b065f407ab9bff3141ba87b9f3d30e0c980e6c4625231edbf591bc18
7
+ data.tar.gz: 47183b00644bae505cd55c1abad707c5c86d901a38472d186bb5601eb3ec4fa0d2452715a667b3f7ebd0081d00ccb3fc3d6d6fabab61e7d146fdb2890deca7f2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.27] - 2026-05-15
4
+
5
+ ### Added
6
+ - `handle_ingest` now accepts `access_scope`, `identity_principal_id`, `identity_id`, and `identity_canonical_name` and persists them to `apollo_entries`. These kwargs are injected automatically by `legion-apollo`'s identity middleware; no callers need updating. `access_scope` defaults to `'global'` for backward compatibility.
7
+ - `build_semantic_search_sql` accepts an optional `requesting_principal_id:` kwarg and adds an access-scope SQL filter when it is provided: `global` entries are always visible, `team` entries are visible when the requesting principal shares a group membership with the submitter, and `private` entries are visible only to the owning principal (matched by either the `identity_principal_id` FK or an `identities` subquery on `identity_id`, to support multi-provider auth). When the kwarg is omitted, no access-scope filter is applied.
8
+ - `handle_query` and `retrieve_relevant` both accept and forward `requesting_principal_id:` to the SQL builder — covers the GAIA path (`handle_query`) and the direct-call path (`retrieve_relevant`).
9
+ - Browse-mode fallback in `handle_query` (`list_entries_chronologically`) also applies the same access-scope filter when `requesting_principal_id:` is provided.
10
+ - Private-entry dedup guard in `active_duplicate_for_hash`: when `access_scope: 'private'` and `identity_principal_id` is set, the dedup query is scoped per-principal so two different principals writing identical content each get their own row.
11
+
12
+ ### Fixed
13
+ - `handle_erasure_request` now clears `identity_principal_id`, `identity_id`, and `identity_canonical_name` on confirmed (redacted) entries in addition to the existing `source_agent`/`source_provider`/`source_channel` redaction, closing a GDPR right-to-erasure compliance gap.
14
+
3
15
  ## [0.4.26] - 2026-05-11
4
16
 
5
17
  ### Fixed
@@ -52,7 +52,9 @@ module Legion
52
52
  SQL
53
53
  end
54
54
 
55
- def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence, statuses: nil, tags: nil, domain: nil, **)
55
+ def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
56
+ statuses: nil, tags: nil, domain: nil,
57
+ requesting_principal_id: nil, **)
56
58
  conditions = ["e.confidence >= #{min_confidence}"]
57
59
 
58
60
  if statuses&.any?
@@ -67,6 +69,23 @@ module Legion
67
69
 
68
70
  conditions << "e.knowledge_domain = '#{domain}'" if domain
69
71
 
72
+ if requesting_principal_id
73
+ pid = requesting_principal_id.to_i
74
+ conditions << <<~SCOPE_SQL.strip
75
+ (e.access_scope = 'global'
76
+ OR (e.access_scope = 'private'
77
+ AND (e.identity_principal_id = #{pid}
78
+ OR e.identity_id IN (SELECT id FROM identities WHERE principal_id = #{pid})))
79
+ OR (e.access_scope = 'team'
80
+ AND EXISTS (
81
+ SELECT 1 FROM identity_group_memberships igm1
82
+ JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id
83
+ WHERE igm1.principal_id = #{pid}
84
+ AND igm2.principal_id = e.identity_principal_id
85
+ )))
86
+ SCOPE_SQL
87
+ end
88
+
70
89
  where_clause = conditions.join(' AND ')
71
90
 
72
91
  <<~SQL
@@ -82,7 +82,11 @@ module Legion
82
82
  }
83
83
  end
84
84
 
85
- def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
85
+ def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', # rubocop:disable Metrics/ParameterLists
86
+ source_provider: nil, source_channel: nil, knowledge_domain: nil,
87
+ submitted_by: nil, submitted_from: nil, content_hash: nil, context: {},
88
+ skip: false, access_scope: 'global', identity_principal_id: nil,
89
+ identity_id: nil, identity_canonical_name: nil, **)
86
90
  return { status: :skipped } if skip
87
91
 
88
92
  content = normalize_text_input(content)
@@ -92,7 +96,8 @@ module Legion
92
96
  return early_error if early_error
93
97
 
94
98
  hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
95
- existing = active_duplicate_for_hash(hash)
99
+ existing = active_duplicate_for_hash(hash, access_scope: access_scope,
100
+ identity_principal_id: identity_principal_id)
96
101
  if existing
97
102
  log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
98
103
  return { success: true, entry_id: existing.id, deduped: true }
@@ -102,7 +107,11 @@ module Legion
102
107
  content_type_sym = content_type.to_s
103
108
  metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
104
109
  source_provider: source_provider, source_channel: source_channel,
105
- submitted_by: submitted_by, submitted_from: submitted_from)
110
+ submitted_by: submitted_by, submitted_from: submitted_from,
111
+ access_scope: access_scope,
112
+ identity_principal_id: identity_principal_id,
113
+ identity_id: identity_id,
114
+ identity_canonical_name: identity_canonical_name)
106
115
 
107
116
  corroborated, existing_id = find_corroboration(
108
117
  embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
@@ -133,7 +142,10 @@ module Legion
133
142
  { success: false, error: e.message }
134
143
  end
135
144
 
136
- def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength, Metrics/CyclomaticComplexity
145
+ def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, # rubocop:disable Metrics/CyclomaticComplexity, Metrics/ParameterLists
146
+ min_confidence: Helpers::GraphQuery.default_query_min_confidence,
147
+ status: UNSET, tags: nil, domain: nil, agent_id: 'unknown',
148
+ requesting_principal_id: nil, **)
137
149
  return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
138
150
 
139
151
  entry_model = Helpers::DataModels.apollo_entry
@@ -143,19 +155,22 @@ module Legion
143
155
  log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
144
156
  if browse_query?(query)
145
157
  return list_entries_chronologically(query: query, limit: limit, status: requested_status,
146
- status_defaulted: status_defaulted, tags: tags, domain: domain)
158
+ status_defaulted: status_defaulted, tags: tags, domain: domain,
159
+ requesting_principal_id: requesting_principal_id)
147
160
  end
148
161
 
149
162
  embedding = embed_text(query)
150
163
  if embedding.nil?
151
164
  log.warn('Apollo Knowledge.handle_query embedding unavailable; falling back to browse query')
152
165
  return list_entries_chronologically(query: query, limit: limit, status: requested_status,
153
- status_defaulted: status_defaulted, tags: tags, domain: domain)
166
+ status_defaulted: status_defaulted, tags: tags, domain: domain,
167
+ requesting_principal_id: requesting_principal_id)
154
168
  end
155
169
 
156
170
  sql = Helpers::GraphQuery.build_semantic_search_sql(
157
171
  limit: limit, min_confidence: min_confidence,
158
- statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
172
+ statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain,
173
+ requesting_principal_id: requesting_principal_id
159
174
  )
160
175
 
161
176
  db = entry_model.db
@@ -251,7 +266,9 @@ module Legion
251
266
  { success: false, error: e.message }
252
267
  end
253
268
 
254
- def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5), min_confidence: Helpers::GraphQuery.default_query_min_confidence, tags: nil, domain: nil, skip: false, **) # rubocop:disable Layout/LineLength
269
+ def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
270
+ min_confidence: Helpers::GraphQuery.default_query_min_confidence,
271
+ tags: nil, domain: nil, skip: false, requesting_principal_id: nil, **)
255
272
  return { status: :skipped } if skip
256
273
 
257
274
  return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
@@ -268,7 +285,8 @@ module Legion
268
285
 
269
286
  sql = Helpers::GraphQuery.build_semantic_search_sql(
270
287
  limit: limit, min_confidence: min_confidence,
271
- statuses: %w[confirmed candidate], tags: tags, domain: domain
288
+ statuses: %w[confirmed candidate], tags: tags, domain: domain,
289
+ requesting_principal_id: requesting_principal_id
272
290
  )
273
291
 
274
292
  db = Helpers::DataModels.apollo_entry.db
@@ -341,10 +359,16 @@ module Legion
341
359
  .exclude(status: 'confirmed')
342
360
  .delete
343
361
 
344
- # Redact attribution on confirmed entries (corroborated, retain knowledge)
345
362
  redacted = conn[:apollo_entries]
346
363
  .where(source_agent: agent_id, status: 'confirmed')
347
- .update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
364
+ .update(
365
+ source_agent: 'redacted',
366
+ source_provider: nil,
367
+ source_channel: nil,
368
+ identity_principal_id: nil,
369
+ identity_id: nil,
370
+ identity_canonical_name: nil
371
+ )
348
372
 
349
373
  { deleted: deleted, redacted: redacted, agent_id: agent_id }
350
374
  .tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
@@ -425,49 +449,62 @@ module Legion
425
449
  normalize_text_input(value)[0, max_length]
426
450
  end
427
451
 
428
- def active_duplicate_for_hash(hash)
452
+ def active_duplicate_for_hash(hash, access_scope: nil, identity_principal_id: nil)
429
453
  return nil unless hash
430
454
 
431
- existing = Helpers::DataModels.apollo_entry
432
- .where(content_hash: hash)
433
- .exclude(status: 'archived')
434
- .first
455
+ dataset = Helpers::DataModels.apollo_entry
456
+ .where(content_hash: hash)
457
+ .exclude(status: 'archived')
458
+
459
+ dataset = dataset.where(identity_principal_id: identity_principal_id) if access_scope == 'private' && identity_principal_id
460
+
461
+ existing = dataset.first
435
462
  existing&.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
436
463
  log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{existing.id}") if existing
437
464
  existing
438
465
  end
439
466
 
440
- def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, submitted_by:, submitted_from:)
467
+ def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, # rubocop:disable Metrics/ParameterLists
468
+ submitted_by:, submitted_from:, access_scope: 'global',
469
+ identity_principal_id: nil, identity_id: nil, identity_canonical_name: nil)
441
470
  tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
442
471
  agent = truncate_for_column(source_agent, 50) || 'unknown'
443
472
 
444
- { tags: tag_array,
445
- domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
446
- source_agent: agent,
447
- source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
448
- source_channel: truncate_for_column(source_channel, 100),
449
- submitted_by: truncate_for_column(submitted_by, 255),
450
- submitted_from: truncate_for_column(submitted_from, 255) }
473
+ { tags: tag_array,
474
+ domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
475
+ source_agent: agent,
476
+ source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
477
+ source_channel: truncate_for_column(source_channel, 100),
478
+ submitted_by: truncate_for_column(submitted_by, 255),
479
+ submitted_from: truncate_for_column(submitted_from, 255),
480
+ access_scope: access_scope || 'global',
481
+ identity_principal_id: identity_principal_id.is_a?(Integer) ? identity_principal_id : nil,
482
+ identity_id: identity_id.is_a?(Integer) ? identity_id : nil,
483
+ identity_canonical_name: identity_canonical_name&.to_s&.slice(0, 255) }
451
484
  end
452
485
 
453
486
  def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
454
487
  new_entry = Helpers::DataModels.apollo_entry.create(
455
- content: content,
456
- content_type: content_type,
457
- confidence: Helpers::Confidence.initial_confidence,
458
- source_agent: metadata[:source_agent],
459
- source_provider: metadata[:source_provider],
460
- source_channel: metadata[:source_channel],
461
- source_context: json_dump(context.is_a?(Hash) ? context : {}),
462
- tags: Sequel.pg_array(metadata[:tags]),
463
- status: 'candidate',
464
- knowledge_domain: metadata[:domain],
465
- submitted_by: metadata[:submitted_by],
466
- submitted_from: metadata[:submitted_from],
467
- content_hash: content_hash,
468
- embedding: embedding ? Sequel.lit("'[#{embedding.join(',')}]'::vector") : nil
488
+ content: content,
489
+ content_type: content_type,
490
+ confidence: Helpers::Confidence.initial_confidence,
491
+ source_agent: metadata[:source_agent],
492
+ source_provider: metadata[:source_provider],
493
+ source_channel: metadata[:source_channel],
494
+ source_context: json_dump(context.is_a?(Hash) ? context : {}),
495
+ tags: Sequel.pg_array(metadata[:tags]),
496
+ status: 'candidate',
497
+ knowledge_domain: metadata[:domain],
498
+ submitted_by: metadata[:submitted_by],
499
+ submitted_from: metadata[:submitted_from],
500
+ content_hash: content_hash,
501
+ access_scope: metadata[:access_scope] || 'global',
502
+ identity_principal_id: metadata[:identity_principal_id],
503
+ identity_id: metadata[:identity_id],
504
+ identity_canonical_name: metadata[:identity_canonical_name],
505
+ embedding: embedding ? Sequel.lit("'[#{embedding.join(',')}]'::vector") : nil
469
506
  )
470
- log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
507
+ log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]} access_scope=#{metadata[:access_scope]}") # rubocop:disable Layout/LineLength
471
508
  new_entry.id
472
509
  rescue Sequel::UniqueConstraintViolation => e
473
510
  # Race condition: another thread/process inserted the same content_hash between our
@@ -491,7 +528,7 @@ module Legion
491
528
  query.to_s.strip.length < 3
492
529
  end
493
530
 
494
- def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
531
+ def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:, requesting_principal_id: nil)
495
532
  log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
496
533
  dataset = Helpers::DataModels.apollo_entry.exclude(status: 'archived')
497
534
  requested = Array(status).map(&:to_s).reject(&:empty?)
@@ -499,6 +536,18 @@ module Legion
499
536
  dataset = dataset.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if tags && !Array(tags).empty?
500
537
  dataset = dataset.where(knowledge_domain: domain) if domain && !domain.to_s.empty?
501
538
 
539
+ if requesting_principal_id
540
+ pid = requesting_principal_id.to_i
541
+ dataset = dataset.where(
542
+ Sequel.lit(
543
+ "(access_scope = 'global' " \
544
+ "OR (access_scope = 'private' AND (identity_principal_id = ? OR identity_id IN (SELECT id FROM identities WHERE principal_id = ?))) " \
545
+ "OR (access_scope = 'team' AND EXISTS (SELECT 1 FROM identity_group_memberships igm1 JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id WHERE igm1.principal_id = ? AND igm2.principal_id = identity_principal_id)))", # rubocop:disable Layout/LineLength
546
+ pid, pid, pid
547
+ )
548
+ )
549
+ end
550
+
502
551
  entries = dataset.order(Sequel.desc(:created_at)).limit(limit).all.map do |entry|
503
552
  format_entry(entry.is_a?(Hash) ? entry : entry.values)
504
553
  end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.4.26'
6
+ VERSION = '0.4.27'
7
7
  end
8
8
  end
9
9
  end
@@ -65,5 +65,36 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::GraphQuery do
65
65
  expect(sql).to include("'dns'")
66
66
  expect(sql).to include('&&')
67
67
  end
68
+
69
+ context 'without requesting_principal_id' do
70
+ it 'includes no access_scope filter' do
71
+ sql = described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3)
72
+ expect(sql).not_to include('access_scope')
73
+ end
74
+ end
75
+
76
+ context 'with requesting_principal_id' do
77
+ let(:sql) { described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3, requesting_principal_id: 42) }
78
+
79
+ it 'allows global entries unconditionally' do
80
+ expect(sql).to include("access_scope = 'global'")
81
+ end
82
+
83
+ it 'allows private entries owned by the principal via principal FK' do
84
+ expect(sql).to include('e.identity_principal_id = 42')
85
+ end
86
+
87
+ it 'allows private entries owned by the principal via identity subquery (multi-provider auth)' do
88
+ expect(sql).to include('SELECT id FROM identities WHERE principal_id = 42')
89
+ end
90
+
91
+ it 'allows team entries when principal shares a group with the submitter' do
92
+ expect(sql).to include('identity_group_memberships')
93
+ end
94
+
95
+ it 'wraps the entire access_scope condition in parentheses so it does not break AND chaining' do
96
+ expect(sql).to match(/AND \(.*access_scope.*\)/m)
97
+ end
98
+ end
68
99
  end
69
100
  end
@@ -371,6 +371,95 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
371
371
  end
372
372
  end
373
373
 
374
+ context 'identity kwargs persistence' do
375
+ let(:mock_entry_class2) { double('ApolloEntry2') }
376
+ let(:mock_expertise_class2) { double('ApolloExpertise2') }
377
+ let(:mock_access_log_class2) { double('ApolloAccessLog2') }
378
+ let(:mock_entry2) { double('entry2', id: 99, embedding: nil) }
379
+
380
+ before do
381
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class2)
382
+ stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class2)
383
+ stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class2)
384
+ scoped_ds = double('scoped_ds2', first: nil, where: double('scoped_ds2b', first: nil))
385
+ allow(mock_entry_class2).to receive(:where).and_return(scoped_ds)
386
+ allow(scoped_ds).to receive(:exclude).and_return(scoped_ds)
387
+ allow(mock_expertise_class2).to receive(:where).and_return(double(first: nil))
388
+ allow(mock_expertise_class2).to receive(:create)
389
+ allow(mock_access_log_class2).to receive(:create)
390
+ allow(host).to receive(:embed_text).and_return(nil)
391
+ allow(host).to receive(:find_corroboration).and_return([false, nil])
392
+ allow(host).to receive(:detect_contradictions).and_return([])
393
+ end
394
+
395
+ it 'passes identity_principal_id and access_scope through to create_candidate_entry' do
396
+ expect(mock_entry_class2).to receive(:create).with(
397
+ hash_including(identity_principal_id: 42, access_scope: 'private')
398
+ ).and_return(mock_entry2)
399
+
400
+ host.handle_ingest(
401
+ content: 'test fact', tags: [],
402
+ identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice',
403
+ access_scope: 'private'
404
+ )
405
+ end
406
+
407
+ it 'defaults access_scope to global when not provided' do
408
+ expect(mock_entry_class2).to receive(:create).with(
409
+ hash_including(access_scope: 'global')
410
+ ).and_return(mock_entry2)
411
+
412
+ host.handle_ingest(content: 'test fact', tags: [])
413
+ end
414
+
415
+ it 'persists identity_id and identity_canonical_name' do
416
+ expect(mock_entry_class2).to receive(:create).with(
417
+ hash_including(identity_id: 7, identity_canonical_name: 'alice')
418
+ ).and_return(mock_entry2)
419
+
420
+ host.handle_ingest(
421
+ content: 'test fact', tags: [],
422
+ identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice',
423
+ access_scope: 'private'
424
+ )
425
+ end
426
+ end
427
+
428
+ context 'dedup: private entries from different principals are not deduplicated' do
429
+ let(:mock_entry_class3) { double('ApolloEntry3') }
430
+ let(:mock_expertise_class3) { double('ApolloExpertise3') }
431
+ let(:mock_access_log_class3) { double('ApolloAccessLog3') }
432
+ let(:mock_entry_a) { double('entry_a', id: 7, embedding: nil) }
433
+ let(:mock_entry_b) { double('entry_b', id: 8, embedding: nil) }
434
+
435
+ before do
436
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class3)
437
+ stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class3)
438
+ stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class3)
439
+ allow(mock_expertise_class3).to receive(:where).and_return(double(first: nil))
440
+ allow(mock_expertise_class3).to receive(:create)
441
+ allow(mock_access_log_class3).to receive(:create)
442
+ allow(host).to receive(:embed_text).and_return(nil)
443
+ allow(host).to receive(:find_corroboration).and_return([false, nil])
444
+ allow(host).to receive(:detect_contradictions).and_return([])
445
+ # default dedup returns nil (no duplicate) — supports chained .where for private scope
446
+ scoped_ds3 = double('dedup_chain3', first: nil)
447
+ allow(scoped_ds3).to receive(:where).and_return(double('scoped_ds3b', first: nil))
448
+ allow(scoped_ds3).to receive(:exclude).and_return(scoped_ds3)
449
+ allow(mock_entry_class3).to receive(:where).and_return(scoped_ds3)
450
+ # two different principals each get their own entry
451
+ allow(mock_entry_class3).to receive(:create).and_return(mock_entry_a, mock_entry_b)
452
+ end
453
+
454
+ it 'does not deduplicate private entries from different principals' do
455
+ result1 = host.handle_ingest(content: 'same fact', content_type: :observation,
456
+ access_scope: 'private', identity_principal_id: 1)
457
+ result2 = host.handle_ingest(content: 'same fact', content_type: :observation,
458
+ access_scope: 'private', identity_principal_id: 2)
459
+ expect(result1[:entry_id]).not_to eq(result2[:entry_id])
460
+ end
461
+ end
462
+
374
463
  context 'early-return warn logs' do
375
464
  let(:logger) { instance_double('Logger', debug: nil, info: nil, warn: nil) }
376
465
 
@@ -521,6 +610,37 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
521
610
  expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
522
611
  end
523
612
  end
613
+
614
+ context 'access scope forwarding' do
615
+ let(:mock_entry_class) { double('ApolloEntry') }
616
+ let(:mock_db) { double('db') }
617
+
618
+ before do
619
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
620
+ allow(Legion::LLM::Call::Embeddings).to receive(:generate)
621
+ .and_return({ vector: Array.new(1024, 0.1), model: 'test', provider: :ollama, dimensions: 1024, tokens: 0 })
622
+ allow(mock_entry_class).to receive(:db).and_return(mock_db)
623
+ allow(mock_db).to receive(:fetch).and_return(double(all: []))
624
+ allow(mock_entry_class).to receive(:where).and_return(double(update: true))
625
+ end
626
+
627
+ it 'forwards requesting_principal_id to build_semantic_search_sql' do
628
+ expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
629
+ hash_including(requesting_principal_id: 42)
630
+ ).and_return('SELECT 1')
631
+ allow(mock_db).to receive(:fetch).and_return(double(all: []))
632
+
633
+ host.handle_query(query: 'test', requesting_principal_id: 42)
634
+ end
635
+
636
+ it 'passes nil requesting_principal_id when not provided (no filter)' do
637
+ expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
638
+ hash_including(requesting_principal_id: nil)
639
+ ).and_return('SELECT 1')
640
+
641
+ host.handle_query(query: 'test')
642
+ end
643
+ end
524
644
  end
525
645
 
526
646
  describe '#normalize_text_input' do
@@ -597,6 +717,14 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
597
717
  ).and_call_original
598
718
  host.retrieve_relevant(query: 'treatment', domain: 'clinical')
599
719
  end
720
+
721
+ it 'forwards requesting_principal_id to build_semantic_search_sql' do
722
+ expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
723
+ hash_including(requesting_principal_id: 7)
724
+ ).and_return('SELECT 1')
725
+
726
+ host.retrieve_relevant(query: 'test', requesting_principal_id: 7)
727
+ end
600
728
  end
601
729
  end
602
730
 
@@ -866,6 +994,18 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
866
994
  expect(result[:redacted]).to eq(2)
867
995
  expect(result[:agent_id]).to eq('agent-dead')
868
996
  end
997
+
998
+ it 'clears identity columns on confirmed (redacted) entries' do
999
+ expect(mock_dataset).to receive(:update).with(
1000
+ hash_including(
1001
+ identity_principal_id: nil,
1002
+ identity_id: nil,
1003
+ identity_canonical_name: nil
1004
+ )
1005
+ ).and_return(2)
1006
+
1007
+ host.handle_erasure_request(agent_id: 'agent-dead')
1008
+ end
869
1009
  end
870
1010
 
871
1011
  context 'when Sequel raises an error' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.26
4
+ version: 0.4.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity