lex-apollo 0.4.26 → 0.4.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/extensions/apollo/helpers/graph_query.rb +20 -1
- data/lib/legion/extensions/apollo/runners/knowledge.rb +89 -40
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/spec/legion/extensions/apollo/helpers/graph_query_spec.rb +31 -0
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +140 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 85cccff520bb34332f705695f5193cd55e01bb94fb680bf0a51ad81a04ec3112
|
|
4
|
+
data.tar.gz: b3370806ab160e71f3f213fb4401a6a45aa650778a29574b1cc553a9ad34d167
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d45dd95922afbff6cf87391eca84b658359fc3b329f80cfd9404054e2e5fc7cff5b398d2b065f407ab9bff3141ba87b9f3d30e0c980e6c4625231edbf591bc18
|
|
7
|
+
data.tar.gz: 47183b00644bae505cd55c1abad707c5c86d901a38472d186bb5601eb3ec4fa0d2452715a667b3f7ebd0081d00ccb3fc3d6d6fabab61e7d146fdb2890deca7f2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.27] - 2026-05-15
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `handle_ingest` now accepts and persists `access_scope`, `identity_principal_id`, `identity_id`, and `identity_canonical_name` to `apollo_entries`. These kwargs are injected automatically by `legion-apollo`'s identity middleware; no callers need updating. Defaults to `access_scope: 'global'` for backward compatibility.
|
|
7
|
+
- `build_semantic_search_sql` accepts an optional `requesting_principal_id:` kwarg and adds an access-scope SQL filter when provided: `global` entries are always visible, `team` entries are visible when the requesting principal shares a group membership with the submitter, and `private` entries are visible only to the owning principal (checked via both `identity_principal_id` FK and an `identities` subquery for multi-provider auth).
|
|
8
|
+
- `handle_query` and `retrieve_relevant` both accept and forward `requesting_principal_id:` to the SQL builder — covers the GAIA path (`handle_query`) and the direct-call path (`retrieve_relevant`).
|
|
9
|
+
- Browse-mode fallback in `handle_query` (`list_entries_chronologically`) also applies the same access-scope filter when `requesting_principal_id:` is provided.
|
|
10
|
+
- Private-entry dedup guard in `active_duplicate_for_hash`: when `access_scope` is `'private'` and `identity_principal_id` is set, the dedup query is scoped per-principal, so two different principals writing identical content each get their own row.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- `handle_erasure_request` now clears `identity_principal_id`, `identity_id`, and `identity_canonical_name` on confirmed (redacted) entries in addition to the existing `source_agent`/`source_provider`/`source_channel` redaction, closing a GDPR right-to-erasure compliance gap.
|
|
14
|
+
|
|
3
15
|
## [0.4.26] - 2026-05-11
|
|
4
16
|
|
|
5
17
|
### Fixed
|
|
@@ -52,7 +52,9 @@ module Legion
|
|
|
52
52
|
SQL
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
|
|
55
|
+
def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
|
|
56
|
+
statuses: nil, tags: nil, domain: nil,
|
|
57
|
+
requesting_principal_id: nil, **)
|
|
56
58
|
conditions = ["e.confidence >= #{min_confidence}"]
|
|
57
59
|
|
|
58
60
|
if statuses&.any?
|
|
@@ -67,6 +69,23 @@ module Legion
|
|
|
67
69
|
|
|
68
70
|
conditions << "e.knowledge_domain = '#{domain}'" if domain
|
|
69
71
|
|
|
72
|
+
if requesting_principal_id
|
|
73
|
+
pid = requesting_principal_id.to_i
|
|
74
|
+
conditions << <<~SCOPE_SQL.strip
|
|
75
|
+
(e.access_scope = 'global'
|
|
76
|
+
OR (e.access_scope = 'private'
|
|
77
|
+
AND (e.identity_principal_id = #{pid}
|
|
78
|
+
OR e.identity_id IN (SELECT id FROM identities WHERE principal_id = #{pid})))
|
|
79
|
+
OR (e.access_scope = 'team'
|
|
80
|
+
AND EXISTS (
|
|
81
|
+
SELECT 1 FROM identity_group_memberships igm1
|
|
82
|
+
JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id
|
|
83
|
+
WHERE igm1.principal_id = #{pid}
|
|
84
|
+
AND igm2.principal_id = e.identity_principal_id
|
|
85
|
+
)))
|
|
86
|
+
SCOPE_SQL
|
|
87
|
+
end
|
|
88
|
+
|
|
70
89
|
where_clause = conditions.join(' AND ')
|
|
71
90
|
|
|
72
91
|
<<~SQL
|
|
@@ -82,7 +82,11 @@ module Legion
|
|
|
82
82
|
}
|
|
83
83
|
end
|
|
84
84
|
|
|
85
|
-
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown',
|
|
85
|
+
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', # rubocop:disable Metrics/ParameterLists
|
|
86
|
+
source_provider: nil, source_channel: nil, knowledge_domain: nil,
|
|
87
|
+
submitted_by: nil, submitted_from: nil, content_hash: nil, context: {},
|
|
88
|
+
skip: false, access_scope: 'global', identity_principal_id: nil,
|
|
89
|
+
identity_id: nil, identity_canonical_name: nil, **)
|
|
86
90
|
return { status: :skipped } if skip
|
|
87
91
|
|
|
88
92
|
content = normalize_text_input(content)
|
|
@@ -92,7 +96,8 @@ module Legion
|
|
|
92
96
|
return early_error if early_error
|
|
93
97
|
|
|
94
98
|
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
95
|
-
existing = active_duplicate_for_hash(hash
|
|
99
|
+
existing = active_duplicate_for_hash(hash, access_scope: access_scope,
|
|
100
|
+
identity_principal_id: identity_principal_id)
|
|
96
101
|
if existing
|
|
97
102
|
log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
|
|
98
103
|
return { success: true, entry_id: existing.id, deduped: true }
|
|
@@ -102,7 +107,11 @@ module Legion
|
|
|
102
107
|
content_type_sym = content_type.to_s
|
|
103
108
|
metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
|
|
104
109
|
source_provider: source_provider, source_channel: source_channel,
|
|
105
|
-
submitted_by: submitted_by, submitted_from: submitted_from
|
|
110
|
+
submitted_by: submitted_by, submitted_from: submitted_from,
|
|
111
|
+
access_scope: access_scope,
|
|
112
|
+
identity_principal_id: identity_principal_id,
|
|
113
|
+
identity_id: identity_id,
|
|
114
|
+
identity_canonical_name: identity_canonical_name)
|
|
106
115
|
|
|
107
116
|
corroborated, existing_id = find_corroboration(
|
|
108
117
|
embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
|
|
@@ -133,7 +142,10 @@ module Legion
|
|
|
133
142
|
{ success: false, error: e.message }
|
|
134
143
|
end
|
|
135
144
|
|
|
136
|
-
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit,
|
|
145
|
+
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, # rubocop:disable Metrics/CyclomaticComplexity, Metrics/ParameterLists
|
|
146
|
+
min_confidence: Helpers::GraphQuery.default_query_min_confidence,
|
|
147
|
+
status: UNSET, tags: nil, domain: nil, agent_id: 'unknown',
|
|
148
|
+
requesting_principal_id: nil, **)
|
|
137
149
|
return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
|
|
138
150
|
|
|
139
151
|
entry_model = Helpers::DataModels.apollo_entry
|
|
@@ -143,19 +155,22 @@ module Legion
|
|
|
143
155
|
log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
144
156
|
if browse_query?(query)
|
|
145
157
|
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
146
|
-
status_defaulted: status_defaulted, tags: tags, domain: domain
|
|
158
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain,
|
|
159
|
+
requesting_principal_id: requesting_principal_id)
|
|
147
160
|
end
|
|
148
161
|
|
|
149
162
|
embedding = embed_text(query)
|
|
150
163
|
if embedding.nil?
|
|
151
164
|
log.warn('Apollo Knowledge.handle_query embedding unavailable; falling back to browse query')
|
|
152
165
|
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
153
|
-
status_defaulted: status_defaulted, tags: tags, domain: domain
|
|
166
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain,
|
|
167
|
+
requesting_principal_id: requesting_principal_id)
|
|
154
168
|
end
|
|
155
169
|
|
|
156
170
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
157
171
|
limit: limit, min_confidence: min_confidence,
|
|
158
|
-
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
|
|
172
|
+
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain,
|
|
173
|
+
requesting_principal_id: requesting_principal_id
|
|
159
174
|
)
|
|
160
175
|
|
|
161
176
|
db = entry_model.db
|
|
@@ -251,7 +266,9 @@ module Legion
|
|
|
251
266
|
{ success: false, error: e.message }
|
|
252
267
|
end
|
|
253
268
|
|
|
254
|
-
def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
|
|
269
|
+
def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
|
|
270
|
+
min_confidence: Helpers::GraphQuery.default_query_min_confidence,
|
|
271
|
+
tags: nil, domain: nil, skip: false, requesting_principal_id: nil, **)
|
|
255
272
|
return { status: :skipped } if skip
|
|
256
273
|
|
|
257
274
|
return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
|
|
@@ -268,7 +285,8 @@ module Legion
|
|
|
268
285
|
|
|
269
286
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
270
287
|
limit: limit, min_confidence: min_confidence,
|
|
271
|
-
statuses: %w[confirmed candidate], tags: tags, domain: domain
|
|
288
|
+
statuses: %w[confirmed candidate], tags: tags, domain: domain,
|
|
289
|
+
requesting_principal_id: requesting_principal_id
|
|
272
290
|
)
|
|
273
291
|
|
|
274
292
|
db = Helpers::DataModels.apollo_entry.db
|
|
@@ -341,10 +359,16 @@ module Legion
|
|
|
341
359
|
.exclude(status: 'confirmed')
|
|
342
360
|
.delete
|
|
343
361
|
|
|
344
|
-
# Redact attribution on confirmed entries (corroborated, retain knowledge)
|
|
345
362
|
redacted = conn[:apollo_entries]
|
|
346
363
|
.where(source_agent: agent_id, status: 'confirmed')
|
|
347
|
-
.update(
|
|
364
|
+
.update(
|
|
365
|
+
source_agent: 'redacted',
|
|
366
|
+
source_provider: nil,
|
|
367
|
+
source_channel: nil,
|
|
368
|
+
identity_principal_id: nil,
|
|
369
|
+
identity_id: nil,
|
|
370
|
+
identity_canonical_name: nil
|
|
371
|
+
)
|
|
348
372
|
|
|
349
373
|
{ deleted: deleted, redacted: redacted, agent_id: agent_id }
|
|
350
374
|
.tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
|
|
@@ -425,49 +449,62 @@ module Legion
|
|
|
425
449
|
normalize_text_input(value)[0, max_length]
|
|
426
450
|
end
|
|
427
451
|
|
|
428
|
-
def active_duplicate_for_hash(hash)
|
|
452
|
+
def active_duplicate_for_hash(hash, access_scope: nil, identity_principal_id: nil)
|
|
429
453
|
return nil unless hash
|
|
430
454
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
455
|
+
dataset = Helpers::DataModels.apollo_entry
|
|
456
|
+
.where(content_hash: hash)
|
|
457
|
+
.exclude(status: 'archived')
|
|
458
|
+
|
|
459
|
+
dataset = dataset.where(identity_principal_id: identity_principal_id) if access_scope == 'private' && identity_principal_id
|
|
460
|
+
|
|
461
|
+
existing = dataset.first
|
|
435
462
|
existing&.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
436
463
|
log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{existing.id}") if existing
|
|
437
464
|
existing
|
|
438
465
|
end
|
|
439
466
|
|
|
440
|
-
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:,
|
|
467
|
+
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, # rubocop:disable Metrics/ParameterLists
|
|
468
|
+
submitted_by:, submitted_from:, access_scope: 'global',
|
|
469
|
+
identity_principal_id: nil, identity_id: nil, identity_canonical_name: nil)
|
|
441
470
|
tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
|
|
442
471
|
agent = truncate_for_column(source_agent, 50) || 'unknown'
|
|
443
472
|
|
|
444
|
-
{ tags:
|
|
445
|
-
domain:
|
|
446
|
-
source_agent:
|
|
447
|
-
source_provider:
|
|
448
|
-
source_channel:
|
|
449
|
-
submitted_by:
|
|
450
|
-
submitted_from:
|
|
473
|
+
{ tags: tag_array,
|
|
474
|
+
domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
|
|
475
|
+
source_agent: agent,
|
|
476
|
+
source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
|
|
477
|
+
source_channel: truncate_for_column(source_channel, 100),
|
|
478
|
+
submitted_by: truncate_for_column(submitted_by, 255),
|
|
479
|
+
submitted_from: truncate_for_column(submitted_from, 255),
|
|
480
|
+
access_scope: access_scope || 'global',
|
|
481
|
+
identity_principal_id: identity_principal_id.is_a?(Integer) ? identity_principal_id : nil,
|
|
482
|
+
identity_id: identity_id.is_a?(Integer) ? identity_id : nil,
|
|
483
|
+
identity_canonical_name: identity_canonical_name&.to_s&.slice(0, 255) }
|
|
451
484
|
end
|
|
452
485
|
|
|
453
486
|
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
|
|
454
487
|
new_entry = Helpers::DataModels.apollo_entry.create(
|
|
455
|
-
content:
|
|
456
|
-
content_type:
|
|
457
|
-
confidence:
|
|
458
|
-
source_agent:
|
|
459
|
-
source_provider:
|
|
460
|
-
source_channel:
|
|
461
|
-
source_context:
|
|
462
|
-
tags:
|
|
463
|
-
status:
|
|
464
|
-
knowledge_domain:
|
|
465
|
-
submitted_by:
|
|
466
|
-
submitted_from:
|
|
467
|
-
content_hash:
|
|
468
|
-
|
|
488
|
+
content: content,
|
|
489
|
+
content_type: content_type,
|
|
490
|
+
confidence: Helpers::Confidence.initial_confidence,
|
|
491
|
+
source_agent: metadata[:source_agent],
|
|
492
|
+
source_provider: metadata[:source_provider],
|
|
493
|
+
source_channel: metadata[:source_channel],
|
|
494
|
+
source_context: json_dump(context.is_a?(Hash) ? context : {}),
|
|
495
|
+
tags: Sequel.pg_array(metadata[:tags]),
|
|
496
|
+
status: 'candidate',
|
|
497
|
+
knowledge_domain: metadata[:domain],
|
|
498
|
+
submitted_by: metadata[:submitted_by],
|
|
499
|
+
submitted_from: metadata[:submitted_from],
|
|
500
|
+
content_hash: content_hash,
|
|
501
|
+
access_scope: metadata[:access_scope] || 'global',
|
|
502
|
+
identity_principal_id: metadata[:identity_principal_id],
|
|
503
|
+
identity_id: metadata[:identity_id],
|
|
504
|
+
identity_canonical_name: metadata[:identity_canonical_name],
|
|
505
|
+
embedding: embedding ? Sequel.lit("'[#{embedding.join(',')}]'::vector") : nil
|
|
469
506
|
)
|
|
470
|
-
log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
|
|
507
|
+
log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]} access_scope=#{metadata[:access_scope]}") # rubocop:disable Layout/LineLength
|
|
471
508
|
new_entry.id
|
|
472
509
|
rescue Sequel::UniqueConstraintViolation => e
|
|
473
510
|
# Race condition: another thread/process inserted the same content_hash between our
|
|
@@ -491,7 +528,7 @@ module Legion
|
|
|
491
528
|
query.to_s.strip.length < 3
|
|
492
529
|
end
|
|
493
530
|
|
|
494
|
-
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
|
|
531
|
+
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:, requesting_principal_id: nil)
|
|
495
532
|
log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
496
533
|
dataset = Helpers::DataModels.apollo_entry.exclude(status: 'archived')
|
|
497
534
|
requested = Array(status).map(&:to_s).reject(&:empty?)
|
|
@@ -499,6 +536,18 @@ module Legion
|
|
|
499
536
|
dataset = dataset.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if tags && !Array(tags).empty?
|
|
500
537
|
dataset = dataset.where(knowledge_domain: domain) if domain && !domain.to_s.empty?
|
|
501
538
|
|
|
539
|
+
if requesting_principal_id
|
|
540
|
+
pid = requesting_principal_id.to_i
|
|
541
|
+
dataset = dataset.where(
|
|
542
|
+
Sequel.lit(
|
|
543
|
+
"(access_scope = 'global' " \
|
|
544
|
+
"OR (access_scope = 'private' AND (identity_principal_id = ? OR identity_id IN (SELECT id FROM identities WHERE principal_id = ?))) " \
|
|
545
|
+
"OR (access_scope = 'team' AND EXISTS (SELECT 1 FROM identity_group_memberships igm1 JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id WHERE igm1.principal_id = ? AND igm2.principal_id = identity_principal_id)))", # rubocop:disable Layout/LineLength
|
|
546
|
+
pid, pid, pid
|
|
547
|
+
)
|
|
548
|
+
)
|
|
549
|
+
end
|
|
550
|
+
|
|
502
551
|
entries = dataset.order(Sequel.desc(:created_at)).limit(limit).all.map do |entry|
|
|
503
552
|
format_entry(entry.is_a?(Hash) ? entry : entry.values)
|
|
504
553
|
end
|
|
@@ -65,5 +65,36 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::GraphQuery do
|
|
|
65
65
|
expect(sql).to include("'dns'")
|
|
66
66
|
expect(sql).to include('&&')
|
|
67
67
|
end
|
|
68
|
+
|
|
69
|
+
context 'without requesting_principal_id' do
|
|
70
|
+
it 'includes no access_scope filter' do
|
|
71
|
+
sql = described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3)
|
|
72
|
+
expect(sql).not_to include('access_scope')
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
context 'with requesting_principal_id' do
|
|
77
|
+
let(:sql) { described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3, requesting_principal_id: 42) }
|
|
78
|
+
|
|
79
|
+
it 'allows global entries unconditionally' do
|
|
80
|
+
expect(sql).to include("access_scope = 'global'")
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it 'allows private entries owned by the principal via principal FK' do
|
|
84
|
+
expect(sql).to include('e.identity_principal_id = 42')
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it 'allows private entries owned by the principal via identity subquery (multi-provider auth)' do
|
|
88
|
+
expect(sql).to include('SELECT id FROM identities WHERE principal_id = 42')
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
it 'allows team entries when principal shares a group with the submitter' do
|
|
92
|
+
expect(sql).to include('identity_group_memberships')
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it 'wraps the entire access_scope condition in parentheses so it does not break AND chaining' do
|
|
96
|
+
expect(sql).to match(/AND \(.*access_scope.*\)/m)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
68
99
|
end
|
|
69
100
|
end
|
|
@@ -371,6 +371,95 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
371
371
|
end
|
|
372
372
|
end
|
|
373
373
|
|
|
374
|
+
context 'identity kwargs persistence' do
|
|
375
|
+
let(:mock_entry_class2) { double('ApolloEntry2') }
|
|
376
|
+
let(:mock_expertise_class2) { double('ApolloExpertise2') }
|
|
377
|
+
let(:mock_access_log_class2) { double('ApolloAccessLog2') }
|
|
378
|
+
let(:mock_entry2) { double('entry2', id: 99, embedding: nil) }
|
|
379
|
+
|
|
380
|
+
before do
|
|
381
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class2)
|
|
382
|
+
stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class2)
|
|
383
|
+
stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class2)
|
|
384
|
+
scoped_ds = double('scoped_ds2', first: nil, where: double('scoped_ds2b', first: nil))
|
|
385
|
+
allow(mock_entry_class2).to receive(:where).and_return(scoped_ds)
|
|
386
|
+
allow(scoped_ds).to receive(:exclude).and_return(scoped_ds)
|
|
387
|
+
allow(mock_expertise_class2).to receive(:where).and_return(double(first: nil))
|
|
388
|
+
allow(mock_expertise_class2).to receive(:create)
|
|
389
|
+
allow(mock_access_log_class2).to receive(:create)
|
|
390
|
+
allow(host).to receive(:embed_text).and_return(nil)
|
|
391
|
+
allow(host).to receive(:find_corroboration).and_return([false, nil])
|
|
392
|
+
allow(host).to receive(:detect_contradictions).and_return([])
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
it 'passes identity_principal_id and access_scope through to create_candidate_entry' do
|
|
396
|
+
expect(mock_entry_class2).to receive(:create).with(
|
|
397
|
+
hash_including(identity_principal_id: 42, access_scope: 'private')
|
|
398
|
+
).and_return(mock_entry2)
|
|
399
|
+
|
|
400
|
+
host.handle_ingest(
|
|
401
|
+
content: 'test fact', tags: [],
|
|
402
|
+
identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice',
|
|
403
|
+
access_scope: 'private'
|
|
404
|
+
)
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
it 'defaults access_scope to global when not provided' do
|
|
408
|
+
expect(mock_entry_class2).to receive(:create).with(
|
|
409
|
+
hash_including(access_scope: 'global')
|
|
410
|
+
).and_return(mock_entry2)
|
|
411
|
+
|
|
412
|
+
host.handle_ingest(content: 'test fact', tags: [])
|
|
413
|
+
end
|
|
414
|
+
|
|
415
|
+
it 'persists identity_id and identity_canonical_name' do
|
|
416
|
+
expect(mock_entry_class2).to receive(:create).with(
|
|
417
|
+
hash_including(identity_id: 7, identity_canonical_name: 'alice')
|
|
418
|
+
).and_return(mock_entry2)
|
|
419
|
+
|
|
420
|
+
host.handle_ingest(
|
|
421
|
+
content: 'test fact', tags: [],
|
|
422
|
+
identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice',
|
|
423
|
+
access_scope: 'private'
|
|
424
|
+
)
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
|
|
428
|
+
context 'dedup: private entries from different principals are not deduplicated' do
|
|
429
|
+
let(:mock_entry_class3) { double('ApolloEntry3') }
|
|
430
|
+
let(:mock_expertise_class3) { double('ApolloExpertise3') }
|
|
431
|
+
let(:mock_access_log_class3) { double('ApolloAccessLog3') }
|
|
432
|
+
let(:mock_entry_a) { double('entry_a', id: 7, embedding: nil) }
|
|
433
|
+
let(:mock_entry_b) { double('entry_b', id: 8, embedding: nil) }
|
|
434
|
+
|
|
435
|
+
before do
|
|
436
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class3)
|
|
437
|
+
stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class3)
|
|
438
|
+
stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class3)
|
|
439
|
+
allow(mock_expertise_class3).to receive(:where).and_return(double(first: nil))
|
|
440
|
+
allow(mock_expertise_class3).to receive(:create)
|
|
441
|
+
allow(mock_access_log_class3).to receive(:create)
|
|
442
|
+
allow(host).to receive(:embed_text).and_return(nil)
|
|
443
|
+
allow(host).to receive(:find_corroboration).and_return([false, nil])
|
|
444
|
+
allow(host).to receive(:detect_contradictions).and_return([])
|
|
445
|
+
# default dedup returns nil (no duplicate) — supports chained .where for private scope
|
|
446
|
+
scoped_ds3 = double('dedup_chain3', first: nil)
|
|
447
|
+
allow(scoped_ds3).to receive(:where).and_return(double('scoped_ds3b', first: nil))
|
|
448
|
+
allow(scoped_ds3).to receive(:exclude).and_return(scoped_ds3)
|
|
449
|
+
allow(mock_entry_class3).to receive(:where).and_return(scoped_ds3)
|
|
450
|
+
# two different principals each get their own entry
|
|
451
|
+
allow(mock_entry_class3).to receive(:create).and_return(mock_entry_a, mock_entry_b)
|
|
452
|
+
end
|
|
453
|
+
|
|
454
|
+
it 'does not deduplicate private entries from different principals' do
|
|
455
|
+
result1 = host.handle_ingest(content: 'same fact', content_type: :observation,
|
|
456
|
+
access_scope: 'private', identity_principal_id: 1)
|
|
457
|
+
result2 = host.handle_ingest(content: 'same fact', content_type: :observation,
|
|
458
|
+
access_scope: 'private', identity_principal_id: 2)
|
|
459
|
+
expect(result1[:entry_id]).not_to eq(result2[:entry_id])
|
|
460
|
+
end
|
|
461
|
+
end
|
|
462
|
+
|
|
374
463
|
context 'early-return warn logs' do
|
|
375
464
|
let(:logger) { instance_double('Logger', debug: nil, info: nil, warn: nil) }
|
|
376
465
|
|
|
@@ -521,6 +610,37 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
521
610
|
expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
|
|
522
611
|
end
|
|
523
612
|
end
|
|
613
|
+
|
|
614
|
+
context 'access scope forwarding' do
|
|
615
|
+
let(:mock_entry_class) { double('ApolloEntry') }
|
|
616
|
+
let(:mock_db) { double('db') }
|
|
617
|
+
|
|
618
|
+
before do
|
|
619
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
|
|
620
|
+
allow(Legion::LLM::Call::Embeddings).to receive(:generate)
|
|
621
|
+
.and_return({ vector: Array.new(1024, 0.1), model: 'test', provider: :ollama, dimensions: 1024, tokens: 0 })
|
|
622
|
+
allow(mock_entry_class).to receive(:db).and_return(mock_db)
|
|
623
|
+
allow(mock_db).to receive(:fetch).and_return(double(all: []))
|
|
624
|
+
allow(mock_entry_class).to receive(:where).and_return(double(update: true))
|
|
625
|
+
end
|
|
626
|
+
|
|
627
|
+
it 'forwards requesting_principal_id to build_semantic_search_sql' do
|
|
628
|
+
expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
|
|
629
|
+
hash_including(requesting_principal_id: 42)
|
|
630
|
+
).and_return('SELECT 1')
|
|
631
|
+
allow(mock_db).to receive(:fetch).and_return(double(all: []))
|
|
632
|
+
|
|
633
|
+
host.handle_query(query: 'test', requesting_principal_id: 42)
|
|
634
|
+
end
|
|
635
|
+
|
|
636
|
+
it 'passes nil requesting_principal_id when not provided (no filter)' do
|
|
637
|
+
expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
|
|
638
|
+
hash_including(requesting_principal_id: nil)
|
|
639
|
+
).and_return('SELECT 1')
|
|
640
|
+
|
|
641
|
+
host.handle_query(query: 'test')
|
|
642
|
+
end
|
|
643
|
+
end
|
|
524
644
|
end
|
|
525
645
|
|
|
526
646
|
describe '#normalize_text_input' do
|
|
@@ -597,6 +717,14 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
597
717
|
).and_call_original
|
|
598
718
|
host.retrieve_relevant(query: 'treatment', domain: 'clinical')
|
|
599
719
|
end
|
|
720
|
+
|
|
721
|
+
it 'forwards requesting_principal_id to build_semantic_search_sql' do
|
|
722
|
+
expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
|
|
723
|
+
hash_including(requesting_principal_id: 7)
|
|
724
|
+
).and_return('SELECT 1')
|
|
725
|
+
|
|
726
|
+
host.retrieve_relevant(query: 'test', requesting_principal_id: 7)
|
|
727
|
+
end
|
|
600
728
|
end
|
|
601
729
|
end
|
|
602
730
|
|
|
@@ -866,6 +994,18 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
866
994
|
expect(result[:redacted]).to eq(2)
|
|
867
995
|
expect(result[:agent_id]).to eq('agent-dead')
|
|
868
996
|
end
|
|
997
|
+
|
|
998
|
+
it 'clears identity columns on confirmed (redacted) entries' do
|
|
999
|
+
expect(mock_dataset).to receive(:update).with(
|
|
1000
|
+
hash_including(
|
|
1001
|
+
identity_principal_id: nil,
|
|
1002
|
+
identity_id: nil,
|
|
1003
|
+
identity_canonical_name: nil
|
|
1004
|
+
)
|
|
1005
|
+
).and_return(2)
|
|
1006
|
+
|
|
1007
|
+
host.handle_erasure_request(agent_id: 'agent-dead')
|
|
1008
|
+
end
|
|
869
1009
|
end
|
|
870
1010
|
|
|
871
1011
|
context 'when Sequel raises an error' do
|