lex-apollo 0.4.26 → 0.4.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/extensions/apollo/helpers/graph_query.rb +20 -1
- data/lib/legion/extensions/apollo/runners/knowledge.rb +114 -40
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/spec/legion/extensions/apollo/helpers/graph_query_spec.rb +31 -0
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +142 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d25745968940593d7d1ac3fba3f1ccd1b5d7ce553cca28c1a1fdfa4d3104af11
|
|
4
|
+
data.tar.gz: 96ecdae2a460625d95cbe0e4605f2c60778d035ea3c3f93e19d6560df0a40e70
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c788a0ab4bd58e70ef2be2b7be88101bcccf4880fecd0ee4b8cd7a1be05cbe9b9e157c99fc7f5724a62a4d6fcdacf6278e8c902368fcc60324d966b204698b48
|
|
7
|
+
data.tar.gz: 1a51dc48ec715adc969325e82f1c7bd81e23b35f716c0ee6c3ded05a9d38eb0b57261121892658fb8715a969f7ab22f1a06664af17aa990dc0e708272229039e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.27] - 2026-05-15
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `handle_ingest` now accepts and persists `access_scope`, `identity_principal_id`, `identity_id`, and `identity_canonical_name` to `apollo_entries`. These kwargs are injected automatically by `legion-apollo`'s identity middleware; no callers need updating. Defaults to `access_scope: 'global'` for backward compatibility.
|
|
7
|
+
- `build_semantic_search_sql` accepts an optional `requesting_principal_id:` kwarg and adds an access-scope SQL filter when provided: `global` entries are always visible, `team` entries are visible when the requesting principal shares a group membership with the submitter, and `private` entries are visible only to the owning principal (checked via both `identity_principal_id` FK and an `identities` subquery for multi-provider auth).
|
|
8
|
+
- `handle_query` and `retrieve_relevant` both accept and forward `requesting_principal_id:` to the SQL builder — covers the GAIA path (`handle_query`) and the direct-call path (`retrieve_relevant`).
|
|
9
|
+
- Browse-mode fallback in `handle_query` (`list_entries_chronologically`) also applies the same access-scope filter when `requesting_principal_id:` is provided.
|
|
10
|
+
- Private-entry dedup guard in `active_duplicate_for_hash`: when `access_scope: 'private'` and `identity_principal_id` is set, the dedup query is scoped per-principal so two different principals writing identical content each get their own row.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- `handle_erasure_request` now clears `identity_principal_id`, `identity_id`, and `identity_canonical_name` on confirmed (redacted) entries in addition to the existing `source_agent`/`source_provider`/`source_channel` redaction — GDPR right-to-erasure compliance gap.
|
|
14
|
+
|
|
3
15
|
## [0.4.26] - 2026-05-11
|
|
4
16
|
|
|
5
17
|
### Fixed
|
|
@@ -52,7 +52,9 @@ module Legion
|
|
|
52
52
|
SQL
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
|
|
55
|
+
def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
|
|
56
|
+
statuses: nil, tags: nil, domain: nil,
|
|
57
|
+
requesting_principal_id: nil, **)
|
|
56
58
|
conditions = ["e.confidence >= #{min_confidence}"]
|
|
57
59
|
|
|
58
60
|
if statuses&.any?
|
|
@@ -67,6 +69,23 @@ module Legion
|
|
|
67
69
|
|
|
68
70
|
conditions << "e.knowledge_domain = '#{domain}'" if domain
|
|
69
71
|
|
|
72
|
+
if requesting_principal_id
|
|
73
|
+
pid = requesting_principal_id.to_i
|
|
74
|
+
conditions << <<~SCOPE_SQL.strip
|
|
75
|
+
(e.access_scope = 'global'
|
|
76
|
+
OR (e.access_scope = 'private'
|
|
77
|
+
AND (e.identity_principal_id = #{pid}
|
|
78
|
+
OR e.identity_id IN (SELECT id FROM identities WHERE principal_id = #{pid})))
|
|
79
|
+
OR (e.access_scope = 'team'
|
|
80
|
+
AND EXISTS (
|
|
81
|
+
SELECT 1 FROM identity_group_memberships igm1
|
|
82
|
+
JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id
|
|
83
|
+
WHERE igm1.principal_id = #{pid}
|
|
84
|
+
AND igm2.principal_id = e.identity_principal_id
|
|
85
|
+
)))
|
|
86
|
+
SCOPE_SQL
|
|
87
|
+
end
|
|
88
|
+
|
|
70
89
|
where_clause = conditions.join(' AND ')
|
|
71
90
|
|
|
72
91
|
<<~SQL
|
|
@@ -82,9 +82,13 @@ module Legion
|
|
|
82
82
|
}
|
|
83
83
|
end
|
|
84
84
|
|
|
85
|
-
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown',
|
|
85
|
+
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', # rubocop:disable Metrics/ParameterLists
|
|
86
|
+
source_provider: nil, source_channel: nil, knowledge_domain: nil,
|
|
87
|
+
submitted_by: nil, submitted_from: nil, content_hash: nil, context: {},
|
|
88
|
+
skip: false, access_scope: 'global', **)
|
|
86
89
|
return { status: :skipped } if skip
|
|
87
90
|
|
|
91
|
+
identity = resolve_process_identity
|
|
88
92
|
content = normalize_text_input(content)
|
|
89
93
|
content_type = normalize_content_type(content_type.nil? ? :observation : content_type)
|
|
90
94
|
log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
|
|
@@ -92,7 +96,8 @@ module Legion
|
|
|
92
96
|
return early_error if early_error
|
|
93
97
|
|
|
94
98
|
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
95
|
-
existing = active_duplicate_for_hash(hash
|
|
99
|
+
existing = active_duplicate_for_hash(hash, access_scope: access_scope,
|
|
100
|
+
identity_principal_id: identity[:principal_id])
|
|
96
101
|
if existing
|
|
97
102
|
log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
|
|
98
103
|
return { success: true, entry_id: existing.id, deduped: true }
|
|
@@ -102,7 +107,11 @@ module Legion
|
|
|
102
107
|
content_type_sym = content_type.to_s
|
|
103
108
|
metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
|
|
104
109
|
source_provider: source_provider, source_channel: source_channel,
|
|
105
|
-
submitted_by: submitted_by, submitted_from: submitted_from
|
|
110
|
+
submitted_by: submitted_by, submitted_from: submitted_from,
|
|
111
|
+
access_scope: access_scope,
|
|
112
|
+
identity_principal_id: identity[:principal_id],
|
|
113
|
+
identity_id: identity[:identity_id],
|
|
114
|
+
identity_canonical_name: identity[:canonical_name])
|
|
106
115
|
|
|
107
116
|
corroborated, existing_id = find_corroboration(
|
|
108
117
|
embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
|
|
@@ -133,9 +142,13 @@ module Legion
|
|
|
133
142
|
{ success: false, error: e.message }
|
|
134
143
|
end
|
|
135
144
|
|
|
136
|
-
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit,
|
|
145
|
+
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, # rubocop:disable Metrics/CyclomaticComplexity, Metrics/ParameterLists
|
|
146
|
+
min_confidence: Helpers::GraphQuery.default_query_min_confidence,
|
|
147
|
+
status: UNSET, tags: nil, domain: nil, agent_id: 'unknown',
|
|
148
|
+
requesting_principal_id: nil, **)
|
|
137
149
|
return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
|
|
138
150
|
|
|
151
|
+
requesting_principal_id = resolve_requesting_principal_id(requesting_principal_id)
|
|
139
152
|
entry_model = Helpers::DataModels.apollo_entry
|
|
140
153
|
query = normalize_text_input(query)
|
|
141
154
|
status_defaulted = status.equal?(UNSET)
|
|
@@ -143,19 +156,22 @@ module Legion
|
|
|
143
156
|
log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
144
157
|
if browse_query?(query)
|
|
145
158
|
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
146
|
-
status_defaulted: status_defaulted, tags: tags, domain: domain
|
|
159
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain,
|
|
160
|
+
requesting_principal_id: requesting_principal_id)
|
|
147
161
|
end
|
|
148
162
|
|
|
149
163
|
embedding = embed_text(query)
|
|
150
164
|
if embedding.nil?
|
|
151
165
|
log.warn('Apollo Knowledge.handle_query embedding unavailable; falling back to browse query')
|
|
152
166
|
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
153
|
-
status_defaulted: status_defaulted, tags: tags, domain: domain
|
|
167
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain,
|
|
168
|
+
requesting_principal_id: requesting_principal_id)
|
|
154
169
|
end
|
|
155
170
|
|
|
156
171
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
157
172
|
limit: limit, min_confidence: min_confidence,
|
|
158
|
-
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
|
|
173
|
+
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain,
|
|
174
|
+
requesting_principal_id: requesting_principal_id
|
|
159
175
|
)
|
|
160
176
|
|
|
161
177
|
db = entry_model.db
|
|
@@ -251,11 +267,14 @@ module Legion
|
|
|
251
267
|
{ success: false, error: e.message }
|
|
252
268
|
end
|
|
253
269
|
|
|
254
|
-
def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
|
|
270
|
+
def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
|
|
271
|
+
min_confidence: Helpers::GraphQuery.default_query_min_confidence,
|
|
272
|
+
tags: nil, domain: nil, skip: false, requesting_principal_id: nil, **)
|
|
255
273
|
return { status: :skipped } if skip
|
|
256
274
|
|
|
257
275
|
return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
|
|
258
276
|
|
|
277
|
+
requesting_principal_id = resolve_requesting_principal_id(requesting_principal_id)
|
|
259
278
|
query = normalize_text_input(query)
|
|
260
279
|
log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
261
280
|
return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
|
|
@@ -268,7 +287,8 @@ module Legion
|
|
|
268
287
|
|
|
269
288
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
270
289
|
limit: limit, min_confidence: min_confidence,
|
|
271
|
-
statuses: %w[confirmed candidate], tags: tags, domain: domain
|
|
290
|
+
statuses: %w[confirmed candidate], tags: tags, domain: domain,
|
|
291
|
+
requesting_principal_id: requesting_principal_id
|
|
272
292
|
)
|
|
273
293
|
|
|
274
294
|
db = Helpers::DataModels.apollo_entry.db
|
|
@@ -341,10 +361,16 @@ module Legion
|
|
|
341
361
|
.exclude(status: 'confirmed')
|
|
342
362
|
.delete
|
|
343
363
|
|
|
344
|
-
# Redact attribution on confirmed entries (corroborated, retain knowledge)
|
|
345
364
|
redacted = conn[:apollo_entries]
|
|
346
365
|
.where(source_agent: agent_id, status: 'confirmed')
|
|
347
|
-
.update(
|
|
366
|
+
.update(
|
|
367
|
+
source_agent: 'redacted',
|
|
368
|
+
source_provider: nil,
|
|
369
|
+
source_channel: nil,
|
|
370
|
+
identity_principal_id: nil,
|
|
371
|
+
identity_id: nil,
|
|
372
|
+
identity_canonical_name: nil
|
|
373
|
+
)
|
|
348
374
|
|
|
349
375
|
{ deleted: deleted, redacted: redacted, agent_id: agent_id }
|
|
350
376
|
.tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
|
|
@@ -425,49 +451,62 @@ module Legion
|
|
|
425
451
|
normalize_text_input(value)[0, max_length]
|
|
426
452
|
end
|
|
427
453
|
|
|
428
|
-
def active_duplicate_for_hash(hash)
|
|
454
|
+
# Finds a non-archived entry whose content hash equals +hash+ and, when one
# exists, raises its confidence by the retrieval boost (capped at 1.0).
#
# The lookup is narrowed to one principal only when the ingest is 'private'
# AND a principal id is present; every other combination searches all
# non-archived entries carrying that hash.
#
# @param hash [String, nil] content hash to look up; a falsy value returns nil immediately
# @param access_scope [String, nil] scope of the entry being ingested
# @param identity_principal_id [Integer, nil] principal used to narrow private lookups
# @return [Object, nil] the matching entry (already updated) or nil when none matches
def active_duplicate_for_hash(hash, access_scope: nil, identity_principal_id: nil)
  return nil unless hash

  candidates = Helpers::DataModels.apollo_entry.where(content_hash: hash).exclude(status: 'archived')
  per_principal = access_scope == 'private' && identity_principal_id
  candidates = candidates.where(identity_principal_id: identity_principal_id) if per_principal

  match = candidates.first
  return nil if match.nil?

  # A repeated ingest counts as a retrieval, so nudge confidence upward without exceeding 1.0.
  boosted = [match.confidence + Helpers::Confidence.retrieval_boost, 1.0].min
  match.update(confidence: boosted)
  log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{match.id}")
  match
end
|
|
439
468
|
|
|
440
|
-
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:,
|
|
469
|
+
# Builds the normalized attribute hash that accompanies a new entry.
#
# Text columns are passed through +truncate_for_column+ with the limits used
# below (50 / 100 / 255). Identity ids are kept only when they are Integers;
# any other value is stored as nil. The canonical name is stringified and cut
# to 255 characters.
#
# @param tags [Array, nil] raw tags; normalized via Helpers::TagNormalizer when it is defined
# @param knowledge_domain [String, nil] explicit domain; falls back to the first tag, then 'general'
# @param source_agent [String, nil] falls back to 'unknown' when truncation yields nil
# @param source_provider [String, nil] falls back to +derive_provider_from_agent+
# @param access_scope [String, nil] nil is replaced by 'global'
# @return [Hash] keys :tags, :domain, :source_agent, :source_provider, :source_channel,
#   :submitted_by, :submitted_from, :access_scope, :identity_principal_id, :identity_id,
#   :identity_canonical_name (in that order)
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, # rubocop:disable Metrics/ParameterLists
                    submitted_by:, submitted_from:, access_scope: 'global',
                    identity_principal_id: nil, identity_id: nil, identity_canonical_name: nil)
  normalized_tags = if defined?(Helpers::TagNormalizer)
                      Helpers::TagNormalizer.normalize_all(tags)
                    else
                      Array(tags)
                    end
  agent_name = truncate_for_column(source_agent, 50) || 'unknown'
  integer_or_nil = ->(candidate) { candidate if candidate.is_a?(Integer) }

  metadata = { tags: normalized_tags }
  metadata[:domain] = truncate_for_column(knowledge_domain || normalized_tags.first || 'general', 50)
  metadata[:source_agent] = agent_name
  metadata[:source_provider] = truncate_for_column(source_provider || derive_provider_from_agent(agent_name), 50)
  metadata[:source_channel] = truncate_for_column(source_channel, 100)
  metadata[:submitted_by] = truncate_for_column(submitted_by, 255)
  metadata[:submitted_from] = truncate_for_column(submitted_from, 255)
  metadata[:access_scope] = access_scope || 'global'
  metadata[:identity_principal_id] = integer_or_nil.call(identity_principal_id)
  metadata[:identity_id] = integer_or_nil.call(identity_id)
  metadata[:identity_canonical_name] =
    identity_canonical_name.nil? ? nil : identity_canonical_name.to_s.slice(0, 255)
  metadata
end
|
|
452
487
|
|
|
453
488
|
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
|
|
454
489
|
new_entry = Helpers::DataModels.apollo_entry.create(
|
|
455
|
-
content:
|
|
456
|
-
content_type:
|
|
457
|
-
confidence:
|
|
458
|
-
source_agent:
|
|
459
|
-
source_provider:
|
|
460
|
-
source_channel:
|
|
461
|
-
source_context:
|
|
462
|
-
tags:
|
|
463
|
-
status:
|
|
464
|
-
knowledge_domain:
|
|
465
|
-
submitted_by:
|
|
466
|
-
submitted_from:
|
|
467
|
-
content_hash:
|
|
468
|
-
|
|
490
|
+
content: content,
|
|
491
|
+
content_type: content_type,
|
|
492
|
+
confidence: Helpers::Confidence.initial_confidence,
|
|
493
|
+
source_agent: metadata[:source_agent],
|
|
494
|
+
source_provider: metadata[:source_provider],
|
|
495
|
+
source_channel: metadata[:source_channel],
|
|
496
|
+
source_context: json_dump(context.is_a?(Hash) ? context : {}),
|
|
497
|
+
tags: Sequel.pg_array(metadata[:tags]),
|
|
498
|
+
status: 'candidate',
|
|
499
|
+
knowledge_domain: metadata[:domain],
|
|
500
|
+
submitted_by: metadata[:submitted_by],
|
|
501
|
+
submitted_from: metadata[:submitted_from],
|
|
502
|
+
content_hash: content_hash,
|
|
503
|
+
access_scope: metadata[:access_scope] || 'global',
|
|
504
|
+
identity_principal_id: metadata[:identity_principal_id],
|
|
505
|
+
identity_id: metadata[:identity_id],
|
|
506
|
+
identity_canonical_name: metadata[:identity_canonical_name],
|
|
507
|
+
embedding: embedding ? Sequel.lit("'[#{embedding.join(',')}]'::vector") : nil
|
|
469
508
|
)
|
|
470
|
-
log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
|
|
509
|
+
log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]} access_scope=#{metadata[:access_scope]}") # rubocop:disable Layout/LineLength
|
|
471
510
|
new_entry.id
|
|
472
511
|
rescue Sequel::UniqueConstraintViolation => e
|
|
473
512
|
# Race condition: another thread/process inserted the same content_hash between our
|
|
@@ -491,7 +530,7 @@ module Legion
|
|
|
491
530
|
query.to_s.strip.length < 3
|
|
492
531
|
end
|
|
493
532
|
|
|
494
|
-
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
|
|
533
|
+
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:, requesting_principal_id: nil)
|
|
495
534
|
log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
496
535
|
dataset = Helpers::DataModels.apollo_entry.exclude(status: 'archived')
|
|
497
536
|
requested = Array(status).map(&:to_s).reject(&:empty?)
|
|
@@ -499,6 +538,18 @@ module Legion
|
|
|
499
538
|
dataset = dataset.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if tags && !Array(tags).empty?
|
|
500
539
|
dataset = dataset.where(knowledge_domain: domain) if domain && !domain.to_s.empty?
|
|
501
540
|
|
|
541
|
+
if requesting_principal_id
|
|
542
|
+
pid = requesting_principal_id.to_i
|
|
543
|
+
dataset = dataset.where(
|
|
544
|
+
Sequel.lit(
|
|
545
|
+
"(access_scope = 'global' " \
|
|
546
|
+
"OR (access_scope = 'private' AND (identity_principal_id = ? OR identity_id IN (SELECT id FROM identities WHERE principal_id = ?))) " \
|
|
547
|
+
"OR (access_scope = 'team' AND EXISTS (SELECT 1 FROM identity_group_memberships igm1 JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id WHERE igm1.principal_id = ? AND igm2.principal_id = identity_principal_id)))", # rubocop:disable Layout/LineLength
|
|
548
|
+
pid, pid, pid
|
|
549
|
+
)
|
|
550
|
+
)
|
|
551
|
+
end
|
|
552
|
+
|
|
502
553
|
entries = dataset.order(Sequel.desc(:created_at)).limit(limit).all.map do |entry|
|
|
503
554
|
format_entry(entry.is_a?(Hash) ? entry : entry.values)
|
|
504
555
|
end
|
|
@@ -671,6 +722,29 @@ module Legion
|
|
|
671
722
|
end
|
|
672
723
|
end
|
|
673
724
|
|
|
725
|
+
# Reads the current process identity from Legion::Identity::Process.
#
# When that constant is not defined, or when reading any of its attributes
# raises a StandardError, an all-nil identity is returned instead; the error
# is reported through +handle_exception+ at warn level.
#
# @return [Hash] { principal_id:, identity_id:, canonical_name: }
def resolve_process_identity
  anonymous = { principal_id: nil, identity_id: nil, canonical_name: nil }
  return anonymous unless defined?(Legion::Identity::Process)

  process = Legion::Identity::Process
  {
    principal_id: process.db_principal_id,
    identity_id: process.db_identity_id,
    canonical_name: process.canonical_name
  }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'apollo.knowledge.resolve_process_identity')
  { principal_id: nil, identity_id: nil, canonical_name: nil }
end
|
|
737
|
+
|
|
738
|
+
# Chooses the principal id used for access-scope filtering.
#
# A truthy +explicit_value+ is returned untouched. Otherwise the id comes from
# Legion::Identity::Process when that constant is defined, and is nil when it
# is not. A StandardError raised during the lookup is reported through
# +handle_exception+ at warn level and nil is returned.
#
# @param explicit_value [Object, nil] caller-supplied principal id
# @return [Object, nil] the principal id to filter by, or nil when none is available
def resolve_requesting_principal_id(explicit_value)
  if explicit_value
    explicit_value
  elsif defined?(Legion::Identity::Process)
    Legion::Identity::Process.db_principal_id
  end
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'apollo.knowledge.resolve_requesting_principal_id')
  nil
end
|
|
747
|
+
|
|
674
748
|
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
675
749
|
include Legion::JSON::Helper
|
|
676
750
|
include Legion::Settings::Helper
|
|
@@ -65,5 +65,36 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::GraphQuery do
|
|
|
65
65
|
expect(sql).to include("'dns'")
|
|
66
66
|
expect(sql).to include('&&')
|
|
67
67
|
end
|
|
68
|
+
|
|
69
|
+
context 'without requesting_principal_id' do
  # SQL built with only limit and min_confidence, i.e. no requesting principal.
  subject(:unscoped_sql) { described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3) }

  it 'includes no access_scope filter' do
    expect(unscoped_sql).not_to include('access_scope')
  end
end
|
|
75
|
+
|
|
76
|
+
context 'with requesting_principal_id' do
  # SQL built for principal 42; each example checks one fragment of the access-scope filter.
  subject(:sql) do
    described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3, requesting_principal_id: 42)
  end

  it 'allows global entries unconditionally' do
    is_expected.to include("access_scope = 'global'")
  end

  it 'allows private entries owned by the principal via principal FK' do
    is_expected.to include('e.identity_principal_id = 42')
  end

  it 'allows private entries owned by the principal via identity subquery (multi-provider auth)' do
    is_expected.to include('SELECT id FROM identities WHERE principal_id = 42')
  end

  it 'allows team entries when principal shares a group with the submitter' do
    is_expected.to include('identity_group_memberships')
  end

  it 'wraps the entire access_scope condition in parentheses so it does not break AND chaining' do
    is_expected.to match(/AND \(.*access_scope.*\)/m)
  end
end
|
|
68
99
|
end
|
|
69
100
|
end
|
|
@@ -371,6 +371,97 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
371
371
|
end
|
|
372
372
|
end
|
|
373
373
|
|
|
374
|
+
context 'identity from Legion::Identity::Process' do
  let(:mock_entry_class2) { double('ApolloEntry2') }
  let(:mock_expertise_class2) { double('ApolloExpertise2') }
  let(:mock_access_log_class2) { double('ApolloAccessLog2') }
  let(:mock_entry2) { double('entry2', id: 99, embedding: nil) }

  before do
    {
      'Legion::Data::Model::ApolloEntry' => mock_entry_class2,
      'Legion::Data::Model::ApolloExpertise' => mock_expertise_class2,
      'Legion::Data::Model::ApolloAccessLog' => mock_access_log_class2
    }.each { |constant_name, stand_in| stub_const(constant_name, stand_in) }

    # Dedup lookup chain: where -> exclude -> (optional where) -> first, always "no duplicate".
    narrowed = double('scoped_ds2b', first: nil)
    scoped_ds = double('scoped_ds2', first: nil, where: narrowed)
    allow(mock_entry_class2).to receive(:where).and_return(scoped_ds)
    allow(scoped_ds).to receive(:exclude).and_return(scoped_ds)

    allow(mock_expertise_class2).to receive(:where).and_return(double(first: nil))
    allow(mock_expertise_class2).to receive(:create)
    allow(mock_access_log_class2).to receive(:create)
    allow(host).to receive_messages(embed_text: nil, find_corroboration: [false, nil], detect_contradictions: [])

    # The process identity every example expects to see persisted.
    process_identity = double(db_principal_id: 42, db_identity_id: 7, canonical_name: 'alice')
    stub_const('Legion::Identity::Process', process_identity)
  end

  it 'derives identity from Legion::Identity::Process and persists it' do
    persisted = hash_including(access_scope: 'private', identity_principal_id: 42,
                               identity_id: 7, identity_canonical_name: 'alice')
    expect(mock_entry_class2).to receive(:create).with(persisted).and_return(mock_entry2)

    host.handle_ingest(content: 'test fact', tags: [], access_scope: 'private')
  end

  it 'defaults access_scope to global when not provided' do
    persisted = hash_including(access_scope: 'global')
    expect(mock_entry_class2).to receive(:create).with(persisted).and_return(mock_entry2)

    host.handle_ingest(content: 'test fact', tags: [])
  end

  it 'ignores identity kwargs passed by callers' do
    persisted = hash_including(identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice')
    expect(mock_entry_class2).to receive(:create).with(persisted).and_return(mock_entry2)

    spoofed = { identity_principal_id: 999, identity_id: 888, identity_canonical_name: 'mallory' }
    host.handle_ingest(content: 'test fact', tags: [], access_scope: 'private', **spoofed)
  end
end
|
|
429
|
+
|
|
430
|
+
context 'dedup: private entries from different principals are not deduplicated' do
  let(:mock_entry_class3) { double('ApolloEntry3') }
  let(:mock_expertise_class3) { double('ApolloExpertise3') }
  let(:mock_access_log_class3) { double('ApolloAccessLog3') }
  let(:mock_entry_a) { double('entry_a', id: 7, embedding: nil) }
  let(:mock_entry_b) { double('entry_b', id: 8, embedding: nil) }
  # Dataset returned by the dedup lookup; exposed as a `let` so the example can
  # assert that the per-principal filter was actually applied to it.
  let(:dedup_dataset) { double('dedup_chain3', first: nil) }
  # handle_ingest takes the principal from Legion::Identity::Process, not from
  # caller kwargs, so the principal has to be driven through this stand-in.
  let(:process_identity) { double('ProcessIdentity3', db_identity_id: nil, canonical_name: nil) }

  before do
    stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class3)
    stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class3)
    stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class3)
    stub_const('Legion::Identity::Process', process_identity)
    allow(mock_expertise_class3).to receive(:where).and_return(double(first: nil))
    allow(mock_expertise_class3).to receive(:create)
    allow(mock_access_log_class3).to receive(:create)
    allow(host).to receive(:embed_text).and_return(nil)
    allow(host).to receive(:find_corroboration).and_return([false, nil])
    allow(host).to receive(:detect_contradictions).and_return([])
    # default dedup returns nil (no duplicate) — supports chained .where for private scope
    allow(dedup_dataset).to receive(:where).and_return(double('scoped_ds3b', first: nil))
    allow(dedup_dataset).to receive(:exclude).and_return(dedup_dataset)
    allow(mock_entry_class3).to receive(:where).and_return(dedup_dataset)
    # two different principals each get their own entry
    allow(mock_entry_class3).to receive(:create).and_return(mock_entry_a, mock_entry_b)
  end

  it 'does not deduplicate private entries from different principals' do
    allow(process_identity).to receive(:db_principal_id).and_return(1)
    result1 = host.handle_ingest(content: 'same fact', content_type: :observation, access_scope: 'private')

    allow(process_identity).to receive(:db_principal_id).and_return(2)
    result2 = host.handle_ingest(content: 'same fact', content_type: :observation, access_scope: 'private')

    # Without these two checks the example would pass even if dedup were never
    # scoped, because `create` is stubbed to hand back two different entries.
    expect(dedup_dataset).to have_received(:where).with(identity_principal_id: 1).at_least(:once)
    expect(dedup_dataset).to have_received(:where).with(identity_principal_id: 2).at_least(:once)
    expect(result1[:entry_id]).not_to eq(result2[:entry_id])
  end
end
|
|
464
|
+
|
|
374
465
|
context 'early-return warn logs' do
|
|
375
466
|
let(:logger) { instance_double('Logger', debug: nil, info: nil, warn: nil) }
|
|
376
467
|
|
|
@@ -521,6 +612,37 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
521
612
|
expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
|
|
522
613
|
end
|
|
523
614
|
end
|
|
615
|
+
|
|
616
|
+
context 'access scope forwarding' do
  let(:mock_entry_class) { double('ApolloEntry') }
  let(:mock_db) { double('db') }

  before do
    # handle_query falls back to Legion::Identity::Process when no principal is
    # passed. Hide it so the "nil when not provided" example does not depend on
    # whether that constant happens to be loaded in the test process.
    hide_const('Legion::Identity::Process')
    stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
    allow(Legion::LLM::Call::Embeddings).to receive(:generate)
      .and_return({ vector: Array.new(1024, 0.1), model: 'test', provider: :ollama, dimensions: 1024, tokens: 0 })
    allow(mock_entry_class).to receive(:db).and_return(mock_db)
    allow(mock_db).to receive(:fetch).and_return(double(all: []))
    allow(mock_entry_class).to receive(:where).and_return(double(update: true))
  end

  it 'forwards requesting_principal_id to build_semantic_search_sql' do
    expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
      hash_including(requesting_principal_id: 42)
    ).and_return('SELECT 1')

    host.handle_query(query: 'test', requesting_principal_id: 42)
  end

  it 'passes nil requesting_principal_id when not provided (no filter)' do
    expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
      hash_including(requesting_principal_id: nil)
    ).and_return('SELECT 1')

    host.handle_query(query: 'test')
  end
end
|
|
524
646
|
end
|
|
525
647
|
|
|
526
648
|
describe '#normalize_text_input' do
|
|
@@ -597,6 +719,14 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
597
719
|
).and_call_original
|
|
598
720
|
host.retrieve_relevant(query: 'treatment', domain: 'clinical')
|
|
599
721
|
end
|
|
722
|
+
|
|
723
|
+
it 'forwards requesting_principal_id to build_semantic_search_sql' do
  # The SQL builder must receive the caller's principal id among its kwargs.
  sql_builder = Legion::Extensions::Apollo::Helpers::GraphQuery
  forwarded_kwargs = hash_including(requesting_principal_id: 7)
  expect(sql_builder).to receive(:build_semantic_search_sql).with(forwarded_kwargs).and_return('SELECT 1')

  host.retrieve_relevant(query: 'test', requesting_principal_id: 7)
end
|
|
600
730
|
end
|
|
601
731
|
end
|
|
602
732
|
|
|
@@ -866,6 +996,18 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
866
996
|
expect(result[:redacted]).to eq(2)
|
|
867
997
|
expect(result[:agent_id]).to eq('agent-dead')
|
|
868
998
|
end
|
|
999
|
+
|
|
1000
|
+
it 'clears identity columns on confirmed (redacted) entries' do
  # All three identity columns must be nulled by the redaction update.
  cleared_identity = { identity_principal_id: nil, identity_id: nil, identity_canonical_name: nil }
  expect(mock_dataset).to receive(:update).with(hash_including(cleared_identity)).and_return(2)

  host.handle_erasure_request(agent_id: 'agent-dead')
end
|
|
869
1011
|
end
|
|
870
1012
|
|
|
871
1013
|
context 'when Sequel raises an error' do
|