lex-apollo 0.4.25 → 0.4.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/lib/legion/extensions/apollo/helpers/graph_query.rb +20 -1
- data/lib/legion/extensions/apollo/runners/knowledge.rb +105 -40
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/spec/legion/extensions/apollo/helpers/graph_query_spec.rb +31 -0
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +164 -0
- data/spec/spec_helper.rb +1 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 85cccff520bb34332f705695f5193cd55e01bb94fb680bf0a51ad81a04ec3112
|
|
4
|
+
data.tar.gz: b3370806ab160e71f3f213fb4401a6a45aa650778a29574b1cc553a9ad34d167
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d45dd95922afbff6cf87391eca84b658359fc3b329f80cfd9404054e2e5fc7cff5b398d2b065f407ab9bff3141ba87b9f3d30e0c980e6c4625231edbf591bc18
|
|
7
|
+
data.tar.gz: 47183b00644bae505cd55c1abad707c5c86d901a38472d186bb5601eb3ec4fa0d2452715a667b3f7ebd0081d00ccb3fc3d6d6fabab61e7d146fdb2890deca7f2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.27] - 2026-05-15
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `handle_ingest` now accepts `access_scope`, `identity_principal_id`, `identity_id`, and `identity_canonical_name` and persists them to `apollo_entries`. These kwargs are injected automatically by `legion-apollo`'s identity middleware; no callers need updating. `access_scope` defaults to `'global'` for backward compatibility.
|
|
7
|
+
- `build_semantic_search_sql` accepts an optional `requesting_principal_id:` kwarg and adds an access-scope SQL filter when provided: `global` entries are always visible, `team` entries are visible when the requesting principal shares a group membership with the submitter, and `private` entries are visible only to the owning principal (matched either by the `identity_principal_id` FK or by an `identities` subquery on `identity_id`, for multi-provider auth).
|
|
8
|
+
- `handle_query` and `retrieve_relevant` both accept and forward `requesting_principal_id:` to the SQL builder — covers the GAIA path (`handle_query`) and the direct-call path (`retrieve_relevant`).
|
|
9
|
+
- Browse-mode fallback in `handle_query` (`list_entries_chronologically`) also applies the same access-scope filter when `requesting_principal_id:` is provided.
|
|
10
|
+
- Private-entry dedup guard in `active_duplicate_for_hash`: when `access_scope: 'private'` and `identity_principal_id` is set, the dedup query is scoped per-principal so two different principals writing identical content each get their own row.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- `handle_erasure_request` now clears `identity_principal_id`, `identity_id`, and `identity_canonical_name` on confirmed (redacted) entries in addition to the existing `source_agent`/`source_provider`/`source_channel` redaction — closing a GDPR right-to-erasure compliance gap.
|
|
14
|
+
|
|
15
|
+
## [0.4.26] - 2026-05-11
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- Handle `Sequel::UniqueConstraintViolation` in `create_candidate_entry` gracefully — a race condition during concurrent knowledge ingestion can cause two threads to pass the content_hash dedup check simultaneously and both attempt to insert the same row. On collision, the rescue block now looks up the existing winner row by content_hash (excluding archived) and returns its ID so the caller continues normally (access log, contradiction detection, etc.) instead of propagating a database error.
|
|
19
|
+
- Added `Sequel::UniqueConstraintViolation` stub to the test-only Sequel shim so the race-condition rescue path is exercisable in unit tests without a live database.
|
|
20
|
+
|
|
3
21
|
## [0.4.25] - 2026-05-08
|
|
4
22
|
|
|
5
23
|
### Fixed
|
|
@@ -52,7 +52,9 @@ module Legion
|
|
|
52
52
|
SQL
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
|
|
55
|
+
def build_semantic_search_sql(limit: default_query_limit, min_confidence: default_query_min_confidence,
|
|
56
|
+
statuses: nil, tags: nil, domain: nil,
|
|
57
|
+
requesting_principal_id: nil, **)
|
|
56
58
|
conditions = ["e.confidence >= #{min_confidence}"]
|
|
57
59
|
|
|
58
60
|
if statuses&.any?
|
|
@@ -67,6 +69,23 @@ module Legion
|
|
|
67
69
|
|
|
68
70
|
conditions << "e.knowledge_domain = '#{domain}'" if domain
|
|
69
71
|
|
|
72
|
+
if requesting_principal_id
|
|
73
|
+
pid = requesting_principal_id.to_i
|
|
74
|
+
conditions << <<~SCOPE_SQL.strip
|
|
75
|
+
(e.access_scope = 'global'
|
|
76
|
+
OR (e.access_scope = 'private'
|
|
77
|
+
AND (e.identity_principal_id = #{pid}
|
|
78
|
+
OR e.identity_id IN (SELECT id FROM identities WHERE principal_id = #{pid})))
|
|
79
|
+
OR (e.access_scope = 'team'
|
|
80
|
+
AND EXISTS (
|
|
81
|
+
SELECT 1 FROM identity_group_memberships igm1
|
|
82
|
+
JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id
|
|
83
|
+
WHERE igm1.principal_id = #{pid}
|
|
84
|
+
AND igm2.principal_id = e.identity_principal_id
|
|
85
|
+
)))
|
|
86
|
+
SCOPE_SQL
|
|
87
|
+
end
|
|
88
|
+
|
|
70
89
|
where_clause = conditions.join(' AND ')
|
|
71
90
|
|
|
72
91
|
<<~SQL
|
|
@@ -82,7 +82,11 @@ module Legion
|
|
|
82
82
|
}
|
|
83
83
|
end
|
|
84
84
|
|
|
85
|
-
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown',
|
|
85
|
+
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', # rubocop:disable Metrics/ParameterLists
|
|
86
|
+
source_provider: nil, source_channel: nil, knowledge_domain: nil,
|
|
87
|
+
submitted_by: nil, submitted_from: nil, content_hash: nil, context: {},
|
|
88
|
+
skip: false, access_scope: 'global', identity_principal_id: nil,
|
|
89
|
+
identity_id: nil, identity_canonical_name: nil, **)
|
|
86
90
|
return { status: :skipped } if skip
|
|
87
91
|
|
|
88
92
|
content = normalize_text_input(content)
|
|
@@ -92,7 +96,8 @@ module Legion
|
|
|
92
96
|
return early_error if early_error
|
|
93
97
|
|
|
94
98
|
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
95
|
-
existing = active_duplicate_for_hash(hash
|
|
99
|
+
existing = active_duplicate_for_hash(hash, access_scope: access_scope,
|
|
100
|
+
identity_principal_id: identity_principal_id)
|
|
96
101
|
if existing
|
|
97
102
|
log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
|
|
98
103
|
return { success: true, entry_id: existing.id, deduped: true }
|
|
@@ -102,7 +107,11 @@ module Legion
|
|
|
102
107
|
content_type_sym = content_type.to_s
|
|
103
108
|
metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
|
|
104
109
|
source_provider: source_provider, source_channel: source_channel,
|
|
105
|
-
submitted_by: submitted_by, submitted_from: submitted_from
|
|
110
|
+
submitted_by: submitted_by, submitted_from: submitted_from,
|
|
111
|
+
access_scope: access_scope,
|
|
112
|
+
identity_principal_id: identity_principal_id,
|
|
113
|
+
identity_id: identity_id,
|
|
114
|
+
identity_canonical_name: identity_canonical_name)
|
|
106
115
|
|
|
107
116
|
corroborated, existing_id = find_corroboration(
|
|
108
117
|
embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
|
|
@@ -133,7 +142,10 @@ module Legion
|
|
|
133
142
|
{ success: false, error: e.message }
|
|
134
143
|
end
|
|
135
144
|
|
|
136
|
-
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit,
|
|
145
|
+
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, # rubocop:disable Metrics/CyclomaticComplexity, Metrics/ParameterLists
|
|
146
|
+
min_confidence: Helpers::GraphQuery.default_query_min_confidence,
|
|
147
|
+
status: UNSET, tags: nil, domain: nil, agent_id: 'unknown',
|
|
148
|
+
requesting_principal_id: nil, **)
|
|
137
149
|
return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
|
|
138
150
|
|
|
139
151
|
entry_model = Helpers::DataModels.apollo_entry
|
|
@@ -143,19 +155,22 @@ module Legion
|
|
|
143
155
|
log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
144
156
|
if browse_query?(query)
|
|
145
157
|
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
146
|
-
status_defaulted: status_defaulted, tags: tags, domain: domain
|
|
158
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain,
|
|
159
|
+
requesting_principal_id: requesting_principal_id)
|
|
147
160
|
end
|
|
148
161
|
|
|
149
162
|
embedding = embed_text(query)
|
|
150
163
|
if embedding.nil?
|
|
151
164
|
log.warn('Apollo Knowledge.handle_query embedding unavailable; falling back to browse query')
|
|
152
165
|
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
153
|
-
status_defaulted: status_defaulted, tags: tags, domain: domain
|
|
166
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain,
|
|
167
|
+
requesting_principal_id: requesting_principal_id)
|
|
154
168
|
end
|
|
155
169
|
|
|
156
170
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
157
171
|
limit: limit, min_confidence: min_confidence,
|
|
158
|
-
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
|
|
172
|
+
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain,
|
|
173
|
+
requesting_principal_id: requesting_principal_id
|
|
159
174
|
)
|
|
160
175
|
|
|
161
176
|
db = entry_model.db
|
|
@@ -251,7 +266,9 @@ module Legion
|
|
|
251
266
|
{ success: false, error: e.message }
|
|
252
267
|
end
|
|
253
268
|
|
|
254
|
-
def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
|
|
269
|
+
def retrieve_relevant(query: nil, limit: Helpers::Confidence.apollo_setting(:query, :retrieval_limit, default: 5),
|
|
270
|
+
min_confidence: Helpers::GraphQuery.default_query_min_confidence,
|
|
271
|
+
tags: nil, domain: nil, skip: false, requesting_principal_id: nil, **)
|
|
255
272
|
return { status: :skipped } if skip
|
|
256
273
|
|
|
257
274
|
return { success: false, error: 'apollo_data_not_available' } unless Helpers::DataModels.apollo_entry_available?
|
|
@@ -268,7 +285,8 @@ module Legion
|
|
|
268
285
|
|
|
269
286
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
270
287
|
limit: limit, min_confidence: min_confidence,
|
|
271
|
-
statuses: %w[confirmed candidate], tags: tags, domain: domain
|
|
288
|
+
statuses: %w[confirmed candidate], tags: tags, domain: domain,
|
|
289
|
+
requesting_principal_id: requesting_principal_id
|
|
272
290
|
)
|
|
273
291
|
|
|
274
292
|
db = Helpers::DataModels.apollo_entry.db
|
|
@@ -341,10 +359,16 @@ module Legion
|
|
|
341
359
|
.exclude(status: 'confirmed')
|
|
342
360
|
.delete
|
|
343
361
|
|
|
344
|
-
# Redact attribution on confirmed entries (corroborated, retain knowledge)
|
|
345
362
|
redacted = conn[:apollo_entries]
|
|
346
363
|
.where(source_agent: agent_id, status: 'confirmed')
|
|
347
|
-
.update(
|
|
364
|
+
.update(
|
|
365
|
+
source_agent: 'redacted',
|
|
366
|
+
source_provider: nil,
|
|
367
|
+
source_channel: nil,
|
|
368
|
+
identity_principal_id: nil,
|
|
369
|
+
identity_id: nil,
|
|
370
|
+
identity_canonical_name: nil
|
|
371
|
+
)
|
|
348
372
|
|
|
349
373
|
{ deleted: deleted, redacted: redacted, agent_id: agent_id }
|
|
350
374
|
.tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
|
|
@@ -425,57 +449,86 @@ module Legion
|
|
|
425
449
|
normalize_text_input(value)[0, max_length]
|
|
426
450
|
end
|
|
427
451
|
|
|
428
|
-
def active_duplicate_for_hash(hash)
|
|
452
|
+
def active_duplicate_for_hash(hash, access_scope: nil, identity_principal_id: nil)
|
|
429
453
|
return nil unless hash
|
|
430
454
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
455
|
+
dataset = Helpers::DataModels.apollo_entry
|
|
456
|
+
.where(content_hash: hash)
|
|
457
|
+
.exclude(status: 'archived')
|
|
458
|
+
|
|
459
|
+
dataset = dataset.where(identity_principal_id: identity_principal_id) if access_scope == 'private' && identity_principal_id
|
|
460
|
+
|
|
461
|
+
existing = dataset.first
|
|
435
462
|
existing&.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
436
463
|
log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{existing.id}") if existing
|
|
437
464
|
existing
|
|
438
465
|
end
|
|
439
466
|
|
|
440
|
-
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:,
|
|
467
|
+
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, # rubocop:disable Metrics/ParameterLists
|
|
468
|
+
submitted_by:, submitted_from:, access_scope: 'global',
|
|
469
|
+
identity_principal_id: nil, identity_id: nil, identity_canonical_name: nil)
|
|
441
470
|
tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
|
|
442
471
|
agent = truncate_for_column(source_agent, 50) || 'unknown'
|
|
443
472
|
|
|
444
|
-
{ tags:
|
|
445
|
-
domain:
|
|
446
|
-
source_agent:
|
|
447
|
-
source_provider:
|
|
448
|
-
source_channel:
|
|
449
|
-
submitted_by:
|
|
450
|
-
submitted_from:
|
|
473
|
+
{ tags: tag_array,
|
|
474
|
+
domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
|
|
475
|
+
source_agent: agent,
|
|
476
|
+
source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
|
|
477
|
+
source_channel: truncate_for_column(source_channel, 100),
|
|
478
|
+
submitted_by: truncate_for_column(submitted_by, 255),
|
|
479
|
+
submitted_from: truncate_for_column(submitted_from, 255),
|
|
480
|
+
access_scope: access_scope || 'global',
|
|
481
|
+
identity_principal_id: identity_principal_id.is_a?(Integer) ? identity_principal_id : nil,
|
|
482
|
+
identity_id: identity_id.is_a?(Integer) ? identity_id : nil,
|
|
483
|
+
identity_canonical_name: identity_canonical_name&.to_s&.slice(0, 255) }
|
|
451
484
|
end
|
|
452
485
|
|
|
453
486
|
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
|
|
454
487
|
new_entry = Helpers::DataModels.apollo_entry.create(
|
|
455
|
-
content:
|
|
456
|
-
content_type:
|
|
457
|
-
confidence:
|
|
458
|
-
source_agent:
|
|
459
|
-
source_provider:
|
|
460
|
-
source_channel:
|
|
461
|
-
source_context:
|
|
462
|
-
tags:
|
|
463
|
-
status:
|
|
464
|
-
knowledge_domain:
|
|
465
|
-
submitted_by:
|
|
466
|
-
submitted_from:
|
|
467
|
-
content_hash:
|
|
468
|
-
|
|
488
|
+
content: content,
|
|
489
|
+
content_type: content_type,
|
|
490
|
+
confidence: Helpers::Confidence.initial_confidence,
|
|
491
|
+
source_agent: metadata[:source_agent],
|
|
492
|
+
source_provider: metadata[:source_provider],
|
|
493
|
+
source_channel: metadata[:source_channel],
|
|
494
|
+
source_context: json_dump(context.is_a?(Hash) ? context : {}),
|
|
495
|
+
tags: Sequel.pg_array(metadata[:tags]),
|
|
496
|
+
status: 'candidate',
|
|
497
|
+
knowledge_domain: metadata[:domain],
|
|
498
|
+
submitted_by: metadata[:submitted_by],
|
|
499
|
+
submitted_from: metadata[:submitted_from],
|
|
500
|
+
content_hash: content_hash,
|
|
501
|
+
access_scope: metadata[:access_scope] || 'global',
|
|
502
|
+
identity_principal_id: metadata[:identity_principal_id],
|
|
503
|
+
identity_id: metadata[:identity_id],
|
|
504
|
+
identity_canonical_name: metadata[:identity_canonical_name],
|
|
505
|
+
embedding: embedding ? Sequel.lit("'[#{embedding.join(',')}]'::vector") : nil
|
|
469
506
|
)
|
|
470
|
-
log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
|
|
507
|
+
log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]} access_scope=#{metadata[:access_scope]}") # rubocop:disable Layout/LineLength
|
|
471
508
|
new_entry.id
|
|
509
|
+
rescue Sequel::UniqueConstraintViolation => e
|
|
510
|
+
# Race condition: another thread/process inserted the same content_hash between our
|
|
511
|
+
# dedup check and this insert. Fetch and return the winner's id so the caller can
|
|
512
|
+
# continue normally (access log, contradiction detection, etc.).
|
|
513
|
+
winner = Helpers::DataModels.apollo_entry
|
|
514
|
+
.where(content_hash: content_hash)
|
|
515
|
+
.exclude(status: 'archived')
|
|
516
|
+
.first
|
|
517
|
+
if winner
|
|
518
|
+
log.warn("Apollo Knowledge.create_candidate_entry race_dedup entry_id=#{winner.id} content_hash=#{content_hash} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
|
|
519
|
+
winner.id
|
|
520
|
+
else
|
|
521
|
+
handle_exception(e, level: :warn, handled: true, operation: 'apollo.knowledge.create_candidate_entry',
|
|
522
|
+
content_hash: content_hash)
|
|
523
|
+
nil
|
|
524
|
+
end
|
|
472
525
|
end
|
|
473
526
|
|
|
474
527
|
def browse_query?(query)
|
|
475
528
|
query.to_s.strip.length < 3
|
|
476
529
|
end
|
|
477
530
|
|
|
478
|
-
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
|
|
531
|
+
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:, requesting_principal_id: nil)
|
|
479
532
|
log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
480
533
|
dataset = Helpers::DataModels.apollo_entry.exclude(status: 'archived')
|
|
481
534
|
requested = Array(status).map(&:to_s).reject(&:empty?)
|
|
@@ -483,6 +536,18 @@ module Legion
|
|
|
483
536
|
dataset = dataset.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if tags && !Array(tags).empty?
|
|
484
537
|
dataset = dataset.where(knowledge_domain: domain) if domain && !domain.to_s.empty?
|
|
485
538
|
|
|
539
|
+
if requesting_principal_id
|
|
540
|
+
pid = requesting_principal_id.to_i
|
|
541
|
+
dataset = dataset.where(
|
|
542
|
+
Sequel.lit(
|
|
543
|
+
"(access_scope = 'global' " \
|
|
544
|
+
"OR (access_scope = 'private' AND (identity_principal_id = ? OR identity_id IN (SELECT id FROM identities WHERE principal_id = ?))) " \
|
|
545
|
+
"OR (access_scope = 'team' AND EXISTS (SELECT 1 FROM identity_group_memberships igm1 JOIN identity_group_memberships igm2 ON igm1.group_id = igm2.group_id WHERE igm1.principal_id = ? AND igm2.principal_id = identity_principal_id)))", # rubocop:disable Layout/LineLength
|
|
546
|
+
pid, pid, pid
|
|
547
|
+
)
|
|
548
|
+
)
|
|
549
|
+
end
|
|
550
|
+
|
|
486
551
|
entries = dataset.order(Sequel.desc(:created_at)).limit(limit).all.map do |entry|
|
|
487
552
|
format_entry(entry.is_a?(Hash) ? entry : entry.values)
|
|
488
553
|
end
|
|
@@ -65,5 +65,36 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::GraphQuery do
|
|
|
65
65
|
expect(sql).to include("'dns'")
|
|
66
66
|
expect(sql).to include('&&')
|
|
67
67
|
end
|
|
68
|
+
|
|
69
|
+
context 'without requesting_principal_id' do
|
|
70
|
+
it 'includes no access_scope filter' do
|
|
71
|
+
sql = described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3)
|
|
72
|
+
expect(sql).not_to include('access_scope')
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
context 'with requesting_principal_id' do
|
|
77
|
+
let(:sql) { described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3, requesting_principal_id: 42) }
|
|
78
|
+
|
|
79
|
+
it 'allows global entries unconditionally' do
|
|
80
|
+
expect(sql).to include("access_scope = 'global'")
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it 'allows private entries owned by the principal via principal FK' do
|
|
84
|
+
expect(sql).to include('e.identity_principal_id = 42')
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it 'allows private entries owned by the principal via identity subquery (multi-provider auth)' do
|
|
88
|
+
expect(sql).to include('SELECT id FROM identities WHERE principal_id = 42')
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
it 'allows team entries when principal shares a group with the submitter' do
|
|
92
|
+
expect(sql).to include('identity_group_memberships')
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it 'wraps the entire access_scope condition in parentheses so it does not break AND chaining' do
|
|
96
|
+
expect(sql).to match(/AND \(.*access_scope.*\)/m)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
68
99
|
end
|
|
69
100
|
end
|
|
@@ -321,6 +321,30 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
321
321
|
expect(result[:deduped]).to be true
|
|
322
322
|
expect(result[:entry_id]).to eq('uuid-existing')
|
|
323
323
|
end
|
|
324
|
+
|
|
325
|
+
it 'recovers gracefully when a concurrent ingest wins the content_hash unique constraint race' do
|
|
326
|
+
# Simulate: dedup check passes (nil — no existing entry yet), then .create
|
|
327
|
+
# raises UniqueConstraintViolation (another thread inserted between check and insert).
|
|
328
|
+
# create_candidate_entry must rescue and return the existing entry's id so the
|
|
329
|
+
# caller succeeds rather than propagating a database error.
|
|
330
|
+
race_entry = double('race_entry', id: 'uuid-race-winner')
|
|
331
|
+
|
|
332
|
+
allow(mock_entry_class).to receive(:create)
|
|
333
|
+
.and_raise(Sequel::UniqueConstraintViolation, 'duplicate key value violates unique constraint "idx_apollo_content_hash"')
|
|
334
|
+
|
|
335
|
+
collision_dataset = double('collision_dataset')
|
|
336
|
+
allow(mock_entry_class).to receive(:where).with(content_hash: anything).and_return(collision_dataset)
|
|
337
|
+
allow(collision_dataset).to receive(:exclude).with(status: 'archived').and_return(collision_dataset)
|
|
338
|
+
# First call: dedup pre-check returns nil (not yet in DB).
|
|
339
|
+
# Second call: post-collision lookup returns the winner inserted by the other thread.
|
|
340
|
+
allow(collision_dataset).to receive(:first).and_return(nil, race_entry)
|
|
341
|
+
|
|
342
|
+
result = host.handle_ingest(content: 'concurrent content', content_type: 'fact',
|
|
343
|
+
source_agent: 'agent-1',
|
|
344
|
+
content_hash: 'd3861b2862454c5a6a9e480829333841')
|
|
345
|
+
expect(result[:success]).to be true
|
|
346
|
+
expect(result[:entry_id]).to eq('uuid-race-winner')
|
|
347
|
+
end
|
|
324
348
|
end
|
|
325
349
|
end
|
|
326
350
|
|
|
@@ -347,6 +371,95 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
347
371
|
end
|
|
348
372
|
end
|
|
349
373
|
|
|
374
|
+
context 'identity kwargs persistence' do
|
|
375
|
+
let(:mock_entry_class2) { double('ApolloEntry2') }
|
|
376
|
+
let(:mock_expertise_class2) { double('ApolloExpertise2') }
|
|
377
|
+
let(:mock_access_log_class2) { double('ApolloAccessLog2') }
|
|
378
|
+
let(:mock_entry2) { double('entry2', id: 99, embedding: nil) }
|
|
379
|
+
|
|
380
|
+
before do
|
|
381
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class2)
|
|
382
|
+
stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class2)
|
|
383
|
+
stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class2)
|
|
384
|
+
scoped_ds = double('scoped_ds2', first: nil, where: double('scoped_ds2b', first: nil))
|
|
385
|
+
allow(mock_entry_class2).to receive(:where).and_return(scoped_ds)
|
|
386
|
+
allow(scoped_ds).to receive(:exclude).and_return(scoped_ds)
|
|
387
|
+
allow(mock_expertise_class2).to receive(:where).and_return(double(first: nil))
|
|
388
|
+
allow(mock_expertise_class2).to receive(:create)
|
|
389
|
+
allow(mock_access_log_class2).to receive(:create)
|
|
390
|
+
allow(host).to receive(:embed_text).and_return(nil)
|
|
391
|
+
allow(host).to receive(:find_corroboration).and_return([false, nil])
|
|
392
|
+
allow(host).to receive(:detect_contradictions).and_return([])
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
it 'passes identity_principal_id and access_scope through to create_candidate_entry' do
|
|
396
|
+
expect(mock_entry_class2).to receive(:create).with(
|
|
397
|
+
hash_including(identity_principal_id: 42, access_scope: 'private')
|
|
398
|
+
).and_return(mock_entry2)
|
|
399
|
+
|
|
400
|
+
host.handle_ingest(
|
|
401
|
+
content: 'test fact', tags: [],
|
|
402
|
+
identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice',
|
|
403
|
+
access_scope: 'private'
|
|
404
|
+
)
|
|
405
|
+
end
|
|
406
|
+
|
|
407
|
+
it 'defaults access_scope to global when not provided' do
|
|
408
|
+
expect(mock_entry_class2).to receive(:create).with(
|
|
409
|
+
hash_including(access_scope: 'global')
|
|
410
|
+
).and_return(mock_entry2)
|
|
411
|
+
|
|
412
|
+
host.handle_ingest(content: 'test fact', tags: [])
|
|
413
|
+
end
|
|
414
|
+
|
|
415
|
+
it 'persists identity_id and identity_canonical_name' do
|
|
416
|
+
expect(mock_entry_class2).to receive(:create).with(
|
|
417
|
+
hash_including(identity_id: 7, identity_canonical_name: 'alice')
|
|
418
|
+
).and_return(mock_entry2)
|
|
419
|
+
|
|
420
|
+
host.handle_ingest(
|
|
421
|
+
content: 'test fact', tags: [],
|
|
422
|
+
identity_principal_id: 42, identity_id: 7, identity_canonical_name: 'alice',
|
|
423
|
+
access_scope: 'private'
|
|
424
|
+
)
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
|
|
428
|
+
context 'dedup: private entries from different principals are not deduplicated' do
|
|
429
|
+
let(:mock_entry_class3) { double('ApolloEntry3') }
|
|
430
|
+
let(:mock_expertise_class3) { double('ApolloExpertise3') }
|
|
431
|
+
let(:mock_access_log_class3) { double('ApolloAccessLog3') }
|
|
432
|
+
let(:mock_entry_a) { double('entry_a', id: 7, embedding: nil) }
|
|
433
|
+
let(:mock_entry_b) { double('entry_b', id: 8, embedding: nil) }
|
|
434
|
+
|
|
435
|
+
before do
|
|
436
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class3)
|
|
437
|
+
stub_const('Legion::Data::Model::ApolloExpertise', mock_expertise_class3)
|
|
438
|
+
stub_const('Legion::Data::Model::ApolloAccessLog', mock_access_log_class3)
|
|
439
|
+
allow(mock_expertise_class3).to receive(:where).and_return(double(first: nil))
|
|
440
|
+
allow(mock_expertise_class3).to receive(:create)
|
|
441
|
+
allow(mock_access_log_class3).to receive(:create)
|
|
442
|
+
allow(host).to receive(:embed_text).and_return(nil)
|
|
443
|
+
allow(host).to receive(:find_corroboration).and_return([false, nil])
|
|
444
|
+
allow(host).to receive(:detect_contradictions).and_return([])
|
|
445
|
+
# default dedup returns nil (no duplicate) — supports chained .where for private scope
|
|
446
|
+
scoped_ds3 = double('dedup_chain3', first: nil)
|
|
447
|
+
allow(scoped_ds3).to receive(:where).and_return(double('scoped_ds3b', first: nil))
|
|
448
|
+
allow(scoped_ds3).to receive(:exclude).and_return(scoped_ds3)
|
|
449
|
+
allow(mock_entry_class3).to receive(:where).and_return(scoped_ds3)
|
|
450
|
+
# two different principals each get their own entry
|
|
451
|
+
allow(mock_entry_class3).to receive(:create).and_return(mock_entry_a, mock_entry_b)
|
|
452
|
+
end
|
|
453
|
+
|
|
454
|
+
it 'does not deduplicate private entries from different principals' do
|
|
455
|
+
result1 = host.handle_ingest(content: 'same fact', content_type: :observation,
|
|
456
|
+
access_scope: 'private', identity_principal_id: 1)
|
|
457
|
+
result2 = host.handle_ingest(content: 'same fact', content_type: :observation,
|
|
458
|
+
access_scope: 'private', identity_principal_id: 2)
|
|
459
|
+
expect(result1[:entry_id]).not_to eq(result2[:entry_id])
|
|
460
|
+
end
|
|
461
|
+
end
|
|
462
|
+
|
|
350
463
|
context 'early-return warn logs' do
|
|
351
464
|
let(:logger) { instance_double('Logger', debug: nil, info: nil, warn: nil) }
|
|
352
465
|
|
|
@@ -497,6 +610,37 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
497
610
|
expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
|
|
498
611
|
end
|
|
499
612
|
end
|
|
613
|
+
|
|
614
|
+
context 'access scope forwarding' do
|
|
615
|
+
let(:mock_entry_class) { double('ApolloEntry') }
|
|
616
|
+
let(:mock_db) { double('db') }
|
|
617
|
+
|
|
618
|
+
before do
|
|
619
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
|
|
620
|
+
allow(Legion::LLM::Call::Embeddings).to receive(:generate)
|
|
621
|
+
.and_return({ vector: Array.new(1024, 0.1), model: 'test', provider: :ollama, dimensions: 1024, tokens: 0 })
|
|
622
|
+
allow(mock_entry_class).to receive(:db).and_return(mock_db)
|
|
623
|
+
allow(mock_db).to receive(:fetch).and_return(double(all: []))
|
|
624
|
+
allow(mock_entry_class).to receive(:where).and_return(double(update: true))
|
|
625
|
+
end
|
|
626
|
+
|
|
627
|
+
it 'forwards requesting_principal_id to build_semantic_search_sql' do
|
|
628
|
+
expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
|
|
629
|
+
hash_including(requesting_principal_id: 42)
|
|
630
|
+
).and_return('SELECT 1')
|
|
631
|
+
allow(mock_db).to receive(:fetch).and_return(double(all: []))
|
|
632
|
+
|
|
633
|
+
host.handle_query(query: 'test', requesting_principal_id: 42)
|
|
634
|
+
end
|
|
635
|
+
|
|
636
|
+
it 'passes nil requesting_principal_id when not provided (no filter)' do
|
|
637
|
+
expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
|
|
638
|
+
hash_including(requesting_principal_id: nil)
|
|
639
|
+
).and_return('SELECT 1')
|
|
640
|
+
|
|
641
|
+
host.handle_query(query: 'test')
|
|
642
|
+
end
|
|
643
|
+
end
|
|
500
644
|
end
|
|
501
645
|
|
|
502
646
|
describe '#normalize_text_input' do
|
|
@@ -573,6 +717,14 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
573
717
|
).and_call_original
|
|
574
718
|
host.retrieve_relevant(query: 'treatment', domain: 'clinical')
|
|
575
719
|
end
|
|
720
|
+
|
|
721
|
+
it 'forwards requesting_principal_id to build_semantic_search_sql' do
|
|
722
|
+
expect(Legion::Extensions::Apollo::Helpers::GraphQuery).to receive(:build_semantic_search_sql).with(
|
|
723
|
+
hash_including(requesting_principal_id: 7)
|
|
724
|
+
).and_return('SELECT 1')
|
|
725
|
+
|
|
726
|
+
host.retrieve_relevant(query: 'test', requesting_principal_id: 7)
|
|
727
|
+
end
|
|
576
728
|
end
|
|
577
729
|
end
|
|
578
730
|
|
|
@@ -842,6 +994,18 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
842
994
|
expect(result[:redacted]).to eq(2)
|
|
843
995
|
expect(result[:agent_id]).to eq('agent-dead')
|
|
844
996
|
end
|
|
997
|
+
|
|
998
|
+
it 'clears identity columns on confirmed (redacted) entries' do
|
|
999
|
+
expect(mock_dataset).to receive(:update).with(
|
|
1000
|
+
hash_including(
|
|
1001
|
+
identity_principal_id: nil,
|
|
1002
|
+
identity_id: nil,
|
|
1003
|
+
identity_canonical_name: nil
|
|
1004
|
+
)
|
|
1005
|
+
).and_return(2)
|
|
1006
|
+
|
|
1007
|
+
host.handle_erasure_request(agent_id: 'agent-dead')
|
|
1008
|
+
end
|
|
845
1009
|
end
|
|
846
1010
|
|
|
847
1011
|
context 'when Sequel raises an error' do
|
data/spec/spec_helper.rb
CHANGED