lex-knowledge 0.6.9 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 319636e39b5a3f036355f455f03ae585a9902e7510028ae2d37ce88387d09ee9
|
|
4
|
+
data.tar.gz: 7a6c81ac4db6261d2388b3a16b8a6d28dc7b81571d4bff5e49c4cb623ca40576
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 763094b4aad5b43d300823e8c74843cb8e7a199401a5a61cfe308f9d77a3461445d8175e3eb8e0b5599ad28c4c797d77bc37b038464f9ddb93f86918de6b40a9
|
|
7
|
+
data.tar.gz: a4d39884a98025e49509f281dfab9c94501a12c824f4ea08ec20795bea80d36b9bde7ec54c657b965f1b489f36a04f3e48bda6df85471246eb59d42a5bb10c27
|
|
@@ -62,6 +62,9 @@ module Legion
|
|
|
62
62
|
end
|
|
63
63
|
private_class_method :split_section
|
|
64
64
|
|
|
65
|
+
# Hash must match Legion::Extensions::Apollo::Helpers::Writeback.content_hash
|
|
66
|
+
# so knowledge chunks deduplicate consistently with Apollo writeback and still
|
|
67
|
+
# fit older apollo_entries.content_hash columns fixed at MD5 length.
|
|
65
68
|
def build_chunk(section, content, index)
|
|
66
69
|
{
|
|
67
70
|
content: content,
|
|
@@ -70,11 +73,23 @@ module Legion
|
|
|
70
73
|
source_file: section[:source_file],
|
|
71
74
|
token_count: (content.length.to_f / CHARS_PER_TOKEN).ceil,
|
|
72
75
|
chunk_index: index,
|
|
73
|
-
content_hash:
|
|
76
|
+
content_hash: apollo_compatible_content_hash(content)
|
|
74
77
|
}
|
|
75
78
|
end
|
|
76
79
|
private_class_method :build_chunk
|
|
77
80
|
|
|
81
|
+
# Computes the content hash used to deduplicate a knowledge chunk.
#
# Delegates to Legion::Extensions::Apollo::Helpers::Writeback.content_hash when
# that helper is loaded AND exposes the method; otherwise hashes locally with
# MD5 over the normalized text (trimmed, lowercased, each whitespace run
# collapsed to a single space).
#
# @param content [#to_s, nil] chunk text; the fallback hashes nil as the empty string
# @return [String] in the fallback path, a 32-character MD5 hex digest; in the
#   delegated path, whatever the Apollo helper returns (presumably the same
#   shape - confirm against the Apollo helper)
def apollo_compatible_content_hash(content)
  # `defined?` only proves the constant exists. Without the respond_to? check,
  # a loaded Writeback that lacks .content_hash would raise NoMethodError here
  # instead of reaching the fallback below.
  if defined?(Legion::Extensions::Apollo::Helpers::Writeback) &&
     Legion::Extensions::Apollo::Helpers::Writeback.respond_to?(:content_hash)
    return Legion::Extensions::Apollo::Helpers::Writeback.content_hash(content)
  end

  # Fallback when apollo isn't loaded (or has no content_hash helper): intended
  # to match its MD5+normalize semantics so future apollo-backed lookups still work.
  normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
  ::Digest::MD5.hexdigest(normalized)
end
|
|
91
|
+
private_class_method :apollo_compatible_content_hash
|
|
92
|
+
|
|
78
93
|
def settings_max_tokens
|
|
79
94
|
return nil unless defined?(Legion::Settings)
|
|
80
95
|
|
|
@@ -211,10 +211,24 @@ module Legion
|
|
|
211
211
|
return :created unless defined?(Legion::Extensions::Apollo)
|
|
212
212
|
return :skipped if !force && exists
|
|
213
213
|
|
|
214
|
-
ingest_to_apollo(chunk, embedding)
|
|
214
|
+
result = ingest_to_apollo(chunk, embedding)
|
|
215
|
+
# handle_ingest returns a Hash on both success and failure paths; the upsert
|
|
216
|
+
# status must reflect the actual persistence outcome, not just the `force` flag.
|
|
217
|
+
# Previously any non-raising return was treated as success, producing
|
|
218
|
+
# false-positive :created/:updated responses to callers.
|
|
219
|
+
unless result.is_a?(Hash) && result[:success] == true
|
|
220
|
+
hash_prefix = chunk[:content_hash]&.slice(0, 12)
|
|
221
|
+
content_len = chunk[:content]&.length
|
|
222
|
+
error = result.is_a?(Hash) ? result[:error].inspect : "non-hash result class=#{result.class}"
|
|
223
|
+
log.warn(
|
|
224
|
+
'[knowledge][upsert_chunk] apollo persistence not confirmed ' \
|
|
225
|
+
"error=#{error} chunk_hash=#{hash_prefix} chunk_len=#{content_len}"
|
|
226
|
+
)
|
|
227
|
+
return :skipped
|
|
228
|
+
end
|
|
215
229
|
force ? :updated : :created
|
|
216
230
|
rescue StandardError => e
|
|
217
|
-
log.warn(e.message)
|
|
231
|
+
log.warn("[knowledge][upsert_chunk] unexpected error class=#{e.class} message=#{e.message} chunk_hash=#{chunk[:content_hash]&.slice(0, 12)}")
|
|
218
232
|
:skipped
|
|
219
233
|
end
|
|
220
234
|
private_class_method :upsert_chunk_with_embedding
|