legion-llm 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/legion/llm/embeddings.rb +11 -5
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f52899e131c7b40d4dba1576584fa0e9709022db8c83dfc6c2bf21044c58efe4
|
|
4
|
+
data.tar.gz: a45b7ba54bd018efbdaa61c1b13d53608560c432cb804ccb565aea5e99e0087e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ea38c1ca6a6298e7aae10524afa822a2e85a83d0958ea8c830cf046470a3260db3605a0a59fed85bca08939a1914c86e226e306a11bbb602ffb6b6b13cfa6181
|
|
7
|
+
data.tar.gz: a57924ff5d697c34160d1fb5aa4ce515f55a7e0f52e5bab8d740bcbe31bfb6a947d475e44404f3f84b8078441df6087f1b8f466d25a0eb6e937781c601c99b7b
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.6.2] - 2026-03-31
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- Reduce `OLLAMA_CONTEXT_CHARS` from 2048 to 1400 for 512-token models (mxbai-embed-large, bge-large, snowflake-arctic-embed) to account for real tokenization ratios (~3 chars/token)
|
|
9
|
+
- `generate_ollama` now catches context-length rejections and retries with chunking at 60% of the character limit instead of failing over to a potentially broken provider
|
|
10
|
+
|
|
5
11
|
## [0.6.1] - 2026-03-31
|
|
6
12
|
|
|
7
13
|
### Added
|
|
@@ -17,12 +17,12 @@ module Legion
|
|
|
17
17
|
TARGET_DIMENSION = 1024
|
|
18
18
|
|
|
19
19
|
OLLAMA_CONTEXT_CHARS = {
|
|
20
|
-
'mxbai-embed-large' => 2048,
|
|
21
|
-
'bge-large' => 2048,
|
|
22
|
-
'snowflake-arctic-embed' => 2048,
|
|
23
|
-
'nomic-embed-text' =>
|
|
20
|
+
'mxbai-embed-large' => 1400,
|
|
21
|
+
'bge-large' => 1400,
|
|
22
|
+
'snowflake-arctic-embed' => 1400,
|
|
23
|
+
'nomic-embed-text' => 24_000
|
|
24
24
|
}.freeze
|
|
25
|
-
OLLAMA_DEFAULT_CONTEXT_CHARS =
|
|
25
|
+
OLLAMA_DEFAULT_CONTEXT_CHARS = 1400
|
|
26
26
|
|
|
27
27
|
PREFIX_REGISTRY = {
|
|
28
28
|
'nomic-embed-text' => { document: 'search_document: ', query: 'search_query: ' },
|
|
@@ -219,6 +219,12 @@ module Legion
|
|
|
219
219
|
return dimension_error(model, :ollama, vector) if vector.is_a?(String)
|
|
220
220
|
|
|
221
221
|
{ vector: vector, model: model, provider: :ollama, dimensions: vector&.size || 0, tokens: 0 }
|
|
222
|
+
rescue RuntimeError => e
|
|
223
|
+
raise unless e.message.include?('input length exceeds')
|
|
224
|
+
|
|
225
|
+
reduced = (max_chars * 0.6).to_i
|
|
226
|
+
Legion::Logging.info("Ollama context exceeded, retrying with chunking at #{reduced} chars") if defined?(Legion::Logging)
|
|
227
|
+
generate_ollama_chunked(text: text, model: model, max_chars: reduced)
|
|
222
228
|
end
|
|
223
229
|
|
|
224
230
|
def generate_ollama_chunked(text:, model:, max_chars:)
|
data/lib/legion/llm/version.rb
CHANGED