legion-llm 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bca23ecab222b8337fa88d7b2c62558334a547f24a86eb5cfb2a91e19cdddaaa
4
- data.tar.gz: e6ef75f61a86d8a6e6bb1ed73a775180d2a70e3c5e42dbacb66ca43762b95dac
3
+ metadata.gz: f52899e131c7b40d4dba1576584fa0e9709022db8c83dfc6c2bf21044c58efe4
4
+ data.tar.gz: a45b7ba54bd018efbdaa61c1b13d53608560c432cb804ccb565aea5e99e0087e
5
5
  SHA512:
6
- metadata.gz: 4e5d23bc887ceffee4e66212c45b83211deb7ad7d2da73fe199701ca894175cafe6245db62d4a330e2f2c3642e2b33d8d9e7d1751c8ff43294c473cefd876f18
7
- data.tar.gz: ed094dfd2c7ff7771a9d8eeb0ecd05114ebe49cf79e402e5c69cf4f573746b0c53d51ad7fe5ba47cbd18fe3a9fe8d91b059cd73f399b3d082a1123dac3414485
6
+ metadata.gz: ea38c1ca6a6298e7aae10524afa822a2e85a83d0958ea8c830cf046470a3260db3605a0a59fed85bca08939a1914c86e226e306a11bbb602ffb6b6b13cfa6181
7
+ data.tar.gz: a57924ff5d697c34160d1fb5aa4ce515f55a7e0f52e5bab8d740bcbe31bfb6a947d475e44404f3f84b8078441df6087f1b8f466d25a0eb6e937781c601c99b7b
data/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.6.2] - 2026-03-31
6
+
7
+ ### Fixed
8
+ - Reduce `OLLAMA_CONTEXT_CHARS` from 2048 to 1400 for 512-token models (mxbai-embed-large, bge-large, snowflake-arctic-embed) to account for real tokenization ratios (~3 chars/token)
9
+ - `generate_ollama` now catches context-length rejections and retries with chunking at 60% char limit instead of failing over to a potentially broken provider
10
+
5
11
  ## [0.6.1] - 2026-03-31
6
12
 
7
13
  ### Added
@@ -17,12 +17,12 @@ module Legion
17
17
  TARGET_DIMENSION = 1024
18
18
 
19
19
  OLLAMA_CONTEXT_CHARS = {
20
- 'mxbai-embed-large' => 2048,
21
- 'bge-large' => 2048,
22
- 'snowflake-arctic-embed' => 2048,
23
- 'nomic-embed-text' => 32_768
20
+ 'mxbai-embed-large' => 1400,
21
+ 'bge-large' => 1400,
22
+ 'snowflake-arctic-embed' => 1400,
23
+ 'nomic-embed-text' => 24_000
24
24
  }.freeze
25
- OLLAMA_DEFAULT_CONTEXT_CHARS = 2048
25
+ OLLAMA_DEFAULT_CONTEXT_CHARS = 1400
26
26
 
27
27
  PREFIX_REGISTRY = {
28
28
  'nomic-embed-text' => { document: 'search_document: ', query: 'search_query: ' },
@@ -219,6 +219,12 @@ module Legion
219
219
  return dimension_error(model, :ollama, vector) if vector.is_a?(String)
220
220
 
221
221
  { vector: vector, model: model, provider: :ollama, dimensions: vector&.size || 0, tokens: 0 }
222
+ rescue RuntimeError => e
223
+ raise unless e.message.include?('input length exceeds')
224
+
225
+ reduced = (max_chars * 0.6).to_i
226
+ Legion::Logging.info("Ollama context exceeded, retrying with chunking at #{reduced} chars") if defined?(Legion::Logging)
227
+ generate_ollama_chunked(text: text, model: model, max_chars: reduced)
222
228
  end
223
229
 
224
230
  def generate_ollama_chunked(text:, model:, max_chars:)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.6.1'
5
+ VERSION = '0.6.2'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity