embedding_util 0.1.2 → 0.1.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb8276720e51a283fb4b8c70507ae54d25ea968caffe8142f636a6e6ea948b71
4
- data.tar.gz: eccb6e6d11006238a1af1bfc25fb3b5146325bf78a1deb0f447a73541dec128d
3
+ metadata.gz: d7fe54bc241ead7c4f0aab2f252dfedff61627a1e6cbc3814e02096e455849bb
4
+ data.tar.gz: 540daea5a8b3ac13123b879f4644622bda81697981a4270867510be752bcb3c8
5
5
  SHA512:
6
- metadata.gz: e625a2389828218a8f39cc4dc69d6ea965f59e032d190407668029df227f3c7ffee32ce30b64141d8a38654573ac591315c08696f051db3f68772efaf1b7bf34
7
- data.tar.gz: 5da07a5487e7167128b67d8b6e39009d6a2fd4c0949daf1e61ae2cb9e98fa668beecd618713b1d0545daa47a86ac6f4e77f66d3429093c658852b594b4aa3cc5
6
+ metadata.gz: 8fd953d5fe00539c084aed32136ab17c3645a401ef768778baa2534b3125f8fcd57771839b11e1d1890896adbc60850bfb82e74c3b07708df04f9efff82eb5b5
7
+ data.tar.gz: 54d4812b7425a12ee64841e43604375267ef25ca1f6d0f9a661fd1e1e4d710fc8208904b9b836a233c10820be624ec248df063ab98cfb81709f8b6c8efd88693
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [0.1.3] - 2026-06-10
2
+
3
+ - Set self-hosted reranker `--batch-size` and `--ubatch-size` together
4
+ - Retry managed reranker batch-size failures with both values raised to `4096`
5
+ - Update endpoint guidance to recommend increasing both llama.cpp batch-size settings for app-managed rerankers
6
+
1
7
  ## [0.1.2] - 2026-06-10
2
8
 
3
9
  - Add self-hosted reranker recovery for llama.cpp physical batch-size failures
data/README.md CHANGED
@@ -118,9 +118,9 @@ Reranker model:
118
118
 
119
119
  - repo: `ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF`
120
120
  - file: `qwen3-reranker-0.6b-q8_0.gguf`
121
- - server flags: `--reranking --ubatch-size 1024`
121
+ - server flags: `--reranking --batch-size 1024 --ubatch-size 1024`
122
122
 
123
- For self-hosted rerankers, `embedding_util` starts with `--ubatch-size 1024`. If a larger rerank request exceeds llama.cpp's physical batch size, the managed reranker is restarted once with `config.reranker_max_ubatch_size`, which defaults to `4096`, and the request is retried.
123
+ For self-hosted rerankers, `embedding_util` starts with `--batch-size 1024 --ubatch-size 1024`. If a larger rerank request exceeds llama.cpp's physical batch size, the managed reranker is restarted once with both values set to `config.reranker_max_ubatch_size`, which defaults to `4096`, and the request is retried.
124
124
 
125
125
  Do not combine embedding and reranking flags for this profile. Run separate local servers.
126
126
 
@@ -19,7 +19,7 @@ module EmbeddingUtil
19
19
  repo: "ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF",
20
20
  file: "qwen3-reranker-0.6b-q8_0.gguf",
21
21
  model: "qwen3-reranker-0.6b",
22
- server_flags: ["--reranking", "--ubatch-size", "1024"]
22
+ server_flags: ["--reranking", "--batch-size", "1024", "--ubatch-size", "1024"]
23
23
  }
24
24
  )
25
25
 
@@ -153,8 +153,8 @@ module EmbeddingUtil
153
153
  message = "#{uri} returned #{response.code}: #{response.body}"
154
154
  return message unless reranker_batch_size_error?(path, response.body)
155
155
 
156
- "#{message}. Restart the reranker server with a larger llama.cpp --ubatch-size; " \
157
- "embedding_util-managed reranker servers use --ubatch-size 1024 by default."
156
+ "#{message}. Restart the reranker server with larger llama.cpp --batch-size and --ubatch-size values; " \
157
+ "embedding_util-managed reranker servers use 1024 by default and can retry with 4096."
158
158
  end
159
159
 
160
160
  def reranker_batch_size_error?(path, body)
@@ -127,10 +127,10 @@ module EmbeddingUtil
127
127
  flags = server_model.settings.fetch(:server_flags)
128
128
  return flags unless server_model.capability == :reranker
129
129
 
130
- with_ubatch_size(flags, config.reranker_ubatch_size)
130
+ with_reranker_batch_size(flags, config.reranker_ubatch_size)
131
131
  end
132
132
 
133
- def with_ubatch_size(flags, size)
133
+ def with_reranker_batch_size(flags, size)
134
134
  filtered = []
135
135
  skip_next = false
136
136
  flags.each do |flag|
@@ -139,14 +139,14 @@ module EmbeddingUtil
139
139
  next
140
140
  end
141
141
 
142
- if ["--ubatch-size", "-ub"].include?(flag)
142
+ if ["--batch-size", "-b", "--ubatch-size", "-ub"].include?(flag)
143
143
  skip_next = true
144
144
  next
145
145
  end
146
146
 
147
147
  filtered << flag
148
148
  end
149
- filtered + ["--ubatch-size", size.to_s]
149
+ filtered + ["--batch-size", size.to_s, "--ubatch-size", size.to_s]
150
150
  end
151
151
 
152
152
  def required_port(host, port)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EmbeddingUtil
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embedding_util
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - hmdne