embedding_util 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +2 -2
- data/lib/embedding_util/profiles.rb +1 -1
- data/lib/embedding_util/providers/endpoint.rb +2 -2
- data/lib/embedding_util/server_manager.rb +4 -4
- data/lib/embedding_util/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d7fe54bc241ead7c4f0aab2f252dfedff61627a1e6cbc3814e02096e455849bb
|
|
4
|
+
data.tar.gz: 540daea5a8b3ac13123b879f4644622bda81697981a4270867510be752bcb3c8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8fd953d5fe00539c084aed32136ab17c3645a401ef768778baa2534b3125f8fcd57771839b11e1d1890896adbc60850bfb82e74c3b07708df04f9efff82eb5b5
|
|
7
|
+
data.tar.gz: 54d4812b7425a12ee64841e43604375267ef25ca1f6d0f9a661fd1e1e4d710fc8208904b9b836a233c10820be624ec248df063ab98cfb81709f8b6c8efd88693
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
## [0.1.3] - 2026-06-10
|
|
2
|
+
|
|
3
|
+
- Set self-hosted reranker `--batch-size` and `--ubatch-size` together
|
|
4
|
+
- Retry managed reranker batch-size failures with both values raised to `4096`
|
|
5
|
+
- Update endpoint guidance to recommend increasing both llama.cpp batch-size settings for app-managed rerankers
|
|
6
|
+
|
|
1
7
|
## [0.1.2] - 2026-06-10
|
|
2
8
|
|
|
3
9
|
- Add self-hosted reranker recovery for llama.cpp physical batch-size failures
|
data/README.md
CHANGED
|
@@ -118,9 +118,9 @@ Reranker model:
|
|
|
118
118
|
|
|
119
119
|
- repo: `ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF`
|
|
120
120
|
- file: `qwen3-reranker-0.6b-q8_0.gguf`
|
|
121
|
-
- server flags: `--reranking --ubatch-size 1024`
|
|
121
|
+
- server flags: `--reranking --batch-size 1024 --ubatch-size 1024`
|
|
122
122
|
|
|
123
|
-
For self-hosted rerankers, `embedding_util` starts with `--ubatch-size 1024`. If a larger rerank request exceeds llama.cpp's physical batch size, the managed reranker is restarted once with `config.reranker_max_ubatch_size`, which defaults to `4096`, and the request is retried.
|
|
123
|
+
For self-hosted rerankers, `embedding_util` starts with `--batch-size 1024 --ubatch-size 1024`. If a larger rerank request exceeds llama.cpp's physical batch size, the managed reranker is restarted once with both values set to `config.reranker_max_ubatch_size`, which defaults to `4096`, and the request is retried.
|
|
124
124
|
|
|
125
125
|
Do not combine embedding and reranking flags for this profile. Run separate local servers.
|
|
126
126
|
|
|
data/lib/embedding_util/profiles.rb
CHANGED
|
@@ -19,7 +19,7 @@ module EmbeddingUtil
|
|
|
19
19
|
repo: "ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF",
|
|
20
20
|
file: "qwen3-reranker-0.6b-q8_0.gguf",
|
|
21
21
|
model: "qwen3-reranker-0.6b",
|
|
22
|
-
server_flags: ["--reranking", "--ubatch-size", "1024"]
|
|
22
|
+
server_flags: ["--reranking", "--batch-size", "1024", "--ubatch-size", "1024"]
|
|
23
23
|
}
|
|
24
24
|
)
|
|
25
25
|
|
|
data/lib/embedding_util/providers/endpoint.rb
CHANGED
|
@@ -153,8 +153,8 @@ module EmbeddingUtil
|
|
|
153
153
|
message = "#{uri} returned #{response.code}: #{response.body}"
|
|
154
154
|
return message unless reranker_batch_size_error?(path, response.body)
|
|
155
155
|
|
|
156
|
-
"#{message}. Restart the reranker server with
|
|
157
|
-
"embedding_util-managed reranker servers use
|
|
156
|
+
"#{message}. Restart the reranker server with larger llama.cpp --batch-size and --ubatch-size values; " \
|
|
157
|
+
"embedding_util-managed reranker servers use 1024 by default and can retry with 4096."
|
|
158
158
|
end
|
|
159
159
|
|
|
160
160
|
def reranker_batch_size_error?(path, body)
|
|
data/lib/embedding_util/server_manager.rb
CHANGED
|
@@ -127,10 +127,10 @@ module EmbeddingUtil
|
|
|
127
127
|
flags = server_model.settings.fetch(:server_flags)
|
|
128
128
|
return flags unless server_model.capability == :reranker
|
|
129
129
|
|
|
130
|
-
|
|
130
|
+
with_reranker_batch_size(flags, config.reranker_ubatch_size)
|
|
131
131
|
end
|
|
132
132
|
|
|
133
|
-
def
|
|
133
|
+
def with_reranker_batch_size(flags, size)
|
|
134
134
|
filtered = []
|
|
135
135
|
skip_next = false
|
|
136
136
|
flags.each do |flag|
|
|
@@ -139,14 +139,14 @@ module EmbeddingUtil
|
|
|
139
139
|
next
|
|
140
140
|
end
|
|
141
141
|
|
|
142
|
-
if ["--ubatch-size", "-ub"].include?(flag)
|
|
142
|
+
if ["--batch-size", "-b", "--ubatch-size", "-ub"].include?(flag)
|
|
143
143
|
skip_next = true
|
|
144
144
|
next
|
|
145
145
|
end
|
|
146
146
|
|
|
147
147
|
filtered << flag
|
|
148
148
|
end
|
|
149
|
-
filtered + ["--ubatch-size", size.to_s]
|
|
149
|
+
filtered + ["--batch-size", size.to_s, "--ubatch-size", size.to_s]
|
|
150
150
|
end
|
|
151
151
|
|
|
152
152
|
def required_port(host, port)
|