parse-stack-next 4.5.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/release.yml +32 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/.vscode/settings.json +3 -0
- data/CHANGELOG.md +305 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +88 -0
- data/lib/parse/cache/redis.rb +249 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/create_lock.rb +14 -2
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +77 -5
- data/parse-stack.png +0 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module Parse
|
|
5
|
+
module Embeddings
|
|
6
|
+
# Abstract base class for embedding providers. Concrete subclasses
|
|
7
|
+
# implement {#embed_text} (and, in v5.1+, optionally {#embed_image}).
|
|
8
|
+
#
|
|
9
|
+
# Provider responsibilities:
|
|
10
|
+
#
|
|
11
|
+
# * Translate a batch of inputs into a batch of float vectors.
|
|
12
|
+
# * Return vectors in **the same order** as inputs.
|
|
13
|
+
# * Call {#validate_response!} before returning so the caller sees
|
|
14
|
+
# a typed {InvalidResponseError} for off-by-one batches and NaN /
|
|
15
|
+
# ±Inf poisoning at the provider boundary — not deep inside a
|
|
16
|
+
# later $vectorSearch call.
|
|
17
|
+
#
|
|
18
|
+
# Subclasses MUST override:
|
|
19
|
+
#
|
|
20
|
+
# * {#embed_text} — `(strings, input_type:) -> Array<Array<Float>>`
|
|
21
|
+
# * {#dimensions} — `Integer`, the fixed output width
|
|
22
|
+
# * {#model_name} — stable identifier for cache keys / `embedding_meta`
|
|
23
|
+
#
|
|
24
|
+
# Subclasses MAY override:
|
|
25
|
+
#
|
|
26
|
+
# * {#embed_image} — v5.1 (multimodal); default `NotImplementedError`
|
|
27
|
+
# * {#embed_batch_size} — provider-recommended batch size hint
|
|
28
|
+
# * {#max_input_tokens} — chunker hint
|
|
29
|
+
# * {#normalize?} — whether output is unit-normalized
|
|
30
|
+
# * {#modalities} — defaults to `[:text]`
|
|
31
|
+
# * {#supports_input_type?} — defaults to `false`
|
|
32
|
+
#
|
|
33
|
+
# @abstract
|
|
34
|
+
class Provider
  # Embed a batch of strings. Abstract in the base class: it always
  # raises, so concrete providers must override it.
  #
  # @param strings [Array<String>] inputs to embed.
  # @param input_type [Symbol] `:search_query` or `:search_document`.
  # @return [Array<Array<Float>>] vectors aligned 1:1 with `strings`.
  # @raise [NotImplementedError] when the concrete subclass has not
  #   overridden the method.
  def embed_text(strings, input_type: :search_document)
    raise NotImplementedError, "#{self.class}#embed_text must be implemented"
  end

  # @param sources [Array<URI, IO, String>] image sources — URI for
  #   remote, IO for streamed bytes, String for base64. Concrete
  #   providers document which forms they accept.
  # @param input_type [Symbol] `:search_query` or `:search_document`,
  #   parallel to {#embed_text}.
  # @param opts [Hash] provider-specific options (e.g. `dim:` for
  #   Matryoshka-style truncation). Forward-compatible escape hatch.
  # @return [Array<Array<Float>>] vectors aligned 1:1 with `sources`.
  # @raise [NotImplementedError] image embedding is a v5.1+ feature.
  def embed_image(sources, input_type: :search_document, **opts)
    raise NotImplementedError, "#{self.class} does not support image embedding"
  end

  # Batched text embedding. Splits `strings` into chunks of size
  # {#embed_batch_size} (or makes a single-shot call when that is nil
  # or the input already fits in one chunk) and concatenates the
  # results in input order. Concrete providers should override only
  # when their HTTP shape needs more than naive slicing (e.g. async
  # parallelism, per-request budgets).
  #
  # @param strings [Array<String>]
  # @param input_type [Symbol]
  # @return [Array<Array<Float>>] aligned 1:1 with `strings`.
  # @raise [ArgumentError] when `strings` is not an Array, or when
  #   {#embed_batch_size} returns anything other than nil or a
  #   positive Integer.
  def embed_text_batched(strings, input_type: :search_document)
    unless strings.is_a?(Array)
      raise ArgumentError,
            "#{self.class}#embed_text_batched expects Array<String> (got #{strings.class})."
    end
    return [] if strings.empty?

    size = embed_batch_size
    return embed_text(strings, input_type: input_type) if size.nil?

    # Fail with a message that names the culprit. Without this guard a
    # subclass returning 0 or a negative size surfaces as an opaque
    # "invalid slice size" from each_slice, and a Float would be
    # silently truncated.
    unless size.is_a?(Integer) && size.positive?
      raise ArgumentError,
            "#{self.class}#embed_batch_size must be nil or a positive Integer (got #{size.inspect})."
    end
    return embed_text(strings, input_type: input_type) if strings.length <= size

    strings.each_slice(size).flat_map do |slice|
      embed_text(slice, input_type: input_type)
    end
  end

  # @return [Integer] fixed output width of this provider's vectors.
  # @raise [NotImplementedError] unless overridden by the subclass.
  def dimensions
    raise NotImplementedError, "#{self.class}#dimensions must be implemented"
  end

  # @return [String] stable model identifier (e.g. "text-embedding-3-small").
  #   Used as a cache-key component and persisted to `embedding_meta`.
  # @raise [NotImplementedError] unless overridden by the subclass.
  def model_name
    raise NotImplementedError, "#{self.class}#model_name must be implemented"
  end

  # @return [Array<Symbol>] subset of [:text, :image, :audio, :video].
  def modalities
    [:text]
  end

  # @return [Integer, nil] provider-recommended batch size, or nil.
  def embed_batch_size
    nil
  end

  # @return [Integer, nil] chunker hint; max tokens per input.
  def max_input_tokens
    nil
  end

  # @return [Boolean] whether the provider returns unit-normalized
  #   vectors. Affects similarity-metric selection (`:cosine` vs
  #   `:dotProduct`).
  def normalize?
    false
  end

  # @return [Boolean] whether the provider distinguishes between
  #   `:search_query` and `:search_document` inputs. When false the
  #   `input_type:` kwarg is accepted (for forward compatibility and
  #   cache-key stability) but has no effect on the returned vector.
  def supports_input_type?
    false
  end

  # Validate a provider response before returning it from `embed_*`.
  #
  # Raises {InvalidResponseError} on any of:
  #
  # * `vectors` is not an Array.
  # * `vectors.length != input_count` (off-by-one across batch — the
  #   most insidious provider bug, since vectors would be silently
  #   misaligned with their inputs).
  # * `vectors[i]` is not an Array.
  # * `vectors[i].length != dimensions` (variable-width response).
  # * any element that is not a Float or Integer, or is NaN / ±Inf.
  #
  # @param input_count [Integer] number of items in the input batch.
  # @param vectors [Array<Array<Float>>] the provider's response.
  # @return [Array<Array<Float>>] vectors, unchanged on success.
  # @raise [InvalidResponseError]
  def validate_response!(input_count, vectors)
    unless vectors.is_a?(Array)
      raise InvalidResponseError,
            "#{self.class}: expected Array of vectors, got #{vectors.class}."
    end
    if vectors.length != input_count
      raise InvalidResponseError,
            "#{self.class}: response length #{vectors.length} != input count #{input_count}."
    end

    dims = dimensions
    vectors.each_with_index do |vec, i|
      unless vec.is_a?(Array)
        raise InvalidResponseError,
              "#{self.class}: response[#{i}] is not an Array (#{vec.class})."
      end
      if vec.length != dims
        raise InvalidResponseError,
              "#{self.class}: response[#{i}] length #{vec.length} != declared dimensions #{dims}."
      end

      vec.each_with_index do |x, j|
        # Strictly Float or Integer. Numeric is too loose — Complex
        # has #finite? and would pass; Rational/BigDecimal serialize
        # to BSON in surprising ways. Vector elements are always
        # floats in practice.
        unless x.is_a?(Float) || x.is_a?(Integer)
          raise InvalidResponseError,
                "#{self.class}: response[#{i}][#{j}] is not Float or Integer (#{x.class})."
        end
        # Integers are always finite; only a Float can be NaN or ±Inf.
        if x.is_a?(Float) && !x.finite?
          raise InvalidResponseError,
                "#{self.class}: response[#{i}][#{j}] is not finite (#{x.inspect})."
        end
      end
    end
    vectors
  end

  # Default {#inspect} that allowlists safe instance vars. It renders
  # only what {#inspect_attrs} returns, so concrete providers holding
  # `@api_key`, `@bearer_token`, etc. inherit a safe `inspect`
  # automatically. Subclasses may extend the allowlist by overriding
  # {#inspect_attrs}.
  #
  # @return [String]
  def inspect
    attrs = inspect_attrs.map { |k, v| "#{k}=#{v.inspect}" }.join(" ")
    attrs.empty? ? "#<#{self.class}>" : "#<#{self.class} #{attrs}>"
  end

  # @return [Hash] attributes safe to surface in {#inspect}. Override
  #   in subclasses to add fields; never add credentials. Entries whose
  #   value is nil (e.g. an abstract `model_name`) are dropped.
  def inspect_attrs
    out = {}
    out[:model] = safe_call(:model_name)
    out[:dim] = safe_call(:dimensions)
    out.compact
  end

  # AS::N event name emitted from {#instrument_embed}. Subscribers
  # match this exact string. Parallel namespace to
  # `parse.mongodb.aggregate` / `parse.cache.*` /
  # `parse.agent.tool_call` so a single AS::N subscription tree can
  # cover query, cache, agent, and embedding spend.
  AS_NOTIFICATION_NAME = "parse.embeddings.embed"

  # Run the given block inside an `ActiveSupport::Notifications`
  # event named {AS_NOTIFICATION_NAME}.
  #
  # Subscribed payload contract. Keys are present on every emit so
  # subscribers can rely on them without `key?` guards (values may
  # be `nil` when the provider does not surface usage telemetry —
  # e.g. {Fixture} has no token cost).
  #
  # * `:provider`     [String]   — `self.class.name`
  # * `:model`        [String]   — {#model_name}
  # * `:dimensions`   [Integer]  — {#dimensions}
  # * `:input_count`  [Integer]  — number of items in the batch
  # * `:input_type`   [Symbol]   — `:search_query` / `:search_document`
  # * `:total_tokens` [Integer, nil] — provider-reported token usage; nil when N/A
  # * `:cached`       [Boolean]  — whether the batch was served from cache (always false in v5.0)
  # * `:error`        [String, nil] — `exception.class.name` when the block raised
  #
  # The keys above are the stable contract: their semantics will not
  # change without a deprecation cycle. New keys may be added later,
  # so subscribers should not depend on the key set being exhaustive.
  #
  # Synchronous-subscriber discipline: AS::N delivers events on the
  # request thread. A slow subscriber blocks every embed call; an
  # exception in a subscriber surfaces as a request failure. Keep
  # subscribers cheap (counters, in-memory accumulators) or push to
  # non-blocking sinks (StatsD-over-UDP, OTel exporters that batch).
  #
  # The block is yielded the payload Hash so concrete providers can
  # write `:total_tokens` / `:cached` from inside the network call
  # (after parsing the provider's `usage` envelope). Any other key
  # written to the yielded payload, or passed through `**extra`, is
  # delivered to subscribers as well but sits outside the documented
  # contract — providers should stick to the keys listed above.
  #
  # @param input_count [Integer] number of items in the batch.
  # @param input_type [Symbol] `:search_query` / `:search_document`.
  # @param extra [Hash] merged over the default payload (may override
  #   the defaults).
  # @yieldparam payload [Hash] the mutable event payload.
  # @return [Object] whatever the block returns.
  def instrument_embed(input_count, input_type, **extra)
    payload = {
      provider: self.class.name,
      model: safe_call(:model_name),
      dimensions: safe_call(:dimensions),
      input_count: input_count,
      input_type: input_type,
      total_tokens: nil,
      cached: false,
      error: nil,
    }.merge(extra)
    # Defensive: AS::N is in active_support, which the wider gem
    # already requires; if a downstream caller has loaded the
    # embeddings module without ActiveSupport (e.g. a sliced
    # require of just `parse/embeddings`), fall through and simply
    # run the block with the payload.
    unless defined?(ActiveSupport::Notifications)
      return yield(payload)
    end

    result = nil
    ActiveSupport::Notifications.instrument(AS_NOTIFICATION_NAME, payload) do |emit_payload|
      begin
        result = yield(emit_payload)
      rescue StandardError => e
        # Record the failure class for subscribers, then let the
        # original exception propagate untouched.
        emit_payload[:error] = e.class.name
        raise
      end
    end
    result
  end

  private

  # Call a zero-argument public method, mapping the abstract-method
  # sentinel to nil so {#inspect_attrs} and {#instrument_embed} work
  # on providers that have not implemented it. NotImplementedError is
  # named explicitly because it is a ScriptError, which a bare
  # `rescue` would not catch.
  #
  # @param method [Symbol] name of the method to invoke.
  # @return [Object, nil] the method's result, or nil when it raised
  #   NotImplementedError.
  def safe_call(method)
    public_send(method)
  rescue NotImplementedError
    nil
  end
end
|
|
263
|
+
end
|
|
264
|
+
end
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "faraday"
|
|
5
|
+
require "json"
|
|
6
|
+
require "uri"
|
|
7
|
+
require_relative "provider"
|
|
8
|
+
|
|
9
|
+
module Parse
|
|
10
|
+
module Embeddings
|
|
11
|
+
# Qwen 3 embeddings provider. Targets Alibaba Cloud DashScope's
|
|
12
|
+
# OpenAI-compatible endpoint (`/compatible-mode/v1/embeddings`),
|
|
13
|
+
# which mirrors the OpenAI request envelope but speaks the
|
|
14
|
+
# `qwen3-embedding-*` model family.
|
|
15
|
+
#
|
|
16
|
+
# Supported models — all three are Matryoshka-capable, so the
|
|
17
|
+
# `dimensions:` constructor kwarg truncates the returned vector
|
|
18
|
+
# to any width ≤ native:
|
|
19
|
+
#
|
|
20
|
+
# * `qwen3-embedding-0.6b` — 1024 dim native, ~32k input tokens.
|
|
21
|
+
# * `qwen3-embedding-4b` — 2560 dim native.
|
|
22
|
+
# * `qwen3-embedding-8b` — 4096 dim native.
|
|
23
|
+
#
|
|
24
|
+
# The same three checkpoints are published open-weight on Hugging
|
|
25
|
+
# Face under Apache 2.0 (`Qwen/Qwen3-Embedding-0.6B`, etc.) — for
|
|
26
|
+
# self-hosted inference behind vLLM / Text Embeddings Inference /
|
|
27
|
+
# llama.cpp, use {LocalHTTP} instead and point it at your gateway.
|
|
28
|
+
#
|
|
29
|
+
# @example registration (DashScope International endpoint)
|
|
30
|
+
# Parse::Embeddings.register(:qwen,
|
|
31
|
+
# Parse::Embeddings::Qwen.new(
|
|
32
|
+
# api_key: ENV.fetch("DASHSCOPE_API_KEY"),
|
|
33
|
+
# model: "qwen3-embedding-8b",
|
|
34
|
+
# ))
|
|
35
|
+
#
|
|
36
|
+
# @example Matryoshka truncation
|
|
37
|
+
# Parse::Embeddings::Qwen.new(
|
|
38
|
+
# api_key: ENV.fetch("DASHSCOPE_API_KEY"),
|
|
39
|
+
# model: "qwen3-embedding-8b",
|
|
40
|
+
# dimensions: 1024, # truncate from 4096 → 1024
|
|
41
|
+
# )
|
|
42
|
+
#
|
|
43
|
+
# == Asymmetric input types
|
|
44
|
+
#
|
|
45
|
+
# Qwen3-Embedding is trained with an instruction-tuned head, but
|
|
46
|
+
# the DashScope compatible-mode endpoint does not currently accept
|
|
47
|
+
# an `input_type` / `task` request field. We therefore set
|
|
48
|
+
# `supports_input_type?` to `false` and drop the SDK-canonical
|
|
49
|
+
# `input_type:` kwarg at the wire — same posture as {OpenAI} and
|
|
50
|
+
# {LocalHTTP}. Callers who want query/passage asymmetry must wrap
|
|
51
|
+
# their text with an explicit instruction prefix client-side; the
|
|
52
|
+
# AS::N event still carries the requested `input_type` so cache
|
|
53
|
+
# keys remain stable.
|
|
54
|
+
class Qwen < Provider
  # Raised when the endpoint answers 401.
  class AuthenticationError < Error; end
  # Raised for any non-2xx status that is not 401, 429, or 5xx.
  class BadRequestError < Error; end
  # Raised when the endpoint keeps answering 429 after all retries.
  class RateLimitError < Error; end
  # Raised when timeouts, connection failures, or 5xx responses
  # persist after all retries.
  class TransientError < Error; end

  # Default to the international compatible-mode host. Operators
  # in mainland China should override to
  # `https://dashscope.aliyuncs.com/compatible-mode/v1`.
  DEFAULT_BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
  DEFAULT_MODEL = "qwen3-embedding-8b"
  DEFAULT_TIMEOUT = 30
  DEFAULT_OPEN_TIMEOUT = 5
  DEFAULT_MAX_RETRIES = 3
  # DashScope's compatible endpoint caps embedding requests at 25
  # inputs per call (smaller than OpenAI's 2048). Default below
  # the cap so callers don't have to tune.
  DEFAULT_BATCH_SIZE = 10
  # Upper bound on the response body size accepted for JSON parsing.
  MAX_RESPONSE_BYTES = 16 * 1024 * 1024

  # Native output width per supported model; also the model allowlist.
  MODEL_DEFAULT_DIMENSIONS = {
    "qwen3-embedding-0.6b" => 1024,
    "qwen3-embedding-4b" => 2560,
    "qwen3-embedding-8b" => 4096,
  }.freeze

  MODEL_MAX_INPUT_TOKENS = {
    "qwen3-embedding-0.6b" => 32_000,
    "qwen3-embedding-4b" => 32_000,
    "qwen3-embedding-8b" => 32_000,
  }.freeze

  # Every Qwen3-Embedding row is Matryoshka-capable. Kept as an
  # explicit allowlist so future non-Matryoshka additions (e.g.
  # qwen-text-embedding-v3) don't silently inherit the behaviour.
  MATRYOSHKA_MODELS = %w[
    qwen3-embedding-0.6b
    qwen3-embedding-4b
    qwen3-embedding-8b
  ].freeze

  # @param api_key [String] required. Sent as `Authorization: Bearer …`.
  #   Must not be blank or contain CR / LF / NUL characters.
  # @param model [String] one of {MODEL_DEFAULT_DIMENSIONS}'s keys.
  # @param dimensions [Integer, nil] Matryoshka truncation. Must
  #   be ≤ the model's native width.
  # @param base_url [String] override (mainland-China host or a
  #   private gateway). Must be HTTPS unless
  #   `allow_insecure_base_url: true`.
  # @param timeout [Integer] read timeout, seconds.
  # @param open_timeout [Integer] connect timeout, seconds.
  # @param max_retries [Integer] retry attempts on 429/5xx/timeouts.
  # @param embed_batch_size [Integer] inputs per request (DashScope
  #   compatible-mode caps at 25).
  # @param allow_faraday_proxy [Boolean] opt in to proxy / env-proxy
  #   autodiscovery. Defaults `false`.
  # @param allow_insecure_base_url [Boolean] permit `http://` base.
  # @param connection [Faraday::Connection, nil] injection seam; when
  #   given it is used as-is instead of building a connection.
  # @raise [ArgumentError] when any argument fails validation.
  def initialize(
    api_key:,
    model: DEFAULT_MODEL,
    dimensions: nil,
    base_url: DEFAULT_BASE_URL,
    timeout: DEFAULT_TIMEOUT,
    open_timeout: DEFAULT_OPEN_TIMEOUT,
    max_retries: DEFAULT_MAX_RETRIES,
    embed_batch_size: DEFAULT_BATCH_SIZE,
    allow_faraday_proxy: false,
    allow_insecure_base_url: false,
    connection: nil
  )
    validate_api_key!(api_key)
    validate_model!(model)
    validate_dimensions!(model, dimensions)
    sanitized_base_url = validate_base_url!(base_url, allow_insecure_base_url)
    validate_positive_integer!(:timeout, timeout)
    validate_positive_integer!(:open_timeout, open_timeout)
    validate_non_negative_integer!(:max_retries, max_retries)
    validate_positive_integer!(:embed_batch_size, embed_batch_size)

    @api_key = api_key
    @model = model
    @dimensions = dimensions || MODEL_DEFAULT_DIMENSIONS.fetch(model)
    @base_url = sanitized_base_url
    @timeout = timeout
    @open_timeout = open_timeout
    @max_retries = max_retries
    @embed_batch_size = embed_batch_size
    @allow_faraday_proxy = allow_faraday_proxy
    # Built last: build_connection reads the ivars assigned above.
    @connection = connection || build_connection
  end

  # @return [Integer] active output width (the requested truncation,
  #   or the model's native width when none was requested).
  def dimensions
    @dimensions
  end

  # @return [String] the configured model identifier.
  def model_name
    @model
  end

  # @return [Integer] inputs per request.
  def embed_batch_size
    @embed_batch_size
  end

  # @return [Integer, nil] token limit from {MODEL_MAX_INPUT_TOKENS}.
  def max_input_tokens
    MODEL_MAX_INPUT_TOKENS[@model]
  end

  def normalize?
    # Qwen3-Embedding is documented unit-normalized at the head.
    true
  end

  def supports_input_type?
    # DashScope compatible-mode does not accept a wire-level
    # input_type / task field. The kwarg threads through for
    # cache-key stability but is dropped at the request.
    false
  end

  # Embed `strings` with a single POST to the embeddings endpoint.
  # This method does not slice by {#embed_batch_size}; it sends every
  # input in one request.
  #
  # @param strings [Array<String>] inputs; each must be a non-empty String.
  # @param input_type [Symbol] accepted for forward compatibility,
  #   dropped at the wire (see {#supports_input_type?}).
  # @return [Array<Array<Float>>] vectors aligned 1:1 with `strings`.
  # @raise [ArgumentError] on a non-Array argument or a non-String /
  #   empty element.
  # @raise [InvalidResponseError] when the response is malformed.
  def embed_text(strings, input_type: :search_document)
    unless strings.is_a?(Array)
      raise ArgumentError,
            "Parse::Embeddings::Qwen#embed_text expects Array<String> (got #{strings.class})."
    end
    return [] if strings.empty?

    strings.each_with_index do |s, i|
      unless s.is_a?(String)
        raise ArgumentError,
              "Parse::Embeddings::Qwen#embed_text strings[#{i}] is not a String (#{s.class})."
      end
      if s.empty?
        raise ArgumentError,
              "Parse::Embeddings::Qwen#embed_text strings[#{i}] is empty; Qwen rejects empty inputs."
      end
    end

    body = {
      model: @model,
      input: strings,
      encoding_format: "float",
    }
    # Forward `dimensions` only when active width differs from
    # native. Sending native width is a no-op on DashScope but
    # we keep the wire minimal to avoid drift across future
    # endpoint revisions.
    if MATRYOSHKA_MODELS.include?(@model) &&
       @dimensions != MODEL_DEFAULT_DIMENSIONS.fetch(@model)
      body[:dimensions] = @dimensions
    end

    instrument_embed(strings.length, input_type) do |emit_payload|
      payload = post_embeddings(body)
      # Surface token usage to subscribers only when the response
      # carries a well-formed, non-negative integer count.
      if payload.is_a?(Hash) && payload["usage"].is_a?(Hash)
        tt = payload["usage"]["total_tokens"]
        emit_payload[:total_tokens] = tt if tt.is_a?(Integer) && tt >= 0
      end
      vectors = extract_vectors!(payload, strings.length)
      validate_response!(strings.length, vectors)
    end
  end

  # Adds the scheme+host of the base URL and the retry budget to the
  # inherited allowlist. The API key is never included.
  #
  # @return [Hash]
  def inspect_attrs
    super.merge(base: safe_base_host, retries: @max_retries)
  end

  protected

  # Build the Faraday connection used for requests: bearer auth and
  # JSON headers, the configured timeouts, and the default adapter.
  #
  # @return [Faraday::Connection]
  def build_connection
    headers = {
      "Authorization" => "Bearer #{@api_key}",
      "Content-Type" => "application/json",
      "Accept" => "application/json",
      "User-Agent" => "parse-stack-embeddings/#{user_agent_version}",
    }

    faraday_opts = { url: @base_url, headers: headers }
    faraday_opts[:proxy] = nil unless @allow_faraday_proxy

    conn = Faraday.new(**faraday_opts) do |f|
      f.options.timeout = @timeout
      f.options.open_timeout = @open_timeout
      f.adapter Faraday.default_adapter
    end
    # Clear the proxy on the built connection as well, unless the
    # caller opted in to proxy use.
    conn.proxy = nil if !@allow_faraday_proxy && conn.respond_to?(:proxy=)
    conn
  end

  # POST `body` as JSON to `embeddings`, retrying timeouts, connection
  # failures, 429 and 5xx up to `@max_retries` times (so at most
  # `@max_retries + 1` attempts) with a sleep between attempts.
  #
  # @param body [Hash] request envelope.
  # @return [Object] the parsed JSON body of the first 2xx response.
  # @raise [AuthenticationError] on 401 (never retried).
  # @raise [RateLimitError] when 429 persists after all retries.
  # @raise [TransientError] when timeouts / connection failures / 5xx
  #   persist after all retries.
  # @raise [BadRequestError] on any other non-2xx status (never retried).
  # @raise [InvalidResponseError] when a 2xx body cannot be parsed.
  def post_embeddings(body)
    attempts = 0
    loop do
      attempts += 1
      begin
        response = @connection.post("embeddings") do |req|
          req.body = body.to_json
        end
      rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
        if attempts > @max_retries
          raise TransientError, "Parse::Embeddings::Qwen: #{e.class} after #{attempts} attempt(s)."
        end
        sleep(backoff_seconds(attempts))
        next
      end

      status = response.status
      return parse_json_body!(response.body) if status >= 200 && status < 300

      if status == 401
        raise AuthenticationError, "Parse::Embeddings::Qwen: 401 Unauthorized — check api_key."
      end
      if status == 429
        if attempts > @max_retries
          raise RateLimitError, "Parse::Embeddings::Qwen: 429 rate limited after #{attempts} attempt(s)."
        end
        # Prefer the server's Retry-After hint; otherwise back off.
        sleep(retry_after_seconds(response) || backoff_seconds(attempts))
        next
      end
      if status >= 500
        if attempts > @max_retries
          raise TransientError, "Parse::Embeddings::Qwen: #{status} after #{attempts} attempt(s)."
        end
        sleep(backoff_seconds(attempts))
        next
      end
      raise BadRequestError, "Parse::Embeddings::Qwen: #{status} from POST /embeddings."
    end
  end

  # Parse a response body as JSON, refusing bodies larger than
  # {MAX_RESPONSE_BYTES} and nesting deeper than 32 levels.
  #
  # @param body [#to_s] raw response body.
  # @return [Object] parsed JSON value.
  # @raise [InvalidResponseError] when the body is oversized or is
  #   not valid JSON.
  def parse_json_body!(body)
    s = body.to_s
    if s.bytesize > MAX_RESPONSE_BYTES
      raise InvalidResponseError,
            "Parse::Embeddings::Qwen: response body exceeds #{MAX_RESPONSE_BYTES} bytes " \
            "(#{s.bytesize}). Refusing to parse."
    end
    JSON.parse(s, max_nesting: 32)
  rescue JSON::ParserError => e
    raise InvalidResponseError,
          "Parse::Embeddings::Qwen: response is not valid JSON (#{e.message})."
  end

  # Pull the embeddings out of a parsed response and return them in
  # input order, using each entry's `index` field rather than the
  # order of `data`.
  #
  # @param payload [Object] parsed response body.
  # @param input_count [Integer] number of inputs that were sent.
  # @return [Array] the `embedding` value of each entry, ordered by
  #   `index`. Element shape is checked later by `validate_response!`.
  # @raise [InvalidResponseError] when the envelope is malformed: not
  #   an object, `data` not an Array or of the wrong length, an entry
  #   that is not an object, or an `index` that is missing, out of
  #   range, or duplicated.
  def extract_vectors!(payload, input_count)
    unless payload.is_a?(Hash)
      raise InvalidResponseError,
            "Parse::Embeddings::Qwen: response body is not a JSON object."
    end
    data = payload["data"]
    unless data.is_a?(Array)
      raise InvalidResponseError,
            "Parse::Embeddings::Qwen: response.data is not an Array."
    end
    if data.length != input_count
      raise InvalidResponseError,
            "Parse::Embeddings::Qwen: response.data.length #{data.length} != input count #{input_count}."
    end

    indexed = data.each_with_index.map do |entry, i|
      unless entry.is_a?(Hash)
        raise InvalidResponseError,
              "Parse::Embeddings::Qwen: response.data[#{i}] is not a JSON object."
      end
      idx = entry["index"]
      unless idx.is_a?(Integer) && idx >= 0 && idx < input_count
        raise InvalidResponseError,
              "Parse::Embeddings::Qwen: response.data[#{i}].index #{idx.inspect} out of range."
      end
      [idx, entry["embedding"]]
    end

    # With the length and range checks above, unique indices mean
    # every input position is covered exactly once.
    indices = indexed.map(&:first)
    if indices.uniq.length != indices.length
      raise InvalidResponseError, "Parse::Embeddings::Qwen: duplicate index in response.data."
    end
    indexed.sort_by(&:first).map(&:last)
  end

  # Exponential backoff: 0.5s, 1s, 2s, … capped at 30s.
  #
  # @param attempt [Integer] 1-based attempt number.
  # @return [Float] seconds to sleep before the next attempt.
  def backoff_seconds(attempt)
    # 0.5 * 2**6 already exceeds the 30s cap, so clamping the exponent
    # yields identical results while avoiding a huge Integer power
    # when the retry budget is very large.
    exponent = [attempt - 1, 6].min
    [0.5 * (2**exponent), 30.0].min
  end

  # Read a delay from the response's Retry-After header.
  #
  # Only a positive numeric prefix is honoured. `String#to_f` returns
  # 0.0 for text that does not start with a number (such as an
  # HTTP-date), so those values yield nil and the caller falls back
  # to {#backoff_seconds}.
  #
  # @param response [#headers] HTTP response.
  # @return [Float, nil] seconds to wait (capped at 60), or nil when
  #   no usable value is present.
  def retry_after_seconds(response)
    ra = response.respond_to?(:headers) ? response.headers["retry-after"] || response.headers["Retry-After"] : nil
    return nil unless ra

    v = ra.to_f
    v.positive? ? [v, 60.0].min : nil
  end

  private

  # Reject keys that can never authenticate. A blank key, or one
  # carrying CR / LF / NUL (typically a stray newline from a secrets
  # file), is refused here rather than failing later at the HTTP
  # header layer. The key itself is never echoed in the message.
  #
  # @raise [ArgumentError]
  def validate_api_key!(api_key)
    unless api_key.is_a?(String) && !api_key.strip.empty?
      raise ArgumentError, "Parse::Embeddings::Qwen: api_key must be a non-empty String."
    end
    if ["\r", "\n", "\0"].any? { |c| api_key.include?(c) }
      raise ArgumentError,
            "Parse::Embeddings::Qwen: api_key must not contain CR, LF, or NUL characters."
    end
  end

  # @raise [ArgumentError] unless `model` is a key of
  #   {MODEL_DEFAULT_DIMENSIONS}.
  def validate_model!(model)
    unless MODEL_DEFAULT_DIMENSIONS.key?(model)
      raise ArgumentError,
            "Parse::Embeddings::Qwen: unknown model #{model.inspect}. " \
            "Supported: #{MODEL_DEFAULT_DIMENSIONS.keys.inspect}."
    end
  end

  # Check a requested truncation width. nil means "use native" and
  # is always accepted. Expects `model` to have passed
  # {#validate_model!} already (it is looked up with `fetch`).
  #
  # @raise [ArgumentError] when `dimensions` is not a positive
  #   Integer, exceeds the model's native width, or differs from
  #   native on a model outside {MATRYOSHKA_MODELS}.
  def validate_dimensions!(model, dimensions)
    return if dimensions.nil?

    unless dimensions.is_a?(Integer) && dimensions.positive?
      raise ArgumentError,
            "Parse::Embeddings::Qwen: dimensions must be a positive Integer (got #{dimensions.inspect})."
    end
    native = MODEL_DEFAULT_DIMENSIONS.fetch(model)
    if dimensions > native
      raise ArgumentError,
            "Parse::Embeddings::Qwen: dimensions #{dimensions} exceeds native #{native} for #{model}."
    end
    if !MATRYOSHKA_MODELS.include?(model) && dimensions != native
      raise ArgumentError,
            "Parse::Embeddings::Qwen: model #{model.inspect} does not support custom dimensions " \
            "(Matryoshka-capable models: #{MATRYOSHKA_MODELS.inspect})."
    end
  end

  # Validate the endpoint base URL: http(s) scheme, HTTPS unless
  # explicitly allowed, a host, and no embedded credentials.
  #
  # @param base_url [String]
  # @param allow_insecure [Boolean] permit the `http` scheme.
  # @return [String] the parsed URL rendered back to a String.
  # @raise [ArgumentError] when any check fails.
  def validate_base_url!(base_url, allow_insecure)
    unless base_url.is_a?(String) && !base_url.empty?
      raise ArgumentError, "Parse::Embeddings::Qwen: base_url must be a non-empty String."
    end
    begin
      uri = URI.parse(base_url)
    rescue URI::InvalidURIError => e
      raise ArgumentError, "Parse::Embeddings::Qwen: base_url is not a valid URL (#{e.message})."
    end
    unless %w[http https].include?(uri.scheme)
      raise ArgumentError,
            "Parse::Embeddings::Qwen: base_url must be http(s):// (got scheme #{uri.scheme.inspect})."
    end
    if uri.scheme == "http" && !allow_insecure
      raise ArgumentError,
            "Parse::Embeddings::Qwen: refusing http:// base_url. Pass allow_insecure_base_url: true to opt in."
    end
    if uri.host.nil? || uri.host.empty?
      raise ArgumentError, "Parse::Embeddings::Qwen: base_url must include a host."
    end
    if uri.userinfo
      raise ArgumentError,
            "Parse::Embeddings::Qwen: base_url must not contain userinfo (credentials). " \
            "Use the api_key parameter and a clean URL."
    end
    uri.to_s
  end

  # @raise [ArgumentError] unless `value` is an Integer > 0.
  def validate_positive_integer!(name, value)
    unless value.is_a?(Integer) && value.positive?
      raise ArgumentError,
            "Parse::Embeddings::Qwen: #{name} must be a positive Integer (got #{value.inspect})."
    end
  end

  # @raise [ArgumentError] unless `value` is an Integer >= 0.
  def validate_non_negative_integer!(name, value)
    unless value.is_a?(Integer) && value >= 0
      raise ArgumentError,
            "Parse::Embeddings::Qwen: #{name} must be a non-negative Integer (got #{value.inspect})."
    end
  end

  # @return [String] gem version for the User-Agent header, or
  #   "unknown" when `Parse::Stack::VERSION` is not loaded.
  def user_agent_version
    defined?(Parse::Stack::VERSION) ? Parse::Stack::VERSION : "unknown"
  end

  # @return [String, nil] `scheme://host` of the base URL (path and
  #   port omitted) for {#inspect_attrs}; nil when it cannot be parsed
  #   or has no host.
  def safe_base_host
    uri = URI.parse(@base_url)
    host = uri.host
    host && !host.empty? ? "#{uri.scheme}://#{host}" : nil
  rescue URI::InvalidURIError
    nil
  end
end
|
|
430
|
+
end
|
|
431
|
+
end
|