parse-stack-next 4.5.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/release.yml +32 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/.vscode/settings.json +3 -0
- data/CHANGELOG.md +305 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +88 -0
- data/lib/parse/cache/redis.rb +249 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/create_lock.rb +14 -2
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +77 -5
- data/parse-stack.png +0 -0
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "faraday"
require "json"
require "time"
require "uri"
require_relative "provider"
|
|
8
|
+
|
|
9
|
+
module Parse
|
|
10
|
+
module Embeddings
|
|
11
|
+
# OpenAI embeddings provider. Wraps `POST /v1/embeddings` and the
|
|
12
|
+
# `text-embedding-3-small`, `text-embedding-3-large`, and legacy
|
|
13
|
+
# `text-embedding-ada-002` models.
|
|
14
|
+
#
|
|
15
|
+
# @example registration
|
|
16
|
+
# Parse::Embeddings.register(:openai,
|
|
17
|
+
# Parse::Embeddings::OpenAI.new(
|
|
18
|
+
# api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
19
|
+
# model: "text-embedding-3-small",
|
|
20
|
+
# ))
|
|
21
|
+
#
|
|
22
|
+
# == Security
|
|
23
|
+
#
|
|
24
|
+
# * The Faraday connection refuses `ssl: { verify: false }` on the
|
|
25
|
+
# production HTTPS base URL and refuses `proxy:` unless the caller
|
|
26
|
+
# opts in via `allow_faraday_proxy: true`. Env-proxy autodiscovery
|
|
27
|
+
# (`HTTPS_PROXY` etc.) is suppressed by default — same model as
|
|
28
|
+
# `Parse::Client`.
|
|
29
|
+
# * `#inspect` (inherited from {Provider}) never surfaces `@api_key`.
|
|
30
|
+
# * `Authorization`, `OpenAI-Organization`, and `OpenAI-Project`
|
|
31
|
+
# headers are added to {Parse::Middleware::BodyBuilder::REDACTED_HEADERS}
|
|
32
|
+
# so Faraday logging cannot leak them.
|
|
33
|
+
#
|
|
34
|
+
# == Errors
|
|
35
|
+
#
|
|
36
|
+
# All errors inherit from {Parse::Embeddings::Error}:
|
|
37
|
+
#
|
|
38
|
+
# * {AuthenticationError} — 401 from OpenAI.
|
|
39
|
+
# * {RateLimitError} — 429 from OpenAI (retried up to `max_retries`).
|
|
40
|
+
# * {BadRequestError} — any other non-2xx status below 500, e.g. 400/403/404 (not retried).
|
|
41
|
+
# * {TransientError} — 5xx or network/timeout (retried).
|
|
42
|
+
# * {InvalidResponseError} — response shape violates the contract.
|
|
43
|
+
class OpenAI < Provider
|
|
44
|
+
# Subclasses of {Parse::Embeddings::Error} specific to OpenAI's
|
|
45
|
+
# HTTP boundary. Concrete enough for retry middleware to switch
|
|
46
|
+
# on; opaque enough that callers don't depend on response bodies.
|
|
47
|
+
class AuthenticationError < Error; end
|
|
48
|
+
class BadRequestError < Error; end
|
|
49
|
+
class RateLimitError < Error; end
|
|
50
|
+
class TransientError < Error; end
|
|
51
|
+
|
|
52
|
+
DEFAULT_BASE_URL = "https://api.openai.com/v1"
|
|
53
|
+
DEFAULT_MODEL = "text-embedding-3-small"
|
|
54
|
+
DEFAULT_TIMEOUT = 30
|
|
55
|
+
DEFAULT_OPEN_TIMEOUT = 5
|
|
56
|
+
DEFAULT_MAX_RETRIES = 3
|
|
57
|
+
DEFAULT_BATCH_SIZE = 100
|
|
58
|
+
|
|
59
|
+
# Hard ceiling on the response body we'll parse. A legitimate
|
|
60
|
+
# OpenAI embeddings response for the worst-case configuration
|
|
61
|
+
# (100 inputs × text-embedding-3-large, 3072 floats × ~12 chars
|
|
62
|
+
# per encoded float) is ~3.6 MB. We allow 16 MB to leave generous
|
|
63
|
+
# headroom for usage telemetry and future fields, while still
|
|
64
|
+
# bounding the buffer an adversarial / misconfigured base_url
|
|
65
|
+
# could ship at us before the 30s timeout fires.
|
|
66
|
+
MAX_RESPONSE_BYTES = 16 * 1024 * 1024
|
|
67
|
+
|
|
68
|
+
# Native vector widths for each supported model. `text-embedding-3-*`
|
|
69
|
+
# also accept a `dimensions:` parameter that truncates the output
|
|
70
|
+
# (Matryoshka-style) — when set, it overrides the native width.
|
|
71
|
+
MODEL_DEFAULT_DIMENSIONS = {
|
|
72
|
+
"text-embedding-3-small" => 1536,
|
|
73
|
+
"text-embedding-3-large" => 3072,
|
|
74
|
+
"text-embedding-ada-002" => 1536,
|
|
75
|
+
}.freeze
|
|
76
|
+
|
|
77
|
+
# Max input tokens per item for the supported models. Provided as
|
|
78
|
+
# a chunker hint via {#max_input_tokens}.
|
|
79
|
+
MODEL_MAX_INPUT_TOKENS = {
|
|
80
|
+
"text-embedding-3-small" => 8191,
|
|
81
|
+
"text-embedding-3-large" => 8191,
|
|
82
|
+
"text-embedding-ada-002" => 8191,
|
|
83
|
+
}.freeze
|
|
84
|
+
|
|
85
|
+
# @param api_key [String] required. Sent as `Authorization: Bearer …`.
|
|
86
|
+
# @param model [String] one of {MODEL_DEFAULT_DIMENSIONS}'s keys.
|
|
87
|
+
# @param dimensions [Integer, nil] override output width (3-series
|
|
88
|
+
# only). When nil, uses the model's native dimensions.
|
|
89
|
+
# @param base_url [String] override (Azure / proxy). Must be HTTPS
|
|
90
|
+
# unless `allow_insecure_base_url: true`.
|
|
91
|
+
# @param organization [String, nil] sent as `OpenAI-Organization`.
|
|
92
|
+
# @param project [String, nil] sent as `OpenAI-Project`.
|
|
93
|
+
# @param timeout [Integer] read timeout, seconds.
|
|
94
|
+
# @param open_timeout [Integer] connect timeout, seconds.
|
|
95
|
+
# @param max_retries [Integer] retry attempts on 429/5xx/timeouts.
|
|
96
|
+
# @param embed_batch_size [Integer] inputs per request.
|
|
97
|
+
# @param allow_faraday_proxy [Boolean] opt in to proxy / env-proxy
|
|
98
|
+
# autodiscovery. Defaults `false` — matches `Parse::Client`.
|
|
99
|
+
# @param allow_insecure_base_url [Boolean] permit `http://` base
|
|
100
|
+
# (local Ollama-shaped proxies). Defaults `false`.
|
|
101
|
+
# @param connection [Faraday::Connection, nil] injection seam for
|
|
102
|
+
# tests. When nil, a connection is built from the other options.
|
|
103
|
+
def initialize(
  api_key:,
  model: DEFAULT_MODEL,
  dimensions: nil,
  base_url: DEFAULT_BASE_URL,
  organization: nil,
  project: nil,
  timeout: DEFAULT_TIMEOUT,
  open_timeout: DEFAULT_OPEN_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  embed_batch_size: DEFAULT_BATCH_SIZE,
  allow_faraday_proxy: false,
  allow_insecure_base_url: false,
  connection: nil
)
  # Every option is validated before any instance state is assigned, so an
  # invalid argument raises ArgumentError ahead of the assignments below.
  validate_api_key!(api_key)
  validate_model!(model)
  validate_dimensions!(model, dimensions)
  # validate_base_url! returns the URL re-serialized through URI; that
  # string (not the raw argument) is what gets stored.
  sanitized_base_url = validate_base_url!(base_url, allow_insecure_base_url)
  validate_positive_integer!(:timeout, timeout)
  validate_positive_integer!(:open_timeout, open_timeout)
  validate_non_negative_integer!(:max_retries, max_retries)
  validate_positive_integer!(:embed_batch_size, embed_batch_size)

  @api_key = api_key
  @model = model
  # No override given: fall back to the model's native width.
  @dimensions = dimensions || MODEL_DEFAULT_DIMENSIONS.fetch(model)
  @base_url = sanitized_base_url
  @organization = organization
  @project = project
  @timeout = timeout
  @open_timeout = open_timeout
  @max_retries = max_retries
  @embed_batch_size = embed_batch_size
  @allow_faraday_proxy = allow_faraday_proxy
  # Assigned last: build_connection reads the instance variables set above.
  # An injected connection is used as-is.
  @connection = connection || build_connection
end
|
|
140
|
+
|
|
141
|
+
# @return [Integer] output vector width: the `dimensions:` override given at
#   construction, otherwise the configured model's native width.
def dimensions
  @dimensions
end
|
|
144
|
+
|
|
145
|
+
# @return [String] the embedding model identifier given at construction.
def model_name
  @model
end
|
|
148
|
+
|
|
149
|
+
# @return [Integer] the `embed_batch_size:` value given at construction
#   (inputs per request).
def embed_batch_size
  @embed_batch_size
end
|
|
152
|
+
|
|
153
|
+
# @return [Integer, nil] per-item input token limit for the configured
#   model, looked up in MODEL_MAX_INPUT_TOKENS (nil if the model has no
#   entry in that table).
def max_input_tokens
  MODEL_MAX_INPUT_TOKENS[@model]
end
|
|
156
|
+
|
|
157
|
+
# @return [Boolean] always `true` for this provider.
def normalize?
  # OpenAI's text-embedding-3-* and ada-002 all return
  # unit-normalized vectors. Documented in the API reference.
  true
end
|
|
162
|
+
|
|
163
|
+
# @return [Boolean] always `false` for this provider.
def supports_input_type?
  # OpenAI does NOT distinguish search_query vs search_document.
  # We accept the kwarg (for cache-key stability across providers)
  # but it does not affect the request payload. See {#embed_text}.
  false
end
|
|
169
|
+
|
|
170
|
+
# @param strings [Array<String>] inputs.
|
|
171
|
+
# @param input_type [Symbol] accepted for forward compatibility,
|
|
172
|
+
# ignored at the wire level — OpenAI does not asymmetrize
|
|
173
|
+
# query vs document. The base {#embed_text_batched} threads the
|
|
174
|
+
# value through; this implementation drops it.
|
|
175
|
+
# @return [Array<Array<Float>>] vectors aligned 1:1 with `strings`.
|
|
176
|
+
# @raise [ArgumentError] when `strings` is not an Array, or any element is
#   not a String or is an empty String.
def embed_text(strings, input_type: :search_document)
  unless strings.is_a?(Array)
    raise ArgumentError,
          "Parse::Embeddings::OpenAI#embed_text expects Array<String> (got #{strings.class})."
  end
  return [] if strings.empty?

  # Reject bad elements up front so no request is made for a batch that
  # contains a non-String or an empty String.
  strings.each.with_index do |text, position|
    unless text.is_a?(String)
      raise ArgumentError,
            "Parse::Embeddings::OpenAI#embed_text strings[#{position}] is not a String (#{text.class})."
    end
    next unless text.empty?

    raise ArgumentError,
          "Parse::Embeddings::OpenAI#embed_text strings[#{position}] is empty; OpenAI rejects empty inputs."
  end

  request = { input: strings, model: @model }
  # `dimensions` is only forwarded for text-embedding-3-* models (ada-002
  # would answer 400). For those models it is always sent, whether it is a
  # caller override or the native default, so the expected width is explicit.
  request[:dimensions] = @dimensions if @model.start_with?("text-embedding-3-")

  input_total = strings.length
  instrument_embed(input_total, input_type) do |emit_payload|
    response = post_embeddings(request)
    # Forward `usage.total_tokens` to the instrumentation payload when the
    # response carries a well-formed usage block; silently skip otherwise.
    usage = response.is_a?(Hash) ? response["usage"] : nil
    if usage.is_a?(Hash)
      total = usage["total_tokens"]
      emit_payload[:total_tokens] = total if total.is_a?(Integer) && total >= 0
    end
    validate_response!(input_total, extract_vectors!(response, input_total))
  end
end
|
|
218
|
+
|
|
219
|
+
# Override the Provider's safe inspect to add OpenAI-specific
|
|
220
|
+
# non-sensitive attrs. `@base_url` is redacted to host-only
|
|
221
|
+
# because operators may point this provider at an Azure / Ollama
|
|
222
|
+
# endpoint they consider sensitive — the same policy
|
|
223
|
+
# `post_embeddings` applies when raising on transient errors.
|
|
224
|
+
# @return [Hash] the inherited inspect attributes plus `:base` (from
#   `safe_base_host`) and `:retries` (the configured `max_retries`).
def inspect_attrs
  inherited = super
  inherited.merge(base: safe_base_host, retries: @max_retries)
end
|
|
227
|
+
|
|
228
|
+
protected
|
|
229
|
+
|
|
230
|
+
# Subclass extension points. Azure/Ollama/Voyage adapters can
|
|
231
|
+
# override these to swap the auth header shape, the URL path, the
|
|
232
|
+
# JSON envelope, or the retry policy without re-implementing the
|
|
233
|
+
# validation layer above.
|
|
234
|
+
#
|
|
235
|
+
# `build_connection` — Faraday wiring (override for Azure
|
|
236
|
+
# `api-key:` header form).
|
|
237
|
+
# `post_embeddings` — request + retry loop.
|
|
238
|
+
# `parse_json_body!` — JSON parse + bounded-size check.
|
|
239
|
+
# `extract_vectors!` — response envelope shape.
|
|
240
|
+
# `backoff_seconds` — sleep schedule between retries.
|
|
241
|
+
# `retry_after_seconds` — Retry-After header interpretation.
|
|
242
|
+
|
|
243
|
+
# Build the Faraday connection used for all requests.
#
# @return [Faraday::Connection]
def build_connection
  default_headers = {
    "Authorization" => "Bearer #{@api_key}",
    "Content-Type" => "application/json",
    "Accept" => "application/json",
    "User-Agent" => "parse-stack-embeddings/#{user_agent_version}",
  }
  # Optional scoping headers are only sent when configured.
  default_headers["OpenAI-Organization"] = @organization if @organization
  default_headers["OpenAI-Project"] = @project if @project

  # Unless the caller opted in, pass an explicit `proxy: nil` so Faraday
  # does not pick a proxy up from HTTPS_PROXY / HTTP_PROXY. When opted in,
  # the key is omitted and Faraday's usual env discovery applies.
  proxy_blocked = !@allow_faraday_proxy
  options = { url: @base_url, headers: default_headers }
  options[:proxy] = nil if proxy_blocked

  connection = Faraday.new(**options) do |builder|
    builder.options.timeout = @timeout
    builder.options.open_timeout = @open_timeout
    builder.adapter Faraday.default_adapter
  end
  # Re-assert after construction in case a proxy was still derived from the
  # environment despite the option above.
  connection.proxy = nil if proxy_blocked && connection.respond_to?(:proxy=)
  connection
end
|
|
271
|
+
|
|
272
|
+
# Single POST with bounded retry. Inline implementation — we don't
|
|
273
|
+
# depend on faraday-retry (not in the runtime gemspec) and the
|
|
274
|
+
# logic is small enough to audit in place.
|
|
275
|
+
# POST the request body to the `embeddings` path, retrying retryable
# failures up to `@max_retries` additional times.
#
# @param body [Hash] request payload; serialized to JSON once and reused
#   for every attempt.
# @return [Object] the value returned by `parse_json_body!` for a 2xx response.
# @raise [AuthenticationError] on 401 (never retried).
# @raise [RateLimitError] on 429 once retries are exhausted.
# @raise [TransientError] on 5xx, timeout, or connection failure once
#   retries are exhausted.
# @raise [BadRequestError] on any other non-2xx status (never retried).
def post_embeddings(body)
  # The payload is identical on every attempt, so encode it once instead of
  # re-serializing inside the retry loop.
  encoded_body = body.to_json
  attempts = 0
  loop do
    attempts += 1
    begin
      response = @connection.post("embeddings") do |req|
        req.body = encoded_body
      end
    rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
      # Surface e.class only — Faraday's message often contains the full
      # URL (which may be a customer Azure/Ollama base) and we don't want
      # that flowing into error trackers.
      if attempts > @max_retries
        raise TransientError, "Parse::Embeddings::OpenAI: #{e.class} after #{attempts} attempt(s)."
      end
      sleep(backoff_seconds(attempts))
      next
    end

    status = response.status
    return parse_json_body!(response.body) if status >= 200 && status < 300

    if status == 401
      raise AuthenticationError,
            "Parse::Embeddings::OpenAI: 401 Unauthorized — check api_key."
    end
    if status == 429
      if attempts > @max_retries
        raise RateLimitError,
              "Parse::Embeddings::OpenAI: 429 rate limited after #{attempts} attempt(s)."
      end
      # Prefer the server's Retry-After hint; fall back to our own schedule.
      sleep(retry_after_seconds(response) || backoff_seconds(attempts))
      next
    end
    if status >= 500
      if attempts > @max_retries
        raise TransientError,
              "Parse::Embeddings::OpenAI: #{status} after #{attempts} attempt(s)."
      end
      sleep(backoff_seconds(attempts))
      next
    end
    # Any other status — don't retry. Surface the error without the
    # response body (which may echo input we don't want in error tracking)
    # and without @base_url (which may be a customer-configured
    # Azure/Ollama URL captured by error trackers).
    raise BadRequestError,
          "Parse::Embeddings::OpenAI: #{status} from POST /embeddings."
  end
end
|
|
326
|
+
|
|
327
|
+
# Parse a response body as JSON after enforcing a size cap.
#
# The body is always stringified first, so an already-parsed object does
# not bypass the size and nesting limits.
#
# @param body [#to_s] raw response body.
# @return [Object] the parsed JSON value.
# @raise [InvalidResponseError] when the body exceeds MAX_RESPONSE_BYTES or
#   is not valid JSON (including nesting deeper than 32 levels).
def parse_json_body!(body)
  raw = body.to_s
  if raw.bytesize > MAX_RESPONSE_BYTES
    raise InvalidResponseError,
          "Parse::Embeddings::OpenAI: response body exceeds #{MAX_RESPONSE_BYTES} bytes " \
          "(#{raw.bytesize}). Refusing to parse."
  end
  # `max_nesting:` caps JSON's recursion depth to defend against
  # adversarial payloads on a customer-configured base_url.
  JSON.parse(raw, max_nesting: 32)
rescue JSON::ParserError => e
  # Report the error class only. The parser's message quotes a fragment of
  # the offending document, and response bodies may echo request input that
  # must not reach logs or error trackers.
  raise InvalidResponseError,
        "Parse::Embeddings::OpenAI: response is not valid JSON (#{e.class})."
end
|
|
348
|
+
|
|
349
|
+
# Pull the embeddings out of a parsed response envelope, ordered by each
# entry's `index` field.
#
# @param payload [Object] parsed response; must be a Hash with a `data` Array.
# @param input_count [Integer] number of inputs that were sent.
# @return [Array] each entry's `embedding` value, positioned by its `index`.
# @raise [InvalidResponseError] when the envelope shape, entry count, or
#   any `index` is invalid or duplicated.
def extract_vectors!(payload, input_count)
  unless payload.is_a?(Hash)
    raise InvalidResponseError,
          "Parse::Embeddings::OpenAI: response body is not a JSON object."
  end

  entries = payload["data"]
  unless entries.is_a?(Array)
    raise InvalidResponseError,
          "Parse::Embeddings::OpenAI: response.data is not an Array."
  end
  unless entries.length == input_count
    raise InvalidResponseError,
          "Parse::Embeddings::OpenAI: response.data.length #{entries.length} != input count #{input_count}."
  end

  # First pass: check every entry and collect [index, embedding] pairs in
  # arrival order. Entries are not assumed to arrive in request order.
  pairs = []
  entries.each_with_index do |item, position|
    unless item.is_a?(Hash)
      raise InvalidResponseError,
            "Parse::Embeddings::OpenAI: response.data[#{position}] is not a JSON object."
    end
    slot = item["index"]
    in_range = slot.is_a?(Integer) && slot >= 0 && slot < input_count
    unless in_range
      raise InvalidResponseError,
            "Parse::Embeddings::OpenAI: response.data[#{position}].index #{slot.inspect} out of range."
    end
    pairs << [slot, item["embedding"]]
  end

  slots = pairs.map(&:first)
  unless slots.uniq.length == slots.length
    raise InvalidResponseError,
          "Parse::Embeddings::OpenAI: duplicate index in response.data."
  end

  # Second pass: the slots are unique and cover 0...input_count, so placing
  # each embedding at its slot yields request order.
  ordered = Array.new(input_count)
  pairs.each { |slot, embedding| ordered[slot] = embedding }
  ordered
end
|
|
384
|
+
|
|
385
|
+
# Exponential backoff with deterministic ceiling.
|
|
386
|
+
#
|
|
387
|
+
# NOTE: no jitter. {Parse::Client#request} (lib/parse/client.rb)
|
|
388
|
+
# multiplies its sleep by `0.75 + rand * 0.5` to de-correlate
|
|
389
|
+
# fleet-wide retries. We deliberately omit that here: this
|
|
390
|
+
# provider is intended to be driven by a single rate-limited
|
|
391
|
+
# job runner (Sidekiq throttler, AS::Worker bucket, etc.) that
|
|
392
|
+
# already paces concurrent requests against OpenAI's rate
|
|
393
|
+
# limits. Per-call jitter on top of an external limiter only
|
|
394
|
+
# masks coordination bugs. Operators driving this provider from
|
|
395
|
+
# an unbounded worker pool should add their own jitter
|
|
396
|
+
# (subclass and override) — otherwise a fleet-wide 429 will
|
|
397
|
+
# synchronize the retry storm exponentially.
|
|
398
|
+
# @param attempt [Integer] 1-based attempt number that just failed.
# @return [Float] seconds to sleep: 0.5 doubled per attempt, at most 30.0.
def backoff_seconds(attempt)
  # 0.5, 1.0, 2.0, 4.0, 8.0 … capped at 30s
  uncapped = 0.5 * (2**(attempt - 1))
  uncapped > 30.0 ? 30.0 : uncapped
end
|
|
402
|
+
|
|
403
|
+
# Interpret a response's `Retry-After` header as a delay in seconds.
#
# Both header forms are handled: a number of seconds, and an HTTP-date
# (converted to "seconds from now"). Numeric handling is unchanged; the
# date form is only tried when the value does not read as a positive number.
#
# @param response [#headers] response object; anything without `#headers`
#   yields nil.
# @return [Float, nil] positive delay capped at 60.0 seconds, or nil when
#   the header is absent, unparseable, zero, or already in the past.
def retry_after_seconds(response)
  return nil unless response.respond_to?(:headers)

  headers = response.headers
  # Both spellings are checked in case `headers` is a plain Hash.
  raw = headers["retry-after"] || headers["Retry-After"]
  return nil unless raw

  delay = raw.to_f
  unless delay.positive?
    # Not a positive number — it may be the HTTP-date form of the header.
    delay =
      begin
        Time.httpdate(raw.to_s.strip) - Time.now
      rescue ArgumentError
        nil
      end
  end
  return nil if delay.nil? || !delay.positive?

  [delay, 60.0].min
end
|
|
409
|
+
|
|
410
|
+
private
|
|
411
|
+
|
|
412
|
+
# @param api_key [Object] candidate API key.
# @return [nil]
# @raise [ArgumentError] unless `api_key` is a non-empty String.
def validate_api_key!(api_key)
  return if api_key.is_a?(String) && !api_key.empty?

  raise ArgumentError,
        "Parse::Embeddings::OpenAI: api_key must be a non-empty String."
end
|
|
418
|
+
|
|
419
|
+
# @param model [Object] candidate model identifier.
# @return [nil]
# @raise [ArgumentError] unless `model` is a key of MODEL_DEFAULT_DIMENSIONS.
def validate_model!(model)
  return if MODEL_DEFAULT_DIMENSIONS.key?(model)

  supported = MODEL_DEFAULT_DIMENSIONS.keys.inspect
  raise ArgumentError,
        "Parse::Embeddings::OpenAI: unknown model #{model.inspect}. " \
        "Supported: #{supported}."
end
|
|
426
|
+
|
|
427
|
+
# Check an optional `dimensions:` override against the chosen model.
#
# @param model [String] a key of MODEL_DEFAULT_DIMENSIONS.
# @param dimensions [Integer, nil] requested output width; nil skips all checks.
# @return [nil]
# @raise [ArgumentError] when the override is not a positive Integer,
#   exceeds the model's native width, or differs from the native width on a
#   model outside the text-embedding-3-* family.
def validate_dimensions!(model, dimensions)
  return if dimensions.nil?

  well_formed = dimensions.is_a?(Integer) && dimensions.positive?
  unless well_formed
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: dimensions must be a positive Integer (got #{dimensions.inspect})."
  end

  native_width = MODEL_DEFAULT_DIMENSIONS.fetch(model)
  if dimensions > native_width
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: dimensions #{dimensions} exceeds native #{native_width} for #{model}."
  end

  # Only the 3-series accepts a custom width; other models may only
  # restate their native width.
  return if model.start_with?("text-embedding-3-") || dimensions == native_width

  raise ArgumentError,
        "Parse::Embeddings::OpenAI: model #{model.inspect} does not support custom dimensions " \
        "(only text-embedding-3-* do)."
end
|
|
444
|
+
|
|
445
|
+
# Parse base_url with URI, reject userinfo and non-http(s) schemes,
|
|
446
|
+
# and return a normalized credential-free string suitable for safe
|
|
447
|
+
# interpolation into log lines and error messages. Refuses
|
|
448
|
+
# `http://` unless the caller opts in via `allow_insecure_base_url`.
|
|
449
|
+
# Validate `base_url` and return it re-serialized through URI.
#
# @param base_url [Object] candidate base URL.
# @param allow_insecure [Boolean] when true, `http://` URLs are accepted.
# @return [String] the URL as rendered by `URI#to_s`.
# @raise [ArgumentError] when the value is not a non-empty String, does not
#   parse, is not http(s), is `http://` without opt-in, has no host, or
#   carries userinfo.
def validate_base_url!(base_url, allow_insecure)
  unless base_url.is_a?(String) && !base_url.empty?
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: base_url must be a non-empty String."
  end
  begin
    uri = URI.parse(base_url)
  rescue URI::InvalidURIError
    # Deliberately omit the parser's message: it quotes the entire input,
    # so a malformed URL that embeds credentials would leak them here,
    # before the userinfo check below ever runs.
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: base_url is not a valid URL."
  end
  unless %w[http https].include?(uri.scheme)
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: base_url must be http(s):// (got scheme #{uri.scheme.inspect})."
  end
  if uri.scheme == "http" && !allow_insecure
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: refusing http:// base_url. " \
          "Pass allow_insecure_base_url: true to opt in (local proxies only)."
  end
  if uri.host.nil? || uri.host.empty?
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: base_url must include a host."
  end
  # Reject embedded credentials outright. `https://user:pass@host/`
  # would otherwise leak via inspect, error messages, and any
  # error-tracker that captures the URL.
  if uri.userinfo
    raise ArgumentError,
          "Parse::Embeddings::OpenAI: base_url must not contain userinfo (credentials). " \
          "Use the api_key parameter and a clean URL."
  end
  # Return the URI-rendered form rather than the raw argument.
  uri.to_s
end
|
|
486
|
+
|
|
487
|
+
# @param name [Symbol, String] option name, used only in the error message.
# @param value [Object] candidate value.
# @return [nil]
# @raise [ArgumentError] unless `value` is an Integer greater than zero.
def validate_positive_integer!(name, value)
  return if value.is_a?(Integer) && value.positive?

  raise ArgumentError,
        "Parse::Embeddings::OpenAI: #{name} must be a positive Integer (got #{value.inspect})."
end
|
|
493
|
+
|
|
494
|
+
# @param name [Symbol, String] option name, used only in the error message.
# @param value [Object] candidate value.
# @return [nil]
# @raise [ArgumentError] unless `value` is an Integer that is zero or greater.
def validate_non_negative_integer!(name, value)
  return if value.is_a?(Integer) && value >= 0

  raise ArgumentError,
        "Parse::Embeddings::OpenAI: #{name} must be a non-negative Integer (got #{value.inspect})."
end
|
|
500
|
+
|
|
501
|
+
# @return [String] `Parse::Stack::VERSION` when that constant is defined,
#   otherwise the literal "unknown".
def user_agent_version
  return "unknown" unless defined?(Parse::Stack::VERSION)

  Parse::Stack::VERSION
end
|
|
504
|
+
|
|
505
|
+
# Host-only form of the configured base URL — for {#inspect_attrs}.
|
|
506
|
+
# Operators may set @base_url to an Azure deployment URL or an
|
|
507
|
+
# internal Ollama endpoint; surfacing the full URL via #inspect
|
|
508
|
+
# would put that in any error tracker / log scrape that captures
|
|
509
|
+
# `.inspect`. Host alone is enough to identify the provider in
|
|
510
|
+
# dev logs without leaking deployment paths or query strings.
|
|
511
|
+
# @return [String, nil] `scheme://host` derived from `@base_url` (port,
#   path, and query are dropped); nil when the URL has no host or fails to parse.
def safe_base_host
  parsed = URI.parse(@base_url)
  hostname = parsed.host
  return nil if hostname.nil? || hostname.empty?

  "#{parsed.scheme}://#{hostname}"
rescue URI::InvalidURIError
  nil
end
|
|
518
|
+
end
|
|
519
|
+
end
|
|
520
|
+
end
|