parse-stack-next 4.5.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/.bundle/config +2 -0
  3. data/.env.sample +17 -3
  4. data/.github/workflows/codeql.yml +44 -0
  5. data/.github/workflows/docs.yml +39 -0
  6. data/.github/workflows/release.yml +32 -0
  7. data/.github/workflows/ruby.yml +8 -6
  8. data/.gitignore +4 -0
  9. data/.vscode/settings.json +3 -0
  10. data/CHANGELOG.md +305 -72
  11. data/Gemfile.lock +10 -3
  12. data/LICENSE.txt +1 -1
  13. data/README.md +190 -219
  14. data/Rakefile +1 -1
  15. data/SECURITY.md +30 -0
  16. data/assets/parse-stack-next-avatar.png +0 -0
  17. data/assets/parse-stack-next-avatar.svg +37 -0
  18. data/assets/parse-stack-next-banner.png +0 -0
  19. data/assets/parse-stack-next-banner.svg +45 -0
  20. data/assets/parse-stack-next-social-preview.png +0 -0
  21. data/docs/atlas_vector_search_guide.md +511 -0
  22. data/docs/client_sdk_guide.md +1320 -0
  23. data/docs/mcp_guide.md +225 -104
  24. data/docs/mongodb_direct_guide.md +21 -4
  25. data/docs/usage_guide.md +585 -0
  26. data/examples/transaction_example.rb +28 -28
  27. data/lib/parse/acl_scope.rb +2 -2
  28. data/lib/parse/agent/mcp_rack_app.rb +184 -16
  29. data/lib/parse/agent/metadata_dsl.rb +16 -16
  30. data/lib/parse/agent/pipeline_validator.rb +28 -1
  31. data/lib/parse/agent/prompts.rb +5 -5
  32. data/lib/parse/agent/tools.rb +287 -14
  33. data/lib/parse/agent.rb +209 -12
  34. data/lib/parse/api/analytics.rb +27 -5
  35. data/lib/parse/api/files.rb +6 -2
  36. data/lib/parse/api/push.rb +21 -4
  37. data/lib/parse/api/server.rb +59 -0
  38. data/lib/parse/api/users.rb +26 -2
  39. data/lib/parse/atlas_search/index_manager.rb +84 -0
  40. data/lib/parse/atlas_search.rb +37 -9
  41. data/lib/parse/cache/pool.rb +88 -0
  42. data/lib/parse/cache/redis.rb +249 -0
  43. data/lib/parse/client/body_builder.rb +94 -0
  44. data/lib/parse/client/caching.rb +109 -9
  45. data/lib/parse/client/response.rb +27 -0
  46. data/lib/parse/client.rb +74 -3
  47. data/lib/parse/console.rb +203 -0
  48. data/lib/parse/embeddings/cohere.rb +484 -0
  49. data/lib/parse/embeddings/fixture.rb +130 -0
  50. data/lib/parse/embeddings/jina.rb +454 -0
  51. data/lib/parse/embeddings/local_http.rb +492 -0
  52. data/lib/parse/embeddings/openai.rb +520 -0
  53. data/lib/parse/embeddings/provider.rb +264 -0
  54. data/lib/parse/embeddings/qwen.rb +431 -0
  55. data/lib/parse/embeddings/voyage.rb +550 -0
  56. data/lib/parse/embeddings.rb +225 -0
  57. data/lib/parse/graphql/scalars.rb +53 -0
  58. data/lib/parse/graphql/type_generator.rb +264 -0
  59. data/lib/parse/graphql.rb +48 -0
  60. data/lib/parse/live_query/client.rb +24 -5
  61. data/lib/parse/live_query/subscription.rb +17 -6
  62. data/lib/parse/live_query.rb +9 -4
  63. data/lib/parse/model/associations/collection_proxy.rb +2 -2
  64. data/lib/parse/model/associations/has_many.rb +32 -1
  65. data/lib/parse/model/associations/has_one.rb +17 -0
  66. data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
  67. data/lib/parse/model/classes/user.rb +307 -11
  68. data/lib/parse/model/clp.rb +1 -1
  69. data/lib/parse/model/core/create_lock.rb +14 -2
  70. data/lib/parse/model/core/embed_managed.rb +296 -0
  71. data/lib/parse/model/core/fetching.rb +4 -4
  72. data/lib/parse/model/core/indexing.rb +53 -14
  73. data/lib/parse/model/core/parse_reference.rb +3 -3
  74. data/lib/parse/model/core/properties.rb +70 -1
  75. data/lib/parse/model/core/querying.rb +57 -1
  76. data/lib/parse/model/core/vector_searchable.rb +285 -0
  77. data/lib/parse/model/file.rb +16 -4
  78. data/lib/parse/model/model.rb +26 -10
  79. data/lib/parse/model/object.rb +63 -6
  80. data/lib/parse/model/pointer.rb +16 -2
  81. data/lib/parse/model/shortnames.rb +2 -0
  82. data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
  83. data/lib/parse/model/vector.rb +102 -0
  84. data/lib/parse/mongodb.rb +90 -8
  85. data/lib/parse/pipeline_security.rb +59 -2
  86. data/lib/parse/query/constraints.rb +16 -14
  87. data/lib/parse/query/ordering.rb +1 -1
  88. data/lib/parse/query.rb +137 -64
  89. data/lib/parse/stack/generators/templates/model.erb +2 -2
  90. data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
  91. data/lib/parse/stack/generators/templates/model_role.rb +1 -1
  92. data/lib/parse/stack/generators/templates/model_session.rb +1 -1
  93. data/lib/parse/stack/generators/templates/parse.rb +1 -1
  94. data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
  95. data/lib/parse/stack/version.rb +1 -1
  96. data/lib/parse/stack.rb +375 -73
  97. data/lib/parse/two_factor_auth/user_extension.rb +5 -2
  98. data/lib/parse/vector_search.rb +341 -0
  99. data/parse-stack-next.gemspec +10 -9
  100. data/scripts/docker/docker-compose.test.yml +18 -0
  101. data/scripts/start-parse.sh +6 -0
  102. data/scripts/vector_prototype/create_vector_index.js +105 -0
  103. data/scripts/vector_prototype/fetch_embeddings.py +241 -0
  104. data/scripts/vector_prototype/fixture_manifest.json +9 -0
  105. data/scripts/vector_prototype/query_prototype.rb +84 -0
  106. data/scripts/vector_prototype/run.sh +34 -0
  107. metadata +77 -5
  108. data/parse-stack.png +0 -0
@@ -0,0 +1,454 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
require "faraday"
require "json"
require "time"
require "uri"
require_relative "provider"
8
+
9
+ module Parse
10
+ module Embeddings
11
+ # Jina AI embeddings provider. Wraps `POST /v1/embeddings`.
12
+ #
13
+ # Supported text-capable models:
14
+ #
15
+ # * **v5 text family** — `jina-embeddings-v5-text-small`,
16
+ # `jina-embeddings-v5-text-nano`.
17
+ # * **v5 omni family (text mode)** — `jina-embeddings-v5-omni-small`,
18
+ # `jina-embeddings-v5-omni-nano`. These models are multimodal at
19
+ # the network boundary but accept plain-text inputs through this
20
+ # provider just like the text-only variants.
21
+ # * **v4** — `jina-embeddings-v4` (Matryoshka, multimodal; text
22
+ # inputs only here).
23
+ # * **v3** — `jina-embeddings-v3` (Matryoshka, 32–1024).
24
+ # * **code embeddings** — `jina-code-embeddings-0.5b`,
25
+ # `jina-code-embeddings-1.5b`.
26
+ #
27
+ # Rerankers (`jina-reranker-*`), VLM (`jina-vlm`),
28
+ # image-only (`jina-clip-v2`), and `ReaderLM-v2` are NOT exposed
29
+ # through this provider — they don't fit the `embed_text` contract.
30
+ # They'll surface through forthcoming `embed_image` / rerank /
31
+ # generation hooks.
32
+ #
33
+ # @example registration
34
+ # Parse::Embeddings.register(:jina,
35
+ # Parse::Embeddings::Jina.new(
36
+ # api_key: ENV.fetch("JINA_API_KEY"),
37
+ # model: "jina-embeddings-v3",
38
+ # ))
39
+ #
40
+ # == Asymmetric input types
41
+ #
42
+ # Jina uses a `task` request field with the following canonical
43
+ # values (mapped from SDK-canonical `input_type:` Symbols):
44
+ #
45
+ # * `:search_query` → `"retrieval.query"`
46
+ # * `:search_document` → `"retrieval.passage"`
47
+ # * `:classification` → `"classification"`
48
+ # * `:clustering` → `"separation"`
49
+ #
50
+ # The `Provider#supports_input_type?` flag returns `true` here so
51
+ # cache-keying middleware can branch on it. Code-embedding models
52
+ # accept the `task` field and use it to bias the head.
53
+ #
54
+ # == Matryoshka dimensions
55
+ #
56
+ # `jina-embeddings-v3`, `jina-embeddings-v4`, and the v5 family
57
+ # support Matryoshka-style output-width truncation via the
58
+ # `dimensions` request field. Pass `dimensions:` to the constructor
59
+ # to set the desired width (must be ≤ the model's native width).
60
+ class Jina < Provider
61
+ class AuthenticationError < Error; end
62
+ class BadRequestError < Error; end
63
+ class RateLimitError < Error; end
64
+ class TransientError < Error; end
65
+
66
+ DEFAULT_BASE_URL = "https://api.jina.ai/v1"
67
+ DEFAULT_MODEL = "jina-embeddings-v3"
68
+ DEFAULT_TIMEOUT = 30
69
+ DEFAULT_OPEN_TIMEOUT = 5
70
+ DEFAULT_MAX_RETRIES = 3
71
+ DEFAULT_BATCH_SIZE = 100
72
+ MAX_RESPONSE_BYTES = 16 * 1024 * 1024
73
+
74
+ # Native vector widths. The Matryoshka-capable rows allow the
75
+ # caller to truncate via the `dimensions:` kwarg.
76
+ MODEL_DEFAULT_DIMENSIONS = {
77
+ "jina-embeddings-v5-omni-small" => 1024,
78
+ "jina-embeddings-v5-omni-nano" => 512,
79
+ "jina-embeddings-v5-text-small" => 1024,
80
+ "jina-embeddings-v5-text-nano" => 512,
81
+ "jina-embeddings-v4" => 2048,
82
+ "jina-embeddings-v3" => 1024,
83
+ "jina-code-embeddings-1.5b" => 1024,
84
+ "jina-code-embeddings-0.5b" => 1024,
85
+ }.freeze
86
+
87
+ MODEL_MAX_INPUT_TOKENS = {
88
+ "jina-embeddings-v5-omni-small" => 32_000,
89
+ "jina-embeddings-v5-omni-nano" => 32_000,
90
+ "jina-embeddings-v5-text-small" => 32_000,
91
+ "jina-embeddings-v5-text-nano" => 32_000,
92
+ "jina-embeddings-v4" => 32_000,
93
+ "jina-embeddings-v3" => 8_192,
94
+ "jina-code-embeddings-1.5b" => 32_000,
95
+ "jina-code-embeddings-0.5b" => 32_000,
96
+ }.freeze
97
+
98
+ # Models that accept the Matryoshka `dimensions` field. Other
99
+ # rows must pass the native width or no override.
100
+ MATRYOSHKA_MODELS = %w[
101
+ jina-embeddings-v5-omni-small
102
+ jina-embeddings-v5-omni-nano
103
+ jina-embeddings-v5-text-small
104
+ jina-embeddings-v5-text-nano
105
+ jina-embeddings-v4
106
+ jina-embeddings-v3
107
+ ].freeze
108
+
109
+ # Map SDK-canonical input_type symbols to Jina `task` strings.
110
+ INPUT_TYPE_WIRE_VALUES = {
111
+ search_query: "retrieval.query",
112
+ search_document: "retrieval.passage",
113
+ classification: "classification",
114
+ clustering: "separation",
115
+ }.freeze
116
+
117
# Builds a Jina embeddings provider. All arguments are validated before
# any instance variable is assigned; the connection is created last
# because {#build_connection} reads the state assigned above it.
#
# @param api_key [String] required. Sent as `Authorization: Bearer …`.
# @param model [String] one of {MODEL_DEFAULT_DIMENSIONS}'s keys.
# @param dimensions [Integer, nil] Matryoshka truncation width. Only
#   {MATRYOSHKA_MODELS} accept a custom value; for any other model it
#   must be nil or equal to the native width. When nil, the model's
#   native width is used.
# @param base_url [String] override. Must be HTTPS unless
#   `allow_insecure_base_url: true`.
# @param timeout [Integer] read timeout, seconds.
# @param open_timeout [Integer] connect timeout, seconds.
# @param max_retries [Integer] retry attempts on 429/5xx/timeouts.
# @param embed_batch_size [Integer] inputs per request.
# @param allow_faraday_proxy [Boolean] opt in to proxy / env-proxy
#   autodiscovery. Defaults `false`.
# @param allow_insecure_base_url [Boolean] permit `http://` base.
# @param connection [Faraday::Connection, nil] injection seam; when
#   given it is used as-is instead of building a new connection.
# @raise [ArgumentError] when any argument fails validation.
def initialize(
  api_key:,
  model: DEFAULT_MODEL,
  dimensions: nil,
  base_url: DEFAULT_BASE_URL,
  timeout: DEFAULT_TIMEOUT,
  open_timeout: DEFAULT_OPEN_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  embed_batch_size: DEFAULT_BATCH_SIZE,
  allow_faraday_proxy: false,
  allow_insecure_base_url: false,
  connection: nil
)
  # Validation order determines which error a caller sees first.
  validate_api_key!(api_key)
  validate_model!(model)
  validate_dimensions!(model, dimensions)
  normalized_url = validate_base_url!(base_url, allow_insecure_base_url)
  { timeout: timeout, open_timeout: open_timeout }.each do |name, value|
    validate_positive_integer!(name, value)
  end
  validate_non_negative_integer!(:max_retries, max_retries)
  validate_positive_integer!(:embed_batch_size, embed_batch_size)

  @api_key = api_key
  @model = model
  @dimensions = dimensions.nil? ? MODEL_DEFAULT_DIMENSIONS.fetch(model) : dimensions
  @base_url = normalized_url
  @timeout = timeout
  @open_timeout = open_timeout
  @max_retries = max_retries
  @embed_batch_size = embed_batch_size
  @allow_faraday_proxy = allow_faraday_proxy

  # Must stay last: build_connection depends on the ivars set above.
  @connection = connection || build_connection
end
165
+
166
# @return [Integer] active output width: the `dimensions:` passed to the
#   constructor, or the model's native width when none was given.
def dimensions
  @dimensions
end

# @return [String] the Jina model identifier sent with each request.
def model_name
  @model
end

# @return [Integer] number of inputs per request, as configured.
def embed_batch_size
  @embed_batch_size
end

# @return [Integer, nil] the {MODEL_MAX_INPUT_TOKENS} entry for the
#   configured model.
def max_input_tokens
  MODEL_MAX_INPUT_TOKENS[@model]
end

# Jina's v3/v4/v5 embeddings are documented unit-normalized.
#
# @return [Boolean] always `true`.
def normalize?
  true
end

# @return [Boolean] always `true`; `embed_text` maps `input_type:` onto
#   the Jina `task` request field.
def supports_input_type?
  true
end
190
+
191
# Embeds a batch of texts through one `post_embeddings` call.
#
# Checks run in this order: `strings` must be an Array; an empty Array
# returns `[]` immediately (before `input_type` is inspected); every
# element must be a non-empty String; `input_type` must be a known key.
#
# @param strings [Array<String>] inputs.
# @param input_type [Symbol] one of {INPUT_TYPE_WIRE_VALUES}'s keys;
#   its mapped value is sent as the `task` request field.
# @return [Array<Array<Float>>] vectors aligned 1:1 with `strings`.
# @raise [ArgumentError] on a malformed `strings` or unknown `input_type`.
def embed_text(strings, input_type: :search_document)
  unless strings.is_a?(Array)
    raise ArgumentError,
          "Parse::Embeddings::Jina#embed_text expects Array<String> (got #{strings.class})."
  end
  return [] if strings.empty?

  strings.each.with_index do |text, position|
    unless text.is_a?(String)
      raise ArgumentError,
            "Parse::Embeddings::Jina#embed_text strings[#{position}] is not a String (#{text.class})."
    end
    next unless text.empty?

    raise ArgumentError,
          "Parse::Embeddings::Jina#embed_text strings[#{position}] is empty; Jina rejects empty inputs."
  end

  task_value = INPUT_TYPE_WIRE_VALUES.fetch(input_type) do
    raise ArgumentError,
          "Parse::Embeddings::Jina#embed_text input_type #{input_type.inspect} not in " \
          "#{INPUT_TYPE_WIRE_VALUES.keys.inspect}."
  end

  request = { model: @model, input: strings, task: task_value, embedding_type: "float" }

  # Forward `dimensions` only for Matryoshka-capable models whose active
  # width differs from native. Sending it to a non-Matryoshka model
  # would yield a 400 from Jina.
  truncated = MATRYOSHKA_MODELS.include?(@model) &&
              @dimensions != MODEL_DEFAULT_DIMENSIONS.fetch(@model)
  request[:dimensions] = @dimensions if truncated

  instrument_embed(strings.length, input_type) do |emit_payload|
    payload = post_embeddings(request)

    # Report token usage only when the response carries a sane count.
    usage = payload.is_a?(Hash) ? payload["usage"] : nil
    if usage.is_a?(Hash)
      token_count = usage["total_tokens"]
      emit_payload[:total_tokens] = token_count if token_count.is_a?(Integer) && token_count >= 0
    end

    validate_response!(strings.length, extract_vectors!(payload, strings.length))
  end
end
241
+
242
# Extends the parent's inspect attributes with the scheme+host of the
# base URL (see {#safe_base_host}) and the configured retry count.
#
# @return [Hash] the superclass attributes merged with `:base` and
#   `:retries`.
def inspect_attrs
  extras = { base: safe_base_host, retries: @max_retries }
  super.merge(extras)
end
245
+
246
+ protected
247
+
248
# Creates the Faraday connection used for every request, with the
# bearer token, JSON headers, and the configured timeouts.
#
# Unless `allow_faraday_proxy` was set, the proxy is cleared twice:
# once through the constructor options and once on the built
# connection. NOTE(review): presumably the second assignment is what
# actually disables env-proxy autodiscovery — confirm against the
# Faraday version in use before simplifying.
#
# @return [Faraday::Connection]
def build_connection
  options = {
    url: @base_url,
    headers: {
      "Authorization" => "Bearer #{@api_key}",
      "Content-Type" => "application/json",
      "Accept" => "application/json",
      "User-Agent" => "parse-stack-embeddings/#{user_agent_version}",
    },
  }
  options[:proxy] = nil unless @allow_faraday_proxy

  connection = Faraday.new(**options) do |builder|
    builder.options.timeout = @timeout
    builder.options.open_timeout = @open_timeout
    builder.adapter Faraday.default_adapter
  end

  return connection if @allow_faraday_proxy || !connection.respond_to?(:proxy=)

  connection.proxy = nil
  connection
end
267
+
268
# POSTs `body` (serialized as JSON) to the `embeddings` path and returns
# the parsed response, retrying transient failures.
#
# Retried: Faraday timeouts / connection failures, HTTP 429, HTTP 5xx.
# Each is retried until the attempt count exceeds `@max_retries`, i.e.
# up to `@max_retries + 1` requests in total. A 429 waits for the
# server's Retry-After hint when one is available, otherwise the
# exponential backoff is used.
#
# @param body [Hash] request payload.
# @return [Object] the parsed JSON body of a 2xx response.
# @raise [AuthenticationError] on 401.
# @raise [RateLimitError] on 429 once retries are exhausted.
# @raise [TransientError] on 5xx / network failure once retries are exhausted.
# @raise [BadRequestError] on any other non-2xx status.
def post_embeddings(body)
  tries = 0
  loop do
    tries += 1
    exhausted = tries > @max_retries

    begin
      response = @connection.post("embeddings") { |req| req.body = body.to_json }
    rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
      raise TransientError, "Parse::Embeddings::Jina: #{e.class} after #{tries} attempt(s)." if exhausted

      sleep(backoff_seconds(tries))
      next
    end

    code = response.status
    case code
    when 200...300
      return parse_json_body!(response.body)
    when 401
      raise AuthenticationError, "Parse::Embeddings::Jina: 401 Unauthorized — check api_key."
    when 429
      raise RateLimitError, "Parse::Embeddings::Jina: 429 rate limited after #{tries} attempt(s)." if exhausted

      sleep(retry_after_seconds(response) || backoff_seconds(tries))
    else
      # Anything that is not a server error is treated as non-retryable.
      raise BadRequestError, "Parse::Embeddings::Jina: #{code} from POST /embeddings." unless code >= 500
      raise TransientError, "Parse::Embeddings::Jina: #{code} after #{tries} attempt(s)." if exhausted

      sleep(backoff_seconds(tries))
    end
  end
end
307
+
308
# Parses a response body as JSON after enforcing a size ceiling.
#
# The body is coerced with `to_s`, so `nil` is treated as an empty
# string (which then fails JSON parsing). Nesting depth is limited to 32.
#
# @param body [#to_s] raw response body.
# @return [Object] the decoded JSON value.
# @raise [InvalidResponseError] when the body is larger than
#   {MAX_RESPONSE_BYTES} or is not valid JSON.
def parse_json_body!(body)
  raw = body.to_s
  byte_count = raw.bytesize
  if byte_count > MAX_RESPONSE_BYTES
    raise InvalidResponseError,
          "Parse::Embeddings::Jina: response body exceeds #{MAX_RESPONSE_BYTES} bytes " \
          "(#{byte_count}). Refusing to parse."
  end

  JSON.parse(raw, max_nesting: 32)
rescue JSON::ParserError => e
  raise InvalidResponseError,
        "Parse::Embeddings::Jina: response is not valid JSON (#{e.message})."
end
320
+
321
# Pulls the embeddings out of a decoded response and orders them by the
# `index` each entry reports, so the result lines up with the request
# inputs even if the server returned entries out of order.
#
# The `embedding` values themselves are passed through unchecked here.
#
# @param payload [Object] decoded JSON response.
# @param input_count [Integer] number of inputs that were sent.
# @return [Array] one `embedding` value per input, in input order.
# @raise [InvalidResponseError] when the payload is not an object,
#   `data` is not an Array of `input_count` objects, or any `index` is
#   missing, out of range, or duplicated.
def extract_vectors!(payload, input_count)
  unless payload.is_a?(Hash)
    raise InvalidResponseError,
          "Parse::Embeddings::Jina: response body is not a JSON object."
  end

  data = payload["data"]
  unless data.is_a?(Array)
    raise InvalidResponseError,
          "Parse::Embeddings::Jina: response.data is not an Array."
  end
  unless data.length == input_count
    raise InvalidResponseError,
          "Parse::Embeddings::Jina: response.data.length #{data.length} != input count #{input_count}."
  end

  pairs = data.map.with_index do |entry, position|
    unless entry.is_a?(Hash)
      raise InvalidResponseError,
            "Parse::Embeddings::Jina: response.data[#{position}] is not a JSON object."
    end

    slot = entry["index"]
    in_range = slot.is_a?(Integer) && slot >= 0 && slot < input_count
    unless in_range
      raise InvalidResponseError,
            "Parse::Embeddings::Jina: response.data[#{position}].index #{slot.inspect} out of range."
    end

    [slot, entry["embedding"]]
  end

  if pairs.map(&:first).uniq.length < pairs.length
    raise InvalidResponseError, "Parse::Embeddings::Jina: duplicate index in response.data."
  end

  # The indices are now known to be a permutation of 0...input_count, so
  # placing each embedding at its index yields the sorted order.
  pairs.each_with_object(Array.new(input_count)) do |(slot, embedding), ordered|
    ordered[slot] = embedding
  end
end
353
+
354
# Exponential backoff delay: 0.5s for the first attempt, doubling with
# each further attempt, capped at 30 seconds.
#
# @param attempt [Integer] 1-based attempt number.
# @return [Float] seconds to wait before the next attempt.
def backoff_seconds(attempt)
  delay = 0.5 * (2**(attempt - 1))
  delay > 30.0 ? 30.0 : delay
end
357
+
358
# Reads the server's `Retry-After` hint from a response.
#
# Both forms of the header are understood: a delay in seconds
# (e.g. "120") and an HTTP-date (e.g. "Wed, 21 Oct 2015 07:28:00 GMT"),
# which is converted to the number of seconds from now. The numeric
# form is tried first, using the same lenient `to_f` parse as before.
#
# @param response [Object] a response; its `headers` are consulted when
#   it responds to that method.
# @return [Float, nil] a positive wait in seconds, capped at 60.0, or
#   nil when the response has no headers, the header is absent or
#   unparsable, or the indicated wait is not positive.
def retry_after_seconds(response)
  headers = response.respond_to?(:headers) ? response.headers : nil
  return nil unless headers

  raw = headers["retry-after"] || headers["Retry-After"]
  return nil unless raw

  seconds = raw.to_f
  unless seconds.positive?
    # Not a usable delay-seconds value; fall back to the HTTP-date form.
    seconds = begin
      Time.httpdate(raw.to_s) - Time.now
    rescue ArgumentError
      0.0
    end
  end

  seconds.positive? ? [seconds, 60.0].min : nil
end
364
+
365
+ private
366
+
367
# Ensures the API key is usable as an `Authorization: Bearer …` value.
#
# Rejects non-Strings, empty or whitespace-only Strings, and keys
# containing CR, LF, or NUL bytes (for example a trailing newline picked
# up when the key was read from a file). Such keys can never
# authenticate, so they are refused at construction time rather than on
# the first request. The key itself is never echoed in the error
# message.
#
# @param api_key [Object] candidate key.
# @return [nil]
# @raise [ArgumentError] when the key is unusable.
def validate_api_key!(api_key)
  # Inspect raw bytes so a key with a broken encoding cannot raise an
  # unrelated encoding error from inside this validator.
  raw = api_key.is_a?(String) ? api_key.b : nil
  if raw.nil? || raw.strip.empty?
    raise ArgumentError, "Parse::Embeddings::Jina: api_key must be a non-empty String."
  end
  return if raw.count("\r\n\0").zero?

  raise ArgumentError,
        "Parse::Embeddings::Jina: api_key must not contain CR, LF, or NUL characters."
end
372
+
373
# Ensures `model` is one of the keys of {MODEL_DEFAULT_DIMENSIONS}.
#
# @param model [Object] candidate model identifier.
# @return [nil]
# @raise [ArgumentError] listing the supported models when it is not.
def validate_model!(model)
  return if MODEL_DEFAULT_DIMENSIONS.key?(model)

  raise ArgumentError,
        "Parse::Embeddings::Jina: unknown model #{model.inspect}. " \
        "Supported: #{MODEL_DEFAULT_DIMENSIONS.keys.inspect}."
end
380
+
381
# Validates an optional output-width override against the model.
#
# `nil` is always accepted. Otherwise the value must be a positive
# Integer no larger than the model's native width, and may differ from
# the native width only for models listed in {MATRYOSHKA_MODELS}.
#
# @param model [String] an already-validated model identifier.
# @param dimensions [Integer, nil] requested width.
# @return [nil]
# @raise [ArgumentError] when the override is not acceptable.
def validate_dimensions!(model, dimensions)
  return if dimensions.nil?

  well_formed = dimensions.is_a?(Integer) && dimensions.positive?
  unless well_formed
    raise ArgumentError,
          "Parse::Embeddings::Jina: dimensions must be a positive Integer (got #{dimensions.inspect})."
  end

  native = MODEL_DEFAULT_DIMENSIONS.fetch(model)
  if dimensions > native
    raise ArgumentError,
          "Parse::Embeddings::Jina: dimensions #{dimensions} exceeds native #{native} for #{model}."
  end
  return if dimensions == native || MATRYOSHKA_MODELS.include?(model)

  raise ArgumentError,
        "Parse::Embeddings::Jina: model #{model.inspect} does not support custom dimensions " \
        "(Matryoshka-capable models: #{MATRYOSHKA_MODELS.inspect})."
end
398
+
399
# Validates the API base URL and returns its normalized string form.
#
# Checks, in order: non-empty String; parseable as a URI; scheme is
# `http` or `https`; plain `http` only when explicitly allowed; a host
# is present; no embedded userinfo (credentials).
#
# @param base_url [Object] candidate URL.
# @param allow_insecure [Boolean] whether `http://` is permitted.
# @return [String] `URI#to_s` of the parsed URL.
# @raise [ArgumentError] when any check fails.
def validate_base_url!(base_url, allow_insecure)
  unless base_url.is_a?(String) && !base_url.empty?
    raise ArgumentError, "Parse::Embeddings::Jina: base_url must be a non-empty String."
  end

  parsed = begin
    URI.parse(base_url)
  rescue URI::InvalidURIError => e
    raise ArgumentError, "Parse::Embeddings::Jina: base_url is not a valid URL (#{e.message})."
  end

  case parsed.scheme
  when "https"
    nil
  when "http"
    unless allow_insecure
      raise ArgumentError,
            "Parse::Embeddings::Jina: refusing http:// base_url. Pass allow_insecure_base_url: true to opt in."
    end
  else
    raise ArgumentError,
          "Parse::Embeddings::Jina: base_url must be http(s):// (got scheme #{parsed.scheme.inspect})."
  end

  if parsed.host.to_s.empty?
    raise ArgumentError, "Parse::Embeddings::Jina: base_url must include a host."
  end
  if parsed.userinfo
    raise ArgumentError,
          "Parse::Embeddings::Jina: base_url must not contain userinfo (credentials). " \
          "Use the api_key parameter and a clean URL."
  end

  parsed.to_s
end
426
+
427
# Ensures a named option is an Integer greater than zero.
#
# @param name [Symbol] option name, used in the error message.
# @param value [Object] candidate value.
# @return [nil]
# @raise [ArgumentError] when `value` is not a positive Integer.
def validate_positive_integer!(name, value)
  return if value.is_a?(Integer) && value > 0

  raise ArgumentError,
        "Parse::Embeddings::Jina: #{name} must be a positive Integer (got #{value.inspect})."
end
433
+
434
# Ensures a named option is an Integer that is zero or greater.
#
# @param name [Symbol] option name, used in the error message.
# @param value [Object] candidate value.
# @return [nil]
# @raise [ArgumentError] when `value` is not a non-negative Integer.
def validate_non_negative_integer!(name, value)
  return if value.is_a?(Integer) && !value.negative?

  raise ArgumentError,
        "Parse::Embeddings::Jina: #{name} must be a non-negative Integer (got #{value.inspect})."
end
440
+
441
# Version string used in the `User-Agent` header.
#
# @return [String] `Parse::Stack::VERSION` when that constant is
#   defined, otherwise `"unknown"`.
def user_agent_version
  return "unknown" unless defined?(Parse::Stack::VERSION)

  Parse::Stack::VERSION
end
444
+
445
# Reduces the base URL to `scheme://host` — path, port, and query are
# left out — for use in inspect output.
#
# @return [String, nil] the scheme and host, or nil when the URL has no
#   host or cannot be parsed.
def safe_base_host
  parsed = URI.parse(@base_url)
  hostname = parsed.host
  return nil if hostname.nil? || hostname.empty?

  "#{parsed.scheme}://#{hostname}"
rescue URI::InvalidURIError
  nil
end
452
+ end
453
+ end
454
+ end