parse-stack-next 4.5.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.env.sample +17 -3
  3. data/.github/workflows/codeql.yml +44 -0
  4. data/.github/workflows/docs.yml +39 -0
  5. data/.github/workflows/ruby.yml +8 -6
  6. data/.gitignore +4 -0
  7. data/CHANGELOG.md +295 -72
  8. data/Gemfile.lock +10 -3
  9. data/LICENSE.txt +1 -1
  10. data/README.md +190 -219
  11. data/Rakefile +1 -1
  12. data/SECURITY.md +30 -0
  13. data/assets/parse-stack-next-avatar.png +0 -0
  14. data/assets/parse-stack-next-avatar.svg +37 -0
  15. data/assets/parse-stack-next-banner.png +0 -0
  16. data/assets/parse-stack-next-banner.svg +45 -0
  17. data/assets/parse-stack-next-social-preview.png +0 -0
  18. data/docs/atlas_vector_search_guide.md +511 -0
  19. data/docs/client_sdk_guide.md +1320 -0
  20. data/docs/mcp_guide.md +225 -104
  21. data/docs/mongodb_direct_guide.md +21 -4
  22. data/docs/usage_guide.md +585 -0
  23. data/examples/transaction_example.rb +28 -28
  24. data/lib/parse/acl_scope.rb +2 -2
  25. data/lib/parse/agent/mcp_rack_app.rb +184 -16
  26. data/lib/parse/agent/metadata_dsl.rb +16 -16
  27. data/lib/parse/agent/pipeline_validator.rb +28 -1
  28. data/lib/parse/agent/prompts.rb +5 -5
  29. data/lib/parse/agent/tools.rb +287 -14
  30. data/lib/parse/agent.rb +209 -12
  31. data/lib/parse/api/analytics.rb +27 -5
  32. data/lib/parse/api/files.rb +6 -2
  33. data/lib/parse/api/push.rb +21 -4
  34. data/lib/parse/api/server.rb +59 -0
  35. data/lib/parse/api/users.rb +26 -2
  36. data/lib/parse/atlas_search/index_manager.rb +84 -0
  37. data/lib/parse/atlas_search.rb +37 -9
  38. data/lib/parse/cache/pool.rb +73 -0
  39. data/lib/parse/cache/redis.rb +190 -0
  40. data/lib/parse/client/body_builder.rb +94 -0
  41. data/lib/parse/client/caching.rb +109 -9
  42. data/lib/parse/client/response.rb +27 -0
  43. data/lib/parse/client.rb +74 -3
  44. data/lib/parse/console.rb +203 -0
  45. data/lib/parse/embeddings/cohere.rb +484 -0
  46. data/lib/parse/embeddings/fixture.rb +130 -0
  47. data/lib/parse/embeddings/jina.rb +454 -0
  48. data/lib/parse/embeddings/local_http.rb +492 -0
  49. data/lib/parse/embeddings/openai.rb +520 -0
  50. data/lib/parse/embeddings/provider.rb +264 -0
  51. data/lib/parse/embeddings/qwen.rb +431 -0
  52. data/lib/parse/embeddings/voyage.rb +550 -0
  53. data/lib/parse/embeddings.rb +225 -0
  54. data/lib/parse/graphql/scalars.rb +53 -0
  55. data/lib/parse/graphql/type_generator.rb +264 -0
  56. data/lib/parse/graphql.rb +48 -0
  57. data/lib/parse/live_query/client.rb +24 -5
  58. data/lib/parse/live_query/subscription.rb +17 -6
  59. data/lib/parse/live_query.rb +9 -4
  60. data/lib/parse/model/associations/collection_proxy.rb +2 -2
  61. data/lib/parse/model/associations/has_many.rb +32 -1
  62. data/lib/parse/model/associations/has_one.rb +17 -0
  63. data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
  64. data/lib/parse/model/classes/user.rb +307 -11
  65. data/lib/parse/model/clp.rb +1 -1
  66. data/lib/parse/model/core/embed_managed.rb +296 -0
  67. data/lib/parse/model/core/fetching.rb +4 -4
  68. data/lib/parse/model/core/indexing.rb +53 -14
  69. data/lib/parse/model/core/parse_reference.rb +3 -3
  70. data/lib/parse/model/core/properties.rb +70 -1
  71. data/lib/parse/model/core/querying.rb +57 -1
  72. data/lib/parse/model/core/vector_searchable.rb +285 -0
  73. data/lib/parse/model/file.rb +16 -4
  74. data/lib/parse/model/model.rb +26 -10
  75. data/lib/parse/model/object.rb +63 -6
  76. data/lib/parse/model/pointer.rb +16 -2
  77. data/lib/parse/model/shortnames.rb +2 -0
  78. data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
  79. data/lib/parse/model/vector.rb +102 -0
  80. data/lib/parse/mongodb.rb +90 -8
  81. data/lib/parse/pipeline_security.rb +59 -2
  82. data/lib/parse/query/constraints.rb +16 -14
  83. data/lib/parse/query/ordering.rb +1 -1
  84. data/lib/parse/query.rb +137 -64
  85. data/lib/parse/stack/generators/templates/model.erb +2 -2
  86. data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
  87. data/lib/parse/stack/generators/templates/model_role.rb +1 -1
  88. data/lib/parse/stack/generators/templates/model_session.rb +1 -1
  89. data/lib/parse/stack/generators/templates/parse.rb +1 -1
  90. data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
  91. data/lib/parse/stack/version.rb +1 -1
  92. data/lib/parse/stack.rb +375 -73
  93. data/lib/parse/two_factor_auth/user_extension.rb +5 -2
  94. data/lib/parse/vector_search.rb +341 -0
  95. data/parse-stack-next.gemspec +10 -9
  96. data/scripts/docker/docker-compose.test.yml +18 -0
  97. data/scripts/start-parse.sh +6 -0
  98. data/scripts/vector_prototype/create_vector_index.js +105 -0
  99. data/scripts/vector_prototype/fetch_embeddings.py +241 -0
  100. data/scripts/vector_prototype/fixture_manifest.json +9 -0
  101. data/scripts/vector_prototype/query_prototype.rb +84 -0
  102. data/scripts/vector_prototype/run.sh +34 -0
  103. metadata +75 -5
  104. data/parse-stack.png +0 -0
@@ -0,0 +1,492 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
require "faraday"
require "ipaddr"
require "json"
require "resolv"
require "time"
require "uri"
require_relative "provider"
require_relative "../model/file"
11
+
12
+ module Parse
13
+ module Embeddings
14
+ # Generic OpenAI-compatible local embedding provider. Talks to any
15
+ # server that exposes `POST <base_url>/embeddings` with the OpenAI
16
+ # request/response shape — covers Ollama (`/v1`), LM Studio (`/v1`),
17
+ # vLLM, llama.cpp's `server`, and any reverse-proxy that translates
18
+ # to a local model runner.
19
+ #
20
+ # @example Ollama on the same host
21
+ # Parse::Embeddings.register(:ollama,
22
+ # Parse::Embeddings::LocalHTTP.new(
23
+ # base_url: "http://localhost:11434/v1",
24
+ # model: "nomic-embed-text",
25
+ # dimensions: 768,
26
+ # allow_private_endpoint: true,
27
+ # ))
28
+ #
29
+ # @example public OpenAI-compatible proxy (e.g. internal gateway on a public DNS name)
30
+ # Parse::Embeddings.register(:gateway,
31
+ # Parse::Embeddings::LocalHTTP.new(
32
+ # base_url: "https://embeddings.example.com/v1",
33
+ # api_key: ENV.fetch("GATEWAY_API_KEY"),
34
+ # model: "bge-small-en-v1.5",
35
+ # dimensions: 384,
36
+ # ))
37
+ #
38
+ # == SSRF gate
39
+ #
40
+ # The `base_url` is resolved at construction time and the resolved
41
+ # addresses are checked against {Parse::File::BLOCKED_CIDRS}
42
+ # (loopback, RFC1918, link-local, cloud-metadata, CGNAT, IPv6 ULA,
43
+ # …). When ANY resolved address falls in a private/internal range,
44
+ # the constructor refuses unless the caller opts in via
45
+ # `allow_private_endpoint: true`.
46
+ #
47
+ # The opt-in is a deliberate, audit-able gate — Parse::Embeddings
48
+ # registration is configuration code, not user input, so opting in
49
+ # to "yes, this base_url really is my Ollama on localhost" is a
50
+ # one-line decision by the operator at boot time. A `Kernel#warn`
51
+ # fires when the opt-in is taken so the choice shows up in operator
52
+ # logs / `bundle exec rake about` output.
53
+ #
54
+ # `http://` base URLs are accepted when the endpoint is private: the host
55
+ # resolves to a private/internal address (or does not resolve at all) and
56
+ # `allow_private_endpoint: true` is set — the typical local-runner deployment.
57
+ # For public hosts they are refused unless the caller passes `allow_insecure_base_url: true` (escape hatch for self-signed internal HTTPS proxies fronted by http://).
58
+ #
59
+ # == Why no fixed model whitelist
60
+ #
61
+ # Ollama, LM Studio, and vLLM all serve operator-chosen models —
62
+ # we cannot enumerate "supported" models the way {OpenAI} can. The
63
+ # constructor instead takes the `dimensions:` explicitly, and the
64
+ # provider's {#validate_response!} (inherited) enforces that every
65
+ # returned vector matches that width. Mis-specified dimensions
66
+ # surface as {InvalidResponseError} on the first embed call.
67
+ #
68
+ # == Security
69
+ #
70
+ # * Configure-time SSRF gate (above).
71
+ # * The Faraday connection refuses `proxy:` unless the caller opts
72
+ # in via `allow_faraday_proxy: true`. Env-proxy autodiscovery is
73
+ # suppressed by default — same model as {OpenAI}.
74
+ # * `#inspect` (inherited from {Provider}) never surfaces `@api_key`.
75
+ class LocalHTTP < Provider
76
# Raised by #post_embeddings when the server answers 401 Unauthorized.
class AuthenticationError < Error
end

# Raised by #post_embeddings for any status that is neither 2xx, 401,
# 429 nor 5xx (i.e. a response that is not retried).
class BadRequestError < Error
end

# Raised by #post_embeddings when 429 responses outlast the retry budget.
class RateLimitError < Error
end

# Raised by #post_embeddings when timeouts, connection failures or 5xx
# responses outlast the retry budget.
class TransientError < Error
end
80
+
81
# Default read timeout, in seconds, used when `timeout:` is not given.
DEFAULT_TIMEOUT = 30

# Default connect timeout, in seconds, used when `open_timeout:` is not given.
DEFAULT_OPEN_TIMEOUT = 5

# Default number of retries after the first attempt.
DEFAULT_MAX_RETRIES = 3

# Default number of inputs sent per request.
DEFAULT_BATCH_SIZE = 32

# Largest response body (16 MiB) that #parse_json_body! agrees to parse.
MAX_RESPONSE_BYTES = 16 * 1024**2
86
+
87
# Builds a provider bound to one OpenAI-compatible embeddings endpoint.
#
# Every argument is validated before any instance state is assigned. When
# the SSRF gate reports the endpoint as private, a single audit line is
# written through Kernel#warn.
#
# @param base_url [String] required. Must be http(s):// with a host.
# @param model [String] required. Identifier the local server expects
#   in the `model` request field. Persisted to `embedding_meta`.
# @param dimensions [Integer] required. Width of vectors the local
#   model produces. Enforced by {Provider#validate_response!}.
# @param api_key [String, nil] optional. When present, sent as
#   `Authorization: Bearer …`. Local runners typically accept any
#   value or no header.
# @param normalize [Boolean] whether the local model returns
#   unit-normalized vectors. Defaults to `false` (Ollama and most
#   local models do NOT normalize; bge-* and OpenAI do). Affects
#   similarity metric selection downstream.
# @param timeout [Integer] read timeout, seconds.
# @param open_timeout [Integer] connect timeout, seconds.
# @param max_retries [Integer] retry attempts on 429/5xx/timeouts.
# @param embed_batch_size [Integer] inputs per request.
# @param allow_private_endpoint [Boolean] required when `base_url`
#   resolves to a private/internal/loopback address. Defaults
#   `false`. When the opt-in is actually exercised (the host is private,
#   or did not resolve) one warning is emitted per provider instance.
# @param allow_insecure_base_url [Boolean] permit `http://` for
#   PUBLIC base URLs. Defaults `false`. Independent of
#   `allow_private_endpoint` (which already implies http:// is fine
#   for the local case).
# @param allow_faraday_proxy [Boolean] opt in to proxy / env-proxy
#   autodiscovery. Defaults `false`.
# @param connection [Faraday::Connection, nil] injection seam. When
#   given it is used as-is and #build_connection is not called.
# @raise [ArgumentError] when any argument fails validation or the SSRF
#   gate refuses `base_url`.
def initialize(
  base_url:,
  model:,
  dimensions:,
  api_key: nil,
  normalize: false,
  timeout: DEFAULT_TIMEOUT,
  open_timeout: DEFAULT_OPEN_TIMEOUT,
  max_retries: DEFAULT_MAX_RETRIES,
  embed_batch_size: DEFAULT_BATCH_SIZE,
  allow_private_endpoint: false,
  allow_insecure_base_url: false,
  allow_faraday_proxy: false,
  connection: nil
)
  validate_model!(model)
  validate_dimensions!(dimensions)
  validate_optional_api_key!(api_key)
  unless [true, false].include?(normalize)
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: normalize must be true or false (got #{normalize.inspect})."
  end
  validate_positive_integer!(:timeout, timeout)
  validate_positive_integer!(:open_timeout, open_timeout)
  validate_non_negative_integer!(:max_retries, max_retries)
  validate_positive_integer!(:embed_batch_size, embed_batch_size)

  sanitized_base_url, resolved_addrs, is_private =
    validate_base_url_and_gate_ssrf!(base_url,
                                     allow_private_endpoint: allow_private_endpoint,
                                     allow_insecure_base_url: allow_insecure_base_url)
  if is_private
    # Audit log. Emits once per instance — Kernel#warn so it lands on
    # stderr and any logger that captures it. Operators running a
    # hardened environment can grep this to confirm every
    # private-endpoint opt-in was intentional.
    #
    # The gate also reports "private" when the host did not resolve at
    # all; say so explicitly instead of printing an empty address list.
    detail =
      if resolved_addrs.empty?
        "host did not resolve at construction time; treated as private."
      else
        "resolved to private address(es) #{resolved_addrs.map(&:to_s).inspect}."
      end
    warn "Parse::Embeddings::LocalHTTP: allow_private_endpoint=true for #{sanitized_base_url} — #{detail}"
  end

  @base_url = sanitized_base_url
  @model = model
  @dimensions = dimensions
  @api_key = api_key
  @normalize = normalize
  @timeout = timeout
  @open_timeout = open_timeout
  @max_retries = max_retries
  @embed_batch_size = embed_batch_size
  @allow_faraday_proxy = allow_faraday_proxy
  # build_connection reads the ivars assigned above, so it must run last.
  @connection = connection || build_connection
end
166
+
167
# @return [Integer] the vector width supplied to the constructor.
def dimensions
  @dimensions
end
170
+
171
# @return [String] the model identifier supplied to the constructor; the
#   same value is sent as the `model` field of every request.
def model_name
  @model
end
174
+
175
# @return [Integer] the configured number of inputs per request.
def embed_batch_size
  @embed_batch_size
end
178
+
179
# @return [Boolean] the `normalize:` flag supplied to the constructor.
def normalize?
  @normalize
end
182
+
183
# The OpenAI-compatible local runners do not asymmetrize. Some models
# (bge-*) have a documented query prefix, but the local server itself
# doesn't expose `input_type:` — callers wrap the query text instead.
# The kwarg is accepted for cache-key stability and dropped at the wire
# level.
#
# @return [Boolean] always `false`.
def supports_input_type?
  false
end
191
+
192
# Embeds a batch of strings with a single POST to the embeddings endpoint.
#
# @param strings [Array<String>] inputs; every element must be a
#   non-empty String.
# @param input_type [Symbol] accepted for forward compatibility and
#   passed to the instrumentation hook only; it is not part of the
#   request body.
# @return [Array<Array<Float>>] vectors aligned 1:1 with `strings`
#   (the value produced by the inherited `validate_response!` inside
#   `instrument_embed`); `[]` for an empty input without any request.
# @raise [ArgumentError] when `strings` is not an Array, or an element is
#   not a String or is empty.
def embed_text(strings, input_type: :search_document)
  unless strings.is_a?(Array)
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP#embed_text expects Array<String> (got #{strings.class})."
  end
  return [] if strings.empty?

  strings.each_with_index do |text, position|
    unless text.is_a?(String)
      raise ArgumentError,
            "Parse::Embeddings::LocalHTTP#embed_text strings[#{position}] is not a String (#{text.class})."
    end
    next unless text.empty?

    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP#embed_text strings[#{position}] is empty; local runners typically reject empty inputs."
  end

  request_body = { input: strings, model: @model }
  expected = strings.length

  instrument_embed(expected, input_type) do |emit_payload|
    response_payload = post_embeddings(request_body)

    # Local runners may or may not include `usage`. When present,
    # forward total_tokens to the instrumentation payload.
    usage = response_payload.is_a?(Hash) ? response_payload["usage"] : nil
    if usage.is_a?(Hash)
      total_tokens = usage["total_tokens"]
      emit_payload[:total_tokens] = total_tokens if total_tokens.is_a?(Integer) && total_tokens >= 0
    end

    validate_response!(expected, extract_vectors!(response_payload, expected))
  end
end
227
+
228
# Extends the inherited inspect attributes with the endpoint origin
# (scheme and host only, via #safe_base_host) and the retry budget.
#
# @return [Hash] the parent's attributes merged with `:base` and `:retries`.
def inspect_attrs
  local_attrs = { base: safe_base_host, retries: @max_retries }
  super.merge(local_attrs)
end
231
+
232
+ protected
233
+
234
# Builds the default Faraday connection from the instance configuration.
#
# Sets JSON content negotiation headers, a User-Agent carrying the gem
# version, and an `Authorization: Bearer` header only when an API key was
# configured. Timeouts come from `@timeout` / `@open_timeout`.
#
# @return [Faraday::Connection]
def build_connection
  proxy_allowed = @allow_faraday_proxy

  default_headers = {
    "Content-Type" => "application/json",
    "Accept" => "application/json",
    "User-Agent" => "parse-stack-embeddings/#{user_agent_version}",
  }
  default_headers["Authorization"] = "Bearer #{@api_key}" if @api_key

  options = { url: @base_url, headers: default_headers }
  options[:proxy] = nil unless proxy_allowed

  connection = Faraday.new(**options) do |builder|
    builder.options.timeout = @timeout
    builder.options.open_timeout = @open_timeout
    builder.adapter Faraday.default_adapter
  end

  # The proxy is cleared a second time on the built connection when the
  # operator has not opted in.
  # NOTE(review): presumably the explicit setter is what actually stops
  # env-proxy autodiscovery — confirm against the Faraday version in use.
  connection.proxy = nil if !proxy_allowed && connection.respond_to?(:proxy=)
  connection
end
253
+
254
# POSTs `body` (serialized as JSON) to the relative `embeddings` path and
# returns the parsed response, retrying transient failures.
#
# Retried, with a sleep in between: Faraday timeouts / connection
# failures, 429 (honouring Retry-After when #retry_after_seconds yields a
# value) and any status >= 500. At most `@max_retries + 1` attempts are
# made. Everything else fails immediately.
#
# @param body [Hash] request payload.
# @return [Object] the value of #parse_json_body! for a 2xx response.
# @raise [AuthenticationError] on 401.
# @raise [RateLimitError] when 429 persists past the retry budget.
# @raise [TransientError] when timeouts, connection failures or 5xx
#   persist past the retry budget.
# @raise [BadRequestError] on any other non-2xx status.
def post_embeddings(body)
  attempt = 0
  loop do
    attempt += 1
    out_of_retries = attempt > @max_retries

    begin
      response = @connection.post("embeddings") { |request| request.body = body.to_json }
    rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
      if out_of_retries
        raise TransientError, "Parse::Embeddings::LocalHTTP: #{e.class} after #{attempt} attempt(s)."
      end

      sleep(backoff_seconds(attempt))
      next
    end

    code = response.status
    return parse_json_body!(response.body) if (200...300).cover?(code)

    case code
    when 401
      raise AuthenticationError,
            "Parse::Embeddings::LocalHTTP: 401 Unauthorized — check api_key."
    when 429
      if out_of_retries
        raise RateLimitError,
              "Parse::Embeddings::LocalHTTP: 429 rate limited after #{attempt} attempt(s)."
      end

      sleep(retry_after_seconds(response) || backoff_seconds(attempt))
    else
      unless code >= 500
        raise BadRequestError,
              "Parse::Embeddings::LocalHTTP: #{code} from POST /embeddings."
      end
      if out_of_retries
        raise TransientError,
              "Parse::Embeddings::LocalHTTP: #{code} after #{attempt} attempt(s)."
      end

      sleep(backoff_seconds(attempt))
    end
  end
end
297
+
298
# Parses a response body as JSON after enforcing the size cap.
#
# @param body [#to_s] raw response body.
# @return [Object] the parsed JSON document.
# @raise [InvalidResponseError] when the body is larger than
#   MAX_RESPONSE_BYTES, or is not valid JSON (including nesting deeper
#   than 32 levels, which the parser rejects).
def parse_json_body!(body)
  raw = body.to_s
  size = raw.bytesize
  if size > MAX_RESPONSE_BYTES
    raise InvalidResponseError,
          "Parse::Embeddings::LocalHTTP: response body exceeds #{MAX_RESPONSE_BYTES} bytes (#{size}). Refusing to parse."
  end

  begin
    JSON.parse(raw, max_nesting: 32)
  rescue JSON::ParserError => e
    raise InvalidResponseError,
          "Parse::Embeddings::LocalHTTP: response is not valid JSON (#{e.message})."
  end
end
310
+
311
# Pulls the embedding of every `data` entry out of an OpenAI-shaped
# response, in input order.
#
# When every entry is a Hash with an Integer `index`, entries are placed
# by that index (each must be unique and within `0...input_count`).
# Otherwise — i.e. as soon as any entry lacks an Integer `index` — the
# response order is trusted and entries are taken positionally.
#
# The embeddings themselves are returned as-is; they are not validated
# here.
#
# @param payload [Object] parsed response body.
# @param input_count [Integer] number of inputs that were sent.
# @return [Array] one `embedding` value per input.
# @raise [InvalidResponseError] when the payload is not a Hash, `data` is
#   not an Array of `input_count` entries, an entry is not a Hash, or an
#   index is out of range or duplicated.
def extract_vectors!(payload, input_count)
  unless payload.is_a?(Hash)
    raise InvalidResponseError,
          "Parse::Embeddings::LocalHTTP: response body is not a JSON object."
  end

  entries = payload["data"]
  unless entries.is_a?(Array)
    raise InvalidResponseError,
          "Parse::Embeddings::LocalHTTP: response.data is not an Array."
  end
  unless entries.length == input_count
    raise InvalidResponseError,
          "Parse::Embeddings::LocalHTTP: response.data.length #{entries.length} != input count #{input_count}."
  end

  fully_indexed = entries.all? { |entry| entry.is_a?(Hash) && entry["index"].is_a?(Integer) }

  unless fully_indexed
    positional = []
    entries.each_with_index do |entry, position|
      unless entry.is_a?(Hash)
        raise InvalidResponseError,
              "Parse::Embeddings::LocalHTTP: response.data[#{position}] is not a JSON object."
      end
      positional << entry["embedding"]
    end
    return positional
  end

  # Range-check every index first, then look for duplicates, so that an
  # out-of-range index is reported in preference to a duplicate.
  valid_positions = 0...input_count
  pairs = entries.map do |entry|
    position = entry["index"]
    unless valid_positions.cover?(position)
      raise InvalidResponseError,
            "Parse::Embeddings::LocalHTTP: response.data entry index #{position} out of range."
    end
    [position, entry["embedding"]]
  end

  unless pairs.map(&:first).uniq.length == pairs.length
    raise InvalidResponseError,
          "Parse::Embeddings::LocalHTTP: duplicate index in response.data."
  end

  # Indices are unique, in range and as many as the inputs, so they form a
  # permutation of 0...input_count and every slot gets filled.
  ordered = Array.new(input_count)
  pairs.each { |position, vector| ordered[position] = vector }
  ordered
end
354
+
355
# Exponential backoff: 0.5s for the first attempt, doubling with each
# further attempt, capped at 30 seconds. No jitter is applied.
#
# @param attempt [Integer] 1-based attempt number.
# @return [Float] seconds to sleep before the next attempt.
def backoff_seconds(attempt)
  delay = 0.5 * (2**(attempt - 1))
  delay < 30.0 ? delay : 30.0
end
358
+
359
# Reads the server-suggested delay from a response's Retry-After header.
#
# Both forms of the header are understood: a number of seconds
# (`Retry-After: 5`) and an HTTP-date (`Retry-After: Wed, 21 Oct 2015
# 07:28:00 GMT`), in which case the delay is the time remaining until that
# date. The result is capped at 60 seconds.
#
# @param response [#headers] response object; anything without a
#   `headers` method, or whose headers are nil, yields `nil`.
# @return [Float, nil] seconds to wait, or `nil` when the header is
#   absent, unparsable, or not in the future (callers then fall back to
#   their own backoff).
def retry_after_seconds(response)
  headers = response.respond_to?(:headers) ? response.headers : nil
  return nil unless headers

  raw = headers["retry-after"] || headers["Retry-After"]
  return nil unless raw

  text = raw.to_s.strip
  seconds = text.to_f
  unless seconds.positive?
    # Not a positive number of seconds — try the HTTP-date form.
    begin
      seconds = Time.httpdate(text) - Time.now
    rescue ArgumentError
      return nil
    end
  end

  seconds.positive? ? [seconds, 60.0].min : nil
end
365
+
366
+ private
367
+
368
# Validates `base_url` and applies the configure-time SSRF gate.
#
# Checks, in order: non-empty String, parsable URL, http/https scheme,
# host present, no userinfo, then resolves the host and classifies it
# against Parse::File::BLOCKED_CIDRS.
#
# @param base_url [String] candidate endpoint URL.
# @param allow_private_endpoint [Boolean] operator opt-in for hosts that
#   resolve to a blocked (private/internal) range, or that do not resolve.
# @param allow_insecure_base_url [Boolean] operator opt-in for `http://`
#   on a public host.
# @return [Array(String, Array<IPAddr>, Boolean)] sanitized URL,
#   resolved addresses (may be empty when unresolved AND opted-in
#   for a private endpoint via hostname), and a flag indicating
#   whether the endpoint is treated as private.
# @raise [ArgumentError] when any check fails.
def validate_base_url_and_gate_ssrf!(base_url, allow_private_endpoint:, allow_insecure_base_url:)
  unless base_url.is_a?(String) && !base_url.empty?
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: base_url must be a non-empty String."
  end

  begin
    uri = URI.parse(base_url)
  rescue URI::InvalidURIError => e
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: base_url is not a valid URL (#{e.message})."
  end

  unless %w[http https].include?(uri.scheme)
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: base_url must be http(s):// (got scheme #{uri.scheme.inspect})."
  end

  host = uri.host
  if host.nil? || host.empty?
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: base_url must include a host."
  end

  if uri.userinfo
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: base_url must not contain userinfo (credentials). " \
          "Use the api_key parameter and a clean URL."
  end

  # URI#host keeps the square brackets of an IPv6 literal ("[::1]");
  # URI#hostname strips them, which is the form name/address resolvers
  # expect.
  # NOTE(review): assumes Parse::File.resolve_addresses accepts a bare
  # IPv6 literal — confirm against its implementation.
  resolved = Parse::File.resolve_addresses(uri.hostname)

  # DNS failure at construction time. Without resolved addresses the SSRF
  # gate has nothing to evaluate, so a hostname that fails to resolve now
  # but resolves later (lazy propagation, attacker-timed flip,
  # split-horizon DNS) would skip the gate entirely. Refuse fail-closed
  # unless the operator has already opted into private endpoints — in
  # which case a transient DNS failure is an acceptable trade-off for the
  # lazy-runner case (Ollama starting after the Rails boot).
  if resolved.empty? && !allow_private_endpoint
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: could not resolve base_url host #{host.inspect}. " \
          "Pass allow_private_endpoint: true if the host is intentionally local/transient."
  end

  # Empty-resolution under allow_private_endpoint is treated as private
  # for the http:// scheme gate below, since the operator has already
  # asserted local-class trust. Otherwise a single blocked address among
  # the resolved set is enough to classify the endpoint as private.
  is_private =
    if resolved.empty?
      allow_private_endpoint
    else
      resolved.any? { |ip| Parse::File::BLOCKED_CIDRS.any? { |cidr| cidr.include?(ip) } }
    end

  if is_private && !allow_private_endpoint
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: refusing base_url that resolves to a private/internal " \
          "address (#{resolved.map(&:to_s).inspect}). Pass allow_private_endpoint: true to opt in."
  end

  # http:// scheme: allowed when the endpoint is private (the typical
  # local-runner case) OR the caller has explicitly opted into insecure
  # public HTTP. Refused otherwise. On this path the host is known to be
  # public, so allow_private_endpoint would not help and is not suggested.
  if uri.scheme == "http" && !is_private && !allow_insecure_base_url
    raise ArgumentError,
          "Parse::Embeddings::LocalHTTP: refusing http:// base_url for a public host. " \
          "Use https://, or pass allow_insecure_base_url: true (escape hatch only)."
  end

  [uri.to_s, resolved, is_private]
end
442
+
443
# @param model [Object] candidate model identifier.
# @return [nil]
# @raise [ArgumentError] unless `model` is a non-empty String.
def validate_model!(model)
  return if model.is_a?(String) && !model.empty?

  raise ArgumentError,
        "Parse::Embeddings::LocalHTTP: model must be a non-empty String."
end
449
+
450
# @param dimensions [Object] candidate vector width.
# @return [nil]
# @raise [ArgumentError] unless `dimensions` is a positive Integer.
def validate_dimensions!(dimensions)
  return if dimensions.is_a?(Integer) && dimensions.positive?

  raise ArgumentError,
        "Parse::Embeddings::LocalHTTP: dimensions must be a positive Integer (got #{dimensions.inspect})."
end
456
+
457
# `nil` means "no Authorization header" and is always accepted.
#
# @param api_key [Object] candidate API key.
# @return [nil]
# @raise [ArgumentError] when a non-nil value is not a non-empty String.
def validate_optional_api_key!(api_key)
  return if api_key.nil?
  return if api_key.is_a?(String) && !api_key.empty?

  raise ArgumentError,
        "Parse::Embeddings::LocalHTTP: api_key, when provided, must be a non-empty String."
end
464
+
465
# @param name [Symbol, String] option name, used in the error message.
# @param value [Object] candidate value.
# @return [nil]
# @raise [ArgumentError] unless `value` is an Integer greater than zero.
def validate_positive_integer!(name, value)
  return if value.is_a?(Integer) && value.positive?

  raise ArgumentError,
        "Parse::Embeddings::LocalHTTP: #{name} must be a positive Integer (got #{value.inspect})."
end
471
+
472
# @param name [Symbol, String] option name, used in the error message.
# @param value [Object] candidate value.
# @return [nil]
# @raise [ArgumentError] unless `value` is an Integer that is zero or
#   greater.
def validate_non_negative_integer!(name, value)
  return if value.is_a?(Integer) && value >= 0

  raise ArgumentError,
        "Parse::Embeddings::LocalHTTP: #{name} must be a non-negative Integer (got #{value.inspect})."
end
478
+
479
# @return [String] `Parse::Stack::VERSION` when that constant is defined,
#   otherwise the literal "unknown". Used in the User-Agent header.
def user_agent_version
  return "unknown" unless defined?(Parse::Stack::VERSION)

  Parse::Stack::VERSION
end
482
+
483
# Reduces `@base_url` to `scheme://host` for display: port, path, query
# and fragment are all left out.
#
# @return [String, nil] the origin-like string, or `nil` when the stored
#   URL has no host or cannot be parsed.
def safe_base_host
  parsed = URI.parse(@base_url)
  hostname = parsed.host
  return nil if hostname.nil? || hostname.empty?

  "#{parsed.scheme}://#{hostname}"
rescue URI::InvalidURIError
  nil
end
490
+ end
491
+ end
492
+ end