parse-stack-next 4.5.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/release.yml +32 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/.vscode/settings.json +3 -0
- data/CHANGELOG.md +305 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +88 -0
- data/lib/parse/cache/redis.rb +249 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/create_lock.rb +14 -2
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +77 -5
- data/parse-stack.png +0 -0
data/lib/parse/atlas_search.rb
CHANGED
|
@@ -243,6 +243,18 @@ module Parse
|
|
|
243
243
|
# @option options [Hash] :sort sort specification (default: by relevance score)
|
|
244
244
|
# @option options [Boolean] :raw return raw MongoDB documents (default: false)
|
|
245
245
|
# @option options [String] :class_name Parse class name for object conversion
|
|
246
|
+
# @option options [String] :session_token Parse session token used to scope
|
|
247
|
+
# ACL/CLP enforcement to the owning user.
|
|
248
|
+
# @option options [Boolean] :master run with master-key semantics and bypass
|
|
249
|
+
# ACL/CLP enforcement (default: false).
|
|
250
|
+
# @option options [Parse::User, Parse::Pointer] :acl_user act as the given
|
|
251
|
+
# user pointer for ACL evaluation (no REST equivalent; mongo-direct only).
|
|
252
|
+
# @option options [String, Parse::Role] :acl_role act as the given role for
|
|
253
|
+
# ACL evaluation (no REST equivalent; mongo-direct only).
|
|
254
|
+
# @option options [Symbol] :read_preference MongoDB read preference applied
|
|
255
|
+
# to the underlying collection (e.g. +:secondary+).
|
|
256
|
+
# @option options [Integer] :max_time_ms maximum server-side execution time
|
|
257
|
+
# in milliseconds for the aggregate command.
|
|
246
258
|
#
|
|
247
259
|
# @return [Parse::AtlasSearch::SearchResult] search result object
|
|
248
260
|
#
|
|
@@ -393,6 +405,19 @@ module Parse
|
|
|
393
405
|
# @option options [Integer] :limit max suggestions to return (default: 10)
|
|
394
406
|
# @option options [Hash] :filter additional constraints
|
|
395
407
|
# @option options [Boolean] :raw return raw documents (default: false)
|
|
408
|
+
# @option options [String] :class_name Parse class name for object conversion.
|
|
409
|
+
# @option options [String] :session_token Parse session token used to scope
|
|
410
|
+
# ACL/CLP enforcement to the owning user.
|
|
411
|
+
# @option options [Boolean] :master run with master-key semantics and bypass
|
|
412
|
+
# ACL/CLP enforcement (default: false).
|
|
413
|
+
# @option options [Parse::User, Parse::Pointer] :acl_user act as the given
|
|
414
|
+
# user pointer for ACL evaluation (no REST equivalent; mongo-direct only).
|
|
415
|
+
# @option options [String, Parse::Role] :acl_role act as the given role for
|
|
416
|
+
# ACL evaluation (no REST equivalent; mongo-direct only).
|
|
417
|
+
# @option options [Symbol] :read_preference MongoDB read preference applied
|
|
418
|
+
# to the underlying collection (e.g. +:secondary+).
|
|
419
|
+
# @option options [Integer] :max_time_ms maximum server-side execution time
|
|
420
|
+
# in milliseconds for the aggregate command.
|
|
396
421
|
#
|
|
397
422
|
# @return [Parse::AtlasSearch::AutocompleteResult] autocomplete result
|
|
398
423
|
#
|
|
@@ -509,7 +534,14 @@ module Parse
|
|
|
509
534
|
# @param collection_name [String] the Parse collection name
|
|
510
535
|
# @param query [String, nil] the search query text (nil for match-all)
|
|
511
536
|
# @param facets [Hash] facet definitions
|
|
512
|
-
# @param options [Hash] search options (same as #search
|
|
537
|
+
# @param options [Hash] search options (same as {#search}; see that
|
|
538
|
+
# method for the full list of accepted +@option+ entries including
|
|
539
|
+
# +:index+, +:fields+, +:fuzzy+, +:limit+, +:filter+, +:read_preference+,
|
|
540
|
+
# +:max_time_ms+, and the scoping kwargs +:master+, +:session_token+,
|
|
541
|
+
# +:acl_user+, +:acl_role+). Note: scoped identity kwargs require
|
|
542
|
+
# +master: true+ to be passed explicitly — $searchMeta bucket counts
|
|
543
|
+
# cannot be filtered by ACL after the fact, so the method refuses
|
|
544
|
+
# to silently downgrade.
|
|
513
545
|
#
|
|
514
546
|
# @return [Parse::AtlasSearch::FacetedResult] faceted result
|
|
515
547
|
#
|
|
@@ -944,19 +976,15 @@ module Parse
|
|
|
944
976
|
objects = parse_results.each_with_index.map do |doc, idx|
|
|
945
977
|
obj = build_parse_object(doc, class_name)
|
|
946
978
|
raw_doc = raw_results[idx]
|
|
947
|
-
# Attach search metadata from original raw document
|
|
979
|
+
# Attach search metadata from original raw document. `search_score`
|
|
980
|
+
# and `search_highlights` readers are defined once on Parse::Object
|
|
981
|
+
# (see lib/parse/model/object.rb) so we only set the ivars here —
|
|
982
|
+
# no per-row singleton method definition.
|
|
948
983
|
if obj && raw_doc["_score"]
|
|
949
984
|
obj.instance_variable_set(:@_search_score, raw_doc["_score"])
|
|
950
|
-
# Define accessor if not already defined
|
|
951
|
-
unless obj.respond_to?(:search_score)
|
|
952
|
-
obj.define_singleton_method(:search_score) { @_search_score }
|
|
953
|
-
end
|
|
954
985
|
end
|
|
955
986
|
if obj && raw_doc["_highlights"]
|
|
956
987
|
obj.instance_variable_set(:@_search_highlights, raw_doc["_highlights"])
|
|
957
|
-
unless obj.respond_to?(:search_highlights)
|
|
958
|
-
obj.define_singleton_method(:search_highlights) { @_search_highlights }
|
|
959
|
-
end
|
|
960
988
|
end
|
|
961
989
|
obj
|
|
962
990
|
end.compact
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "connection_pool"
|
|
5
|
+
require "moneta"
|
|
6
|
+
|
|
7
|
+
module Parse
|
|
8
|
+
module Cache
|
|
9
|
+
# Moneta-compatible facade over a ConnectionPool of Moneta stores. The
|
|
10
|
+
# Faraday caching middleware only calls four methods on its store
|
|
11
|
+
# (`[]`, `key?`, `delete`, `store`); this class checks out a backend
|
|
12
|
+
# for each of them via `@pool.with`.
|
|
13
|
+
#
|
|
14
|
+
# Why a pool: a single Moneta-Redis store wraps one Redis connection.
|
|
15
|
+
# Under a multi-threaded Puma worker (or any concurrent caller), threads
|
|
16
|
+
# serialize on that connection's mutex. A pool of N stores lets up to N
|
|
17
|
+
# cache calls run in parallel.
|
|
18
|
+
#
|
|
19
|
+
# Note that a cache hit costs two checkouts (`key?` then `[]`). That is
|
|
20
|
+
# accepted to keep behavior identical to a plain Moneta store; callers
|
|
21
|
+
# should size the pool with that in mind (default 5, which matches the
|
|
22
|
+
# Puma default thread count).
|
|
23
|
+
class Pool
  # The wrapped ConnectionPool instance.
  attr_reader :pool

  # Build a pool of backend stores.
  #
  # @param size [Integer] number of pooled backend stores.
  # @param timeout [Numeric] seconds to wait for a checkout before
  #   `ConnectionPool::TimeoutError` is raised.
  # @yield Block invoked to build a single backend store. Must return a
  #   Moneta store responding to `[]`, `key?`, `delete`, `store`.
  # @raise [ArgumentError] when no builder block is supplied.
  def initialize(size: 5, timeout: 5, &block)
    unless block
      raise ArgumentError, "Parse::Cache::Pool requires a block that builds a Moneta store"
    end

    @pool = ConnectionPool.new(size: size, timeout: timeout, &block)
    @closed = false
  end

  # Read the value stored under +key+ from a checked-out backend.
  def [](key)
    with_backend { |backend| backend[key] }
  end

  # Ask a checked-out backend whether +key+ exists.
  def key?(key)
    with_backend { |backend| backend.key?(key) }
  end

  # Remove +key+ via a checked-out backend.
  def delete(key)
    with_backend { |backend| backend.delete(key) }
  end

  # Write +value+ under +key+, passing +options+ straight through to the
  # backend's `#store`.
  def store(key, value, options = {})
    with_backend { |backend| backend.store(key, value, options) }
  end

  # Atomic SETNX-style write. `Parse::CreateLock` relies on this to take
  # cross-process locks against Redis-backed stores. Delegates to the
  # backend's `#create`, which returns `true` only if the key was absent
  # and is now set.
  def create(key, value, options = {})
    with_backend { |backend| backend.create(key, value, options) }
  end

  # Atomic counter increment, forwarded so callers expecting the wider
  # Moneta surface (counters, rate limits) work through the pool.
  def increment(key, amount = 1, options = {})
    with_backend { |backend| backend.increment(key, amount, options) }
  end

  # Clear the underlying backend. Only one backend is checked out: the
  # pooled Moneta stores are expected to point at the same Redis DB, so
  # clearing through one connection clears it for all of them. Backends
  # that do not respond to `#clear` are left untouched.
  #
  # @return [self]
  def clear
    with_backend do |backend|
      backend.clear if backend.respond_to?(:clear)
    end
    self
  end

  # Close all pooled backends. Repeat calls are no-ops: the `@closed`
  # flag ensures `ConnectionPool#shutdown` is invoked at most once.
  def close
    unless @closed
      @closed = true
      @pool.shutdown do |backend|
        backend.close if backend.respond_to?(:close)
      end
    end
  end

  private

  # Check a backend out of the pool for the duration of the block and
  # return the block's result.
  def with_backend(&work)
    @pool.with(&work)
  end
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "moneta"
|
|
5
|
+
require_relative "pool"
|
|
6
|
+
|
|
7
|
+
module Parse
|
|
8
|
+
module Cache
|
|
9
|
+
# Ergonomic Redis cache builder for Parse Stack. Composes a
|
|
10
|
+
# ConnectionPool of Moneta-Redis stores and carries an optional
|
|
11
|
+
# `namespace` that `Parse::Client` will pick up automatically — there
|
|
12
|
+
# is no need to also pass `cache_namespace:` to `Parse.setup` when
|
|
13
|
+
# using this wrapper.
|
|
14
|
+
#
|
|
15
|
+
# Usage:
|
|
16
|
+
# Parse.setup(
|
|
17
|
+
# cache: Parse::Cache::Redis.new(
|
|
18
|
+
# url: "redis://localhost:6379/0",
|
|
19
|
+
# namespace: "app_x",
|
|
20
|
+
# pool_size: 10,
|
|
21
|
+
# ),
|
|
22
|
+
# expires: 60,
|
|
23
|
+
# ...
|
|
24
|
+
# )
|
|
25
|
+
#
|
|
26
|
+
# The instance is a Moneta-compatible store (it delegates the four
|
|
27
|
+
# methods the Faraday caching middleware uses — `[]`, `key?`,
|
|
28
|
+
# `delete`, `store` — to a pooled backend), so it can be passed
|
|
29
|
+
# directly to `Parse.setup(cache:)` / `Parse::Client.new(cache:)`.
|
|
30
|
+
class Redis
  # @return [String, nil] cache key namespace prefix (or nil if not set).
  attr_reader :namespace

  # @return [Integer] pool size.
  attr_reader :pool_size

  # @return [String] Redis connection URL.
  attr_reader :url

  # @param url [String] Redis URL (e.g. `"redis://localhost:6379/0"`).
  # @param namespace [String, nil] optional key prefix so multiple Parse
  #   apps can share one Redis without colliding. When non-nil, the
  #   namespace is automatically forwarded to the caching middleware
  #   as `cache_namespace:`. A single trailing `:` is stripped and an
  #   empty result is treated as "no namespace".
  # @param pool_size [Integer] number of pooled Moneta-Redis stores.
  #   Defaults to 5 (the Puma default thread count).
  #
  #   **Sizing math (per Faraday request):**
  #   - cache hit: `key?` + `[]` = **2 checkouts**
  #   - GET miss + successful store: `key?` + 3 variant deletes
  #     (anonymous + master-key sibling + final key) + 1 `store` in
  #     `on_complete` = **up to 5 checkouts**
  #   - non-GET write (POST/PUT/DELETE): 3 variant deletes =
  #     **3 checkouts**
  #
  #   The worst case (5) is on the write-through-after-miss path, not
  #   the hit path. Rule of thumb: start at `pool_size = RAILS_MAX_THREADS`,
  #   then bump it up if you observe `ConnectionPool::TimeoutError` in
  #   `parse.cache.error` notifications (the middleware swallows that
  #   error into a passthrough request rather than raising to the caller).
  # @param pool_timeout [Numeric] seconds to wait for a backend
  #   checkout before raising `ConnectionPool::TimeoutError`. Defaults
  #   to 5s. The caching middleware catches that error and falls back
  #   to a passthrough request rather than raising to the caller.
  # @param moneta_options [Hash] extra options passed through to
  #   `Moneta.new(:Redis, ...)` (e.g. `:db`, `:connect_timeout`).
  #   `expires: true` is set automatically so per-key TTLs supplied
  #   by the caching middleware (the `:expires` Faraday option) are
  #   honored by Redis. Pass `expires: false` here to opt out — but
  #   note that doing so causes cached responses to live forever,
  #   which is rarely what you want for a session-token-scoped
  #   response cache.
  def initialize(url:, namespace: nil, pool_size: 5, pool_timeout: 5, **moneta_options)
    @url = url
    @namespace = normalize_namespace(namespace)
    @pool_size = pool_size
    @pool_timeout = pool_timeout
    # Default expires: true so per-call `expires:` (the TTL the
    # Faraday caching middleware passes on store) is honored. The
    # Moneta-Redis adapter ignores per-call expires unless the
    # store was constructed with this flag. Without it, cached
    # session-scoped REST responses outlive their token's
    # validity. Callers can still pass `expires: false` to opt out.
    merged_options = { expires: true }.merge(moneta_options)
    @moneta_options = merged_options
    @closed = false
    @pool = Pool.new(size: pool_size, timeout: pool_timeout) do
      Moneta.new(:Redis, { url: url }.merge(merged_options))
    end
  end

  # Read the value stored under +key+.
  def [](key)
    @pool[key]
  end

  # @return [Boolean] whether +key+ exists in the backing store.
  def key?(key)
    @pool.key?(key)
  end

  # Remove +key+ from the backing store.
  def delete(key)
    @pool.delete(key)
  end

  # Write +value+ under +key+; +options+ are passed through to the pool.
  def store(key, value, options = {})
    @pool.store(key, value, options)
  end

  # Atomic SETNX. Required so `Parse::CreateLock` can acquire
  # cross-process locks when this wrapper is the configured cache /
  # `synchronize_create_store`. Returns `true` only when the key did
  # not already exist.
  def create(key, value, options = {})
    @pool.create(key, value, options)
  end

  # Atomic counter increment. Forwarded for Moneta surface parity.
  def increment(key, amount = 1, options = {})
    @pool.increment(key, amount, options)
  end

  # Clear cached entries belonging to this wrapper. Required for
  # `Parse::Client#clear_cache!` compatibility.
  #
  # **Namespace-scoped when a namespace is set:** the wrapper walks
  # `<namespace>:*` via Redis SCAN and DELs the matching keys,
  # leaving other tenants on the same DB untouched. The configured
  # namespace is matched literally: any SCAN glob metacharacters it
  # contains are backslash-escaped before the pattern is built, so a
  # namespace such as `app[1]` only ever targets `app[1]:*`. When no
  # namespace is configured the wrapper falls back to `FLUSHDB` on
  # the backing DB — same blast radius as previous versions, but
  # only for unnamespaced deployments. To opt into the wide
  # FLUSHDB explicitly (e.g. ops tooling), call {#flush_db!}.
  #
  # @param scope [String, nil] explicit namespace prefix to scan-delete.
  #   When provided, overrides the wrapper's configured `@namespace` and
  #   SCAN-deletes `<scope>:*` regardless of how the wrapper was built.
  #   This is the safe escape hatch for tenants that share a non-
  #   namespaced wrapper but still want to evict only their own keys
  #   without `FLUSHDB`-ing siblings (and without wiping
  #   `parse-stack:foc:v1:*` create-lock keys that live on the same DB).
  #   The scope must be a non-empty String; the trailing `:` is added
  #   automatically and any trailing `:` in the input is stripped so
  #   `"tenant_x"` and `"tenant_x:"` are equivalent.
  # @raise [ArgumentError] when +scope+ is given but is not a valid
  #   namespace string (see the rules documented on `validate_scope!`).
  # @return [self]
  def clear(scope: nil)
    if scope
      prefix = validate_scope!(scope)
      delete_keys_matching!("#{prefix}:*")
    elsif @namespace
      # The namespace is not validated at construction time, so escape
      # it here rather than letting Redis interpret it as a glob.
      delete_keys_matching!("#{escape_glob(@namespace)}:*")
    else
      @pool.clear
    end
    self
  end

  # Issue `FLUSHDB` on the backing Redis DB, regardless of whether a
  # namespace is configured. Evicts every key on the selected DB,
  # including unrelated tenants — use only for ops tooling that
  # owns the whole DB.
  #
  # @return [self]
  def flush_db!
    @pool.clear
    self
  end

  # Close all pooled connections. Safe to call multiple times.
  def close
    return if @closed
    @closed = true
    @pool.close
  end

  private

  # SCAN the backing DB for keys matching +pattern+ and DEL them in
  # batches, using a single checked-out backend for the whole walk.
  def delete_keys_matching!(pattern)
    @pool.pool.with do |store|
      redis = backend_client(store)
      # SCAN-DEL loop. `count:` is a hint to the server; the actual
      # batch size returned varies. Loop until the cursor wraps back
      # to "0".
      cursor = "0"
      loop do
        cursor, keys = redis.scan(cursor, match: pattern, count: 1000)
        redis.del(*keys) unless keys.empty?
        # `to_s` keeps the termination check correct even if a client
        # hands the cursor back as an Integer.
        break if cursor.to_s == "0"
      end
    end
  end

  # Walk down the Moneta proxy chain (Expires → Adapter → redis-rb)
  # until we reach an object that quacks like the redis-rb client
  # (i.e. responds to #scan). Moneta wraps the actual adapter when
  # `expires: true` is passed, and the adapter then exposes the
  # underlying redis-rb client via `#backend` (modern releases) or
  # the `@backend` ivar (older releases).
  #
  # The walk is capped at 12 hops. If no node responds to #scan the
  # last node visited (possibly nil) is returned as-is, so the
  # caller's `#scan` call is what raises in that case.
  def backend_client(moneta_store)
    node = moneta_store
    12.times do
      return node if node.respond_to?(:scan)
      if node.respond_to?(:backend)
        node = node.backend
      elsif node.instance_variable_defined?(:@backend)
        node = node.instance_variable_get(:@backend)
      elsif node.instance_variable_defined?(:@adapter)
        node = node.instance_variable_get(:@adapter)
      else
        break
      end
      break if node.nil?
    end
    node
  end

  # Coerce +ns+ to a String, strip one trailing `:`, and return nil
  # when nothing is left.
  def normalize_namespace(ns)
    s = ns.to_s.chomp(":")
    s.empty? ? nil : s
  end

  # Characters Redis treats specially in SCAN/KEYS glob patterns.
  GLOB_ESCAPABLE = /[*?\[\]\\]/.freeze
  private_constant :GLOB_ESCAPABLE

  # Backslash-escape every glob metacharacter in +literal+ so it is
  # matched verbatim inside a SCAN MATCH pattern.
  def escape_glob(literal)
    literal.gsub(GLOB_ESCAPABLE) { |char| "\\#{char}" }
  end

  # Validate a caller-supplied `scope:` for `clear(scope:)`. Returns the
  # normalized prefix or raises ArgumentError. We enforce:
  #
  # - must be a String (Symbol / Integer / nil would silently `.to_s`
  #   under `normalize_namespace` and expand the deletion target —
  #   `scope: 0` would clear `0:*`)
  # - must be non-empty after trimming a trailing `:`
  # - must not contain Redis SCAN glob metacharacters (`*`, `?`, `[`,
  #   `]`, `\`) — otherwise `scope: "*"` would SCAN-delete the whole
  #   DB, defeating the whole point of having `flush_db!` as the
  #   explicit wide-blast-radius escape hatch
  # - must not contain a null byte (defense-in-depth against keys
  #   crafted to terminate early in some Redis client paths)
  GLOB_METACHARS = /[\*\?\[\]\\\x00]/.freeze
  private_constant :GLOB_METACHARS

  def validate_scope!(scope)
    unless scope.is_a?(String)
      raise ArgumentError, "scope: must be a String (got #{scope.class})"
    end
    prefix = scope.chomp(":")
    if prefix.empty?
      raise ArgumentError, "scope: must be a non-empty namespace string"
    end
    if prefix.match?(GLOB_METACHARS)
      raise ArgumentError,
            "scope: must not contain Redis SCAN glob characters (*, ?, [, ], \\, or NUL); " \
            "use flush_db! for a full-DB flush"
    end
    prefix
  end
end
|
|
248
|
+
end
|
|
249
|
+
end
|
|
@@ -48,6 +48,26 @@ module Parse
|
|
|
48
48
|
SENSITIVE_FIELDS_SET = SENSITIVE_FIELDS.map(&:downcase).to_set.freeze
|
|
49
49
|
# Placeholder used in place of redacted values.
|
|
50
50
|
REDACTED_PLACEHOLDER = "[FILTERED]"
|
|
51
|
+
# Minimum length at which a numeric-only Array in a logged JSON
|
|
52
|
+
# body is compacted to a single placeholder string instead of
|
|
53
|
+
# printed verbatim. Two concerns drive this:
|
|
54
|
+
#
|
|
55
|
+
# 1. **Noise.** A 1536-float OpenAI embedding inlines as ~25 KB of
|
|
56
|
+
# JSON per logged row. Aggregation pipelines with
|
|
57
|
+
# `$vectorSearch.queryVector` and any save/fetch carrying a
|
|
58
|
+
# `:vector` field would otherwise drown operator logs.
|
|
59
|
+
# 2. **Sensitivity.** Embeddings are reversible-by-similarity:
|
|
60
|
+
# an attacker who scrapes operator logs can reconstruct
|
|
61
|
+
# high-level features of the source text (topic, sentiment,
|
|
62
|
+
# sometimes near-verbatim phrases for short inputs) by
|
|
63
|
+
# nearest-neighbor lookup against a public model.
|
|
64
|
+
#
|
|
65
|
+
# Threshold rationale: 32 is well below every common embedding
|
|
66
|
+
# width (BGE-small 384, Cohere 1024, OpenAI small 1536, OpenAI
|
|
67
|
+
# large 3072) and well above any normal Parse Array property
|
|
68
|
+
# (tags, role lists, etc.). Numeric-only check additionally
|
|
69
|
+
# protects normal long arrays of strings/objects.
|
|
70
|
+
LOG_VECTOR_COMPACT_THRESHOLD = 32
|
|
51
71
|
# Request headers that must never be printed verbatim in debug logs.
|
|
52
72
|
# Matched case-insensitively against Faraday header keys.
|
|
53
73
|
REDACTED_HEADERS = [
|
|
@@ -57,6 +77,31 @@ module Parse
|
|
|
57
77
|
"X-Parse-JavaScript-Key",
|
|
58
78
|
"Authorization",
|
|
59
79
|
"Cookie",
|
|
80
|
+
# Embedding-provider credentials (Parse::Embeddings::OpenAI and
|
|
81
|
+
# forthcoming Cohere/Voyage adapters). These never touch Parse
|
|
82
|
+
# Server itself, but they share the same Faraday log path when a
|
|
83
|
+
# caller mounts the embeddings connection through Parse logging.
|
|
84
|
+
# OpenAI's official auth header is `Authorization: Bearer …`
|
|
85
|
+
# (already covered above); Organization/Project are listed here
|
|
86
|
+
# since they're account-identifying metadata operators may not
|
|
87
|
+
# want to publish. `X-Api-Key` and `Anthropic-Api-Key` are
|
|
88
|
+
# reserved for forthcoming non-OpenAI providers.
|
|
89
|
+
"X-Api-Key",
|
|
90
|
+
"OpenAI-Organization",
|
|
91
|
+
"OpenAI-Project",
|
|
92
|
+
"Anthropic-Api-Key",
|
|
93
|
+
# Cohere, Voyage, Jina, and DashScope (Qwen) use Bearer auth
|
|
94
|
+
# (covered by "Authorization" above), but some operators front
|
|
95
|
+
# them with a proxy that rewrites to a vendor-specific header.
|
|
96
|
+
# These are listed defensively so a future header-form switch
|
|
97
|
+
# doesn't silently leak keys into Faraday logs. `Api-Key` is the
|
|
98
|
+
# bare form some vendor SDKs and proxies use; covered for parity.
|
|
99
|
+
"Cohere-Api-Key",
|
|
100
|
+
"Voyage-Api-Key",
|
|
101
|
+
"Jina-Api-Key",
|
|
102
|
+
"Api-Key",
|
|
103
|
+
"X-DashScope-Api-Key",
|
|
104
|
+
"DashScope-Api-Key",
|
|
60
105
|
].map(&:downcase).freeze
|
|
61
106
|
|
|
62
107
|
class << self
|
|
@@ -91,6 +136,7 @@ module Parse
|
|
|
91
136
|
after_structural = s
|
|
92
137
|
if (parsed = try_parse_json(s))
|
|
93
138
|
scrubbed = scrub_sensitive!(parsed)
|
|
139
|
+
compact_vectors!(scrubbed)
|
|
94
140
|
begin
|
|
95
141
|
after_structural = scrubbed.to_json
|
|
96
142
|
rescue StandardError
|
|
@@ -160,12 +206,60 @@ module Parse
|
|
|
160
206
|
node
|
|
161
207
|
end
|
|
162
208
|
|
|
209
|
+
# @!visibility private
# Recursively walk a parsed JSON structure replacing any
# numeric-only Array of length >= +LOG_VECTOR_COMPACT_THRESHOLD+
# with a compact placeholder string ("<vector dims=N>"). Mutates
# Hashes/Arrays in place; returns the node for chaining. Distinct
# pass from {scrub_sensitive!} because the criterion is shape
# (numeric array width), not key name.
#
# The walker does NOT descend into the replaced array — once a
# node is recognised as a vector its inner Numerics aren't of
# interest. Nested vectors (Array<Array<Numeric>>, e.g. a batched
# embedding response in a logged HTTP body) are caught at the
# inner array level on the next recursion.
#
# A vector-shaped ROOT array (a body that is itself one bare
# embedding) has no parent slot to overwrite, and callers discard
# the return value, so its contents are replaced in place with the
# single placeholder string: the logged JSON becomes
# ["<vector dims=N>"]. A frozen root is left untouched.
#
# @param node [Object] parsed JSON value (Hash, Array or scalar).
# @return [Object] the same +node+ object that was passed in.
def self.compact_vectors!(node)
  if vector_shape?(node)
    # The placeholder string is built before #replace runs, so it
    # still reports the original width.
    node.replace(["<vector dims=#{node.length}>"]) unless node.frozen?
    return node
  end

  case node
  when Hash
    # Assigning to an existing key while iterating is safe; no keys
    # are added or removed.
    node.each do |key, value|
      if vector_shape?(value)
        node[key] = "<vector dims=#{value.length}>"
      elsif value.is_a?(Hash) || value.is_a?(Array)
        compact_vectors!(value)
      end
    end
  when Array
    node.each_with_index do |item, i|
      if vector_shape?(item)
        node[i] = "<vector dims=#{item.length}>"
      elsif item.is_a?(Hash) || item.is_a?(Array)
        compact_vectors!(item)
      end
    end
  end
  node
end
|
|
243
|
+
|
|
244
|
+
# @!visibility private
# Decide whether +val+ should be treated as an embedding vector for
# log compaction. Three conditions must all hold: it is an Array, it
# has at least +LOG_VECTOR_COMPACT_THRESHOLD+ elements, and every
# element is Numeric. The Numeric requirement keeps long tag arrays,
# role lists and mixed-type arrays from being mangled; booleans are
# not Numeric in Ruby, so an all-boolean array is also left alone.
#
# @param val [Object] any parsed JSON value.
# @return [Boolean]
def self.vector_shape?(val)
  val.is_a?(Array) &&
    val.length >= LOG_VECTOR_COMPACT_THRESHOLD &&
    val.all? { |element| element.is_a?(Numeric) }
end
|
|
255
|
+
|
|
163
256
|
# @!visibility private
|
|
164
257
|
# If +str+ parses as JSON (object or array), scrub structurally and
|
|
165
258
|
# re-encode. Otherwise return the original string unchanged.
|
|
166
259
|
def self.maybe_scrub_embedded_json(str)
|
|
167
260
|
return str unless (inner = try_parse_json(str))
|
|
168
261
|
scrub_sensitive!(inner)
|
|
262
|
+
compact_vectors!(inner)
|
|
169
263
|
begin
|
|
170
264
|
inner.to_json
|
|
171
265
|
rescue StandardError
|