parse-stack-next 4.5.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/CHANGELOG.md +295 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +73 -0
- data/lib/parse/cache/redis.rb +190 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +75 -5
- data/parse-stack.png +0 -0
data/lib/parse/atlas_search.rb
CHANGED
|
@@ -243,6 +243,18 @@ module Parse
|
|
|
243
243
|
# @option options [Hash] :sort sort specification (default: by relevance score)
|
|
244
244
|
# @option options [Boolean] :raw return raw MongoDB documents (default: false)
|
|
245
245
|
# @option options [String] :class_name Parse class name for object conversion
|
|
246
|
+
# @option options [String] :session_token Parse session token used to scope
|
|
247
|
+
# ACL/CLP enforcement to the owning user.
|
|
248
|
+
# @option options [Boolean] :master run with master-key semantics and bypass
|
|
249
|
+
# ACL/CLP enforcement (default: false).
|
|
250
|
+
# @option options [Parse::User, Parse::Pointer] :acl_user act as the given
|
|
251
|
+
# user pointer for ACL evaluation (no REST equivalent; mongo-direct only).
|
|
252
|
+
# @option options [String, Parse::Role] :acl_role act as the given role for
|
|
253
|
+
# ACL evaluation (no REST equivalent; mongo-direct only).
|
|
254
|
+
# @option options [Symbol] :read_preference MongoDB read preference applied
|
|
255
|
+
# to the underlying collection (e.g. +:secondary+).
|
|
256
|
+
# @option options [Integer] :max_time_ms maximum server-side execution time
|
|
257
|
+
# in milliseconds for the aggregate command.
|
|
246
258
|
#
|
|
247
259
|
# @return [Parse::AtlasSearch::SearchResult] search result object
|
|
248
260
|
#
|
|
@@ -393,6 +405,19 @@ module Parse
|
|
|
393
405
|
# @option options [Integer] :limit max suggestions to return (default: 10)
|
|
394
406
|
# @option options [Hash] :filter additional constraints
|
|
395
407
|
# @option options [Boolean] :raw return raw documents (default: false)
|
|
408
|
+
# @option options [String] :class_name Parse class name for object conversion.
|
|
409
|
+
# @option options [String] :session_token Parse session token used to scope
|
|
410
|
+
# ACL/CLP enforcement to the owning user.
|
|
411
|
+
# @option options [Boolean] :master run with master-key semantics and bypass
|
|
412
|
+
# ACL/CLP enforcement (default: false).
|
|
413
|
+
# @option options [Parse::User, Parse::Pointer] :acl_user act as the given
|
|
414
|
+
# user pointer for ACL evaluation (no REST equivalent; mongo-direct only).
|
|
415
|
+
# @option options [String, Parse::Role] :acl_role act as the given role for
|
|
416
|
+
# ACL evaluation (no REST equivalent; mongo-direct only).
|
|
417
|
+
# @option options [Symbol] :read_preference MongoDB read preference applied
|
|
418
|
+
# to the underlying collection (e.g. +:secondary+).
|
|
419
|
+
# @option options [Integer] :max_time_ms maximum server-side execution time
|
|
420
|
+
# in milliseconds for the aggregate command.
|
|
396
421
|
#
|
|
397
422
|
# @return [Parse::AtlasSearch::AutocompleteResult] autocomplete result
|
|
398
423
|
#
|
|
@@ -509,7 +534,14 @@ module Parse
|
|
|
509
534
|
# @param collection_name [String] the Parse collection name
|
|
510
535
|
# @param query [String, nil] the search query text (nil for match-all)
|
|
511
536
|
# @param facets [Hash] facet definitions
|
|
512
|
-
# @param options [Hash] search options (same as #search
|
|
537
|
+
# @param options [Hash] search options (same as {#search}; see that
|
|
538
|
+
# method for the full list of accepted +@option+ entries including
|
|
539
|
+
# +:index+, +:fields+, +:fuzzy+, +:limit+, +:filter+, +:read_preference+,
|
|
540
|
+
# +:max_time_ms+, and the scoping kwargs +:master+, +:session_token+,
|
|
541
|
+
# +:acl_user+, +:acl_role+). Note: scoped identity kwargs require
|
|
542
|
+
# +master: true+ to be passed explicitly — $searchMeta bucket counts
|
|
543
|
+
# cannot be filtered by ACL after the fact, so the method refuses
|
|
544
|
+
# to silently downgrade.
|
|
513
545
|
#
|
|
514
546
|
# @return [Parse::AtlasSearch::FacetedResult] faceted result
|
|
515
547
|
#
|
|
@@ -944,19 +976,15 @@ module Parse
|
|
|
944
976
|
objects = parse_results.each_with_index.map do |doc, idx|
|
|
945
977
|
obj = build_parse_object(doc, class_name)
|
|
946
978
|
raw_doc = raw_results[idx]
|
|
947
|
-
# Attach search metadata from original raw document
|
|
979
|
+
# Attach search metadata from original raw document. `search_score`
|
|
980
|
+
# and `search_highlights` readers are defined once on Parse::Object
|
|
981
|
+
# (see lib/parse/model/object.rb) so we only set the ivars here —
|
|
982
|
+
# no per-row singleton method definition.
|
|
948
983
|
if obj && raw_doc["_score"]
|
|
949
984
|
obj.instance_variable_set(:@_search_score, raw_doc["_score"])
|
|
950
|
-
# Define accessor if not already defined
|
|
951
|
-
unless obj.respond_to?(:search_score)
|
|
952
|
-
obj.define_singleton_method(:search_score) { @_search_score }
|
|
953
|
-
end
|
|
954
985
|
end
|
|
955
986
|
if obj && raw_doc["_highlights"]
|
|
956
987
|
obj.instance_variable_set(:@_search_highlights, raw_doc["_highlights"])
|
|
957
|
-
unless obj.respond_to?(:search_highlights)
|
|
958
|
-
obj.define_singleton_method(:search_highlights) { @_search_highlights }
|
|
959
|
-
end
|
|
960
988
|
end
|
|
961
989
|
obj
|
|
962
990
|
end.compact
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "connection_pool"
|
|
5
|
+
require "moneta"
|
|
6
|
+
|
|
7
|
+
module Parse
|
|
8
|
+
module Cache
|
|
9
|
+
# Moneta-compatible facade over a ConnectionPool of Moneta stores. The
|
|
10
|
+
# Faraday caching middleware only calls four methods on its store
|
|
11
|
+
# (`[]`, `key?`, `delete`, `store`); this class checks out a backend
|
|
12
|
+
# for each of them via `@pool.with`.
|
|
13
|
+
#
|
|
14
|
+
# Why a pool: a single Moneta-Redis store wraps one Redis connection.
|
|
15
|
+
# Under a multi-threaded Puma worker (or any concurrent caller), threads
|
|
16
|
+
# serialize on that connection's mutex. A pool of N stores lets up to N
|
|
17
|
+
# cache calls run in parallel.
|
|
18
|
+
#
|
|
19
|
+
# Note that a cache hit costs two checkouts (`key?` then `[]`). That is
|
|
20
|
+
# accepted to keep behavior identical to a plain Moneta store; callers
|
|
21
|
+
# should size the pool with that in mind (default 5, which matches the
|
|
22
|
+
# Puma default thread count).
|
|
23
|
+
class Pool
|
|
24
|
+
# The wrapped ConnectionPool instance.
|
|
25
|
+
attr_reader :pool
|
|
26
|
+
|
|
27
|
+
# @param size [Integer] number of pooled backend stores.
|
|
28
|
+
# @param timeout [Numeric] seconds to wait for a checkout before
|
|
29
|
+
# raising `ConnectionPool::TimeoutError`.
|
|
30
|
+
# @yield Block invoked to build a single backend store. Must return a
|
|
31
|
+
# Moneta store responding to `[]`, `key?`, `delete`, `store`.
|
|
32
|
+
def initialize(size: 5, timeout: 5, &block)
|
|
33
|
+
raise ArgumentError, "Parse::Cache::Pool requires a block that builds a Moneta store" unless block_given?
|
|
34
|
+
@pool = ConnectionPool.new(size: size, timeout: timeout, &block)
|
|
35
|
+
@closed = false
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def [](key)
|
|
39
|
+
@pool.with { |store| store[key] }
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def key?(key)
|
|
43
|
+
@pool.with { |store| store.key?(key) }
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def delete(key)
|
|
47
|
+
@pool.with { |store| store.delete(key) }
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def store(key, value, options = {})
|
|
51
|
+
@pool.with { |store| store.store(key, value, options) }
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Clear the underlying backend. Pooled Moneta stores all point at the
|
|
55
|
+
# same Redis DB, so a single checkout suffices — issuing `clear` on
|
|
56
|
+
# one connection flushes the DB for every connection.
|
|
57
|
+
def clear
|
|
58
|
+
@pool.with { |store| store.clear if store.respond_to?(:clear) }
|
|
59
|
+
self
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Close all pooled backends. Safe to call multiple times — repeat
|
|
63
|
+
# calls are no-ops. `ConnectionPool#shutdown` raises
|
|
64
|
+
# `ConnectionPool::PoolShuttingDownError` on a second invocation,
|
|
65
|
+
# so we gate it with a `@closed` flag.
|
|
66
|
+
def close
|
|
67
|
+
return if @closed
|
|
68
|
+
@closed = true
|
|
69
|
+
@pool.shutdown { |store| store.close if store.respond_to?(:close) }
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "moneta"
|
|
5
|
+
require_relative "pool"
|
|
6
|
+
|
|
7
|
+
module Parse
|
|
8
|
+
module Cache
|
|
9
|
+
# Ergonomic Redis cache builder for Parse Stack. Composes a
|
|
10
|
+
# ConnectionPool of Moneta-Redis stores and carries an optional
|
|
11
|
+
# `namespace` that `Parse::Client` will pick up automatically — there
|
|
12
|
+
# is no need to also pass `cache_namespace:` to `Parse.setup` when
|
|
13
|
+
# using this wrapper.
|
|
14
|
+
#
|
|
15
|
+
# Usage:
|
|
16
|
+
# Parse.setup(
|
|
17
|
+
# cache: Parse::Cache::Redis.new(
|
|
18
|
+
# url: "redis://localhost:6379/0",
|
|
19
|
+
# namespace: "app_x",
|
|
20
|
+
# pool_size: 10,
|
|
21
|
+
# ),
|
|
22
|
+
# expires: 60,
|
|
23
|
+
# ...
|
|
24
|
+
# )
|
|
25
|
+
#
|
|
26
|
+
# The instance is a Moneta-compatible store (it delegates the four
|
|
27
|
+
# methods the Faraday caching middleware uses — `[]`, `key?`,
|
|
28
|
+
# `delete`, `store` — to a pooled backend), so it can be passed
|
|
29
|
+
# directly to `Parse.setup(cache:)` / `Parse::Client.new(cache:)`.
|
|
30
|
+
class Redis
|
|
31
|
+
# @return [String, nil] cache key namespace prefix (or nil if not set).
|
|
32
|
+
attr_reader :namespace
|
|
33
|
+
|
|
34
|
+
# @return [Integer] pool size.
|
|
35
|
+
attr_reader :pool_size
|
|
36
|
+
|
|
37
|
+
# @return [String] Redis connection URL.
|
|
38
|
+
attr_reader :url
|
|
39
|
+
|
|
40
|
+
# @param url [String] Redis URL (e.g. `"redis://localhost:6379/0"`).
|
|
41
|
+
# @param namespace [String, nil] optional key prefix so multiple Parse
|
|
42
|
+
# apps can share one Redis without colliding. When non-nil, the
|
|
43
|
+
# namespace is automatically forwarded to the caching middleware
|
|
44
|
+
# as `cache_namespace:`.
|
|
45
|
+
# @param pool_size [Integer] number of pooled Moneta-Redis stores.
|
|
46
|
+
# Defaults to 5 (the Puma default thread count).
|
|
47
|
+
#
|
|
48
|
+
# **Sizing math (per Faraday request):**
|
|
49
|
+
# - cache hit: `key?` + `[]` = **2 checkouts**
|
|
50
|
+
# - GET miss + successful store: `key?` + 3 variant deletes
|
|
51
|
+
# (anonymous + master-key sibling + final key) + 1 `store` in
|
|
52
|
+
# `on_complete` = **up to 5 checkouts**
|
|
53
|
+
# - non-GET write (POST/PUT/DELETE): 3 variant deletes =
|
|
54
|
+
# **3 checkouts**
|
|
55
|
+
#
|
|
56
|
+
# The worst case (5) is on the write-through-after-miss path, not
|
|
57
|
+
# the hit path. Rule of thumb: start at `pool_size = RAILS_MAX_THREADS`,
|
|
58
|
+
# then bump it up if you observe `ConnectionPool::TimeoutError` in
|
|
59
|
+
# `parse.cache.error` notifications (the middleware swallows that
|
|
60
|
+
# error into a passthrough request rather than raising to the caller).
|
|
61
|
+
# @param pool_timeout [Numeric] seconds to wait for a backend
|
|
62
|
+
# checkout before raising `ConnectionPool::TimeoutError`. Defaults
|
|
63
|
+
# to 5s. The caching middleware catches that error and falls back
|
|
64
|
+
# to a passthrough request rather than raising to the caller.
|
|
65
|
+
# @param moneta_options [Hash] extra options passed through to
|
|
66
|
+
# `Moneta.new(:Redis, ...)` (e.g. `:db`, `:connect_timeout`).
|
|
67
|
+
# `expires: true` is set automatically so per-key TTLs supplied
|
|
68
|
+
# by the caching middleware (the `:expires` Faraday option) are
|
|
69
|
+
# honored by Redis. Pass `expires: false` here to opt out — but
|
|
70
|
+
# note that doing so causes cached responses to live forever,
|
|
71
|
+
# which is rarely what you want for a session-token-scoped
|
|
72
|
+
# response cache.
|
|
73
|
+
def initialize(url:, namespace: nil, pool_size: 5, pool_timeout: 5, **moneta_options)
|
|
74
|
+
@url = url
|
|
75
|
+
@namespace = normalize_namespace(namespace)
|
|
76
|
+
@pool_size = pool_size
|
|
77
|
+
@pool_timeout = pool_timeout
|
|
78
|
+
# Default expires: true so per-call `expires:` (the TTL the
|
|
79
|
+
# Faraday caching middleware passes on store) is honored. The
|
|
80
|
+
# Moneta-Redis adapter ignores per-call expires unless the
|
|
81
|
+
# store was constructed with this flag. Without it, cached
|
|
82
|
+
# session-scoped REST responses outlive their token's
|
|
83
|
+
# validity. Callers can still pass `expires: false` to opt out.
|
|
84
|
+
merged_options = { expires: true }.merge(moneta_options)
|
|
85
|
+
@moneta_options = merged_options
|
|
86
|
+
@closed = false
|
|
87
|
+
@pool = Pool.new(size: pool_size, timeout: pool_timeout) do
|
|
88
|
+
Moneta.new(:Redis, { url: url }.merge(merged_options))
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def [](key)
|
|
93
|
+
@pool[key]
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def key?(key)
|
|
97
|
+
@pool.key?(key)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def delete(key)
|
|
101
|
+
@pool.delete(key)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def store(key, value, options = {})
|
|
105
|
+
@pool.store(key, value, options)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Clear cached entries belonging to this wrapper. Required for
|
|
109
|
+
# `Parse::Client#clear_cache!` compatibility.
|
|
110
|
+
#
|
|
111
|
+
# **Namespace-scoped when a namespace is set:** the wrapper walks
|
|
112
|
+
# `<namespace>:*` via Redis SCAN and DELs the matching keys,
|
|
113
|
+
# leaving other tenants on the same DB untouched. When no
|
|
114
|
+
# namespace is configured the wrapper falls back to `FLUSHDB` on
|
|
115
|
+
# the backing DB — same blast radius as previous versions, but
|
|
116
|
+
# only for unnamespaced deployments. To opt into the wide
|
|
117
|
+
# FLUSHDB explicitly (e.g. ops tooling), call {#flush_db!}.
|
|
118
|
+
def clear
|
|
119
|
+
if @namespace
|
|
120
|
+
delete_keys_matching!("#{@namespace}:*")
|
|
121
|
+
else
|
|
122
|
+
@pool.clear
|
|
123
|
+
end
|
|
124
|
+
self
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
# Issue `FLUSHDB` on the backing Redis DB, regardless of whether a
|
|
128
|
+
# namespace is configured. Evicts every key on the selected DB,
|
|
129
|
+
# including unrelated tenants — use only for ops tooling that
|
|
130
|
+
# owns the whole DB.
|
|
131
|
+
def flush_db!
|
|
132
|
+
@pool.clear
|
|
133
|
+
self
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Close all pooled connections. Safe to call multiple times.
|
|
137
|
+
def close
|
|
138
|
+
return if @closed
|
|
139
|
+
@closed = true
|
|
140
|
+
@pool.close
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
private
|
|
144
|
+
|
|
145
|
+
def delete_keys_matching!(pattern)
|
|
146
|
+
@pool.pool.with do |store|
|
|
147
|
+
redis = backend_client(store)
|
|
148
|
+
# SCAN-DEL loop. `count:` is a hint to the server; the actual
|
|
149
|
+
# batch size returned varies. Loop until the cursor wraps back
|
|
150
|
+
# to "0".
|
|
151
|
+
cursor = "0"
|
|
152
|
+
loop do
|
|
153
|
+
cursor, keys = redis.scan(cursor, match: pattern, count: 1000)
|
|
154
|
+
redis.del(*keys) unless keys.empty?
|
|
155
|
+
break if cursor == "0"
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def backend_client(moneta_store)
|
|
161
|
+
# Walk down the Moneta proxy chain (Expires → Adapter → redis-rb)
|
|
162
|
+
# until we reach an object that quacks like the redis-rb client
|
|
163
|
+
# (i.e. responds to #scan). Moneta wraps the actual adapter when
|
|
164
|
+
# `expires: true` is passed, and the adapter then exposes the
|
|
165
|
+
# underlying redis-rb client via `#backend` (modern releases) or
|
|
166
|
+
# the `@backend` ivar (older releases).
|
|
167
|
+
node = moneta_store
|
|
168
|
+
12.times do
|
|
169
|
+
return node if node.respond_to?(:scan)
|
|
170
|
+
if node.respond_to?(:backend)
|
|
171
|
+
node = node.backend
|
|
172
|
+
elsif node.instance_variable_defined?(:@backend)
|
|
173
|
+
node = node.instance_variable_get(:@backend)
|
|
174
|
+
elsif node.instance_variable_defined?(:@adapter)
|
|
175
|
+
node = node.instance_variable_get(:@adapter)
|
|
176
|
+
else
|
|
177
|
+
break
|
|
178
|
+
end
|
|
179
|
+
break if node.nil?
|
|
180
|
+
end
|
|
181
|
+
node
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def normalize_namespace(ns)
|
|
185
|
+
s = ns.to_s.chomp(":")
|
|
186
|
+
s.empty? ? nil : s
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
end
|
|
@@ -48,6 +48,26 @@ module Parse
|
|
|
48
48
|
SENSITIVE_FIELDS_SET = SENSITIVE_FIELDS.map(&:downcase).to_set.freeze
|
|
49
49
|
# Placeholder used in place of redacted values.
|
|
50
50
|
REDACTED_PLACEHOLDER = "[FILTERED]"
|
|
51
|
+
# Minimum length at which a numeric-only Array in a logged JSON
|
|
52
|
+
# body is compacted to a single placeholder string instead of
|
|
53
|
+
# printed verbatim. Two concerns drive this:
|
|
54
|
+
#
|
|
55
|
+
# 1. **Noise.** A 1536-float OpenAI embedding inlines as ~25 KB of
|
|
56
|
+
# JSON per logged row. Aggregation pipelines with
|
|
57
|
+
# `$vectorSearch.queryVector` and any save/fetch carrying a
|
|
58
|
+
# `:vector` field would otherwise drown operator logs.
|
|
59
|
+
# 2. **Sensitivity.** Embeddings are reversible-by-similarity:
|
|
60
|
+
# an attacker who scrapes operator logs can reconstruct
|
|
61
|
+
# high-level features of the source text (topic, sentiment,
|
|
62
|
+
# sometimes near-verbatim phrases for short inputs) by
|
|
63
|
+
# nearest-neighbor lookup against a public model.
|
|
64
|
+
#
|
|
65
|
+
# Threshold rationale: 32 is well below every common embedding
|
|
66
|
+
# width (BGE-small 384, Cohere 1024, OpenAI small 1536, OpenAI
|
|
67
|
+
# large 3072) and well above any normal Parse Array property
|
|
68
|
+
# (tags, role lists, etc.). Numeric-only check additionally
|
|
69
|
+
# protects normal long arrays of strings/objects.
|
|
70
|
+
LOG_VECTOR_COMPACT_THRESHOLD = 32
|
|
51
71
|
# Request headers that must never be printed verbatim in debug logs.
|
|
52
72
|
# Matched case-insensitively against Faraday header keys.
|
|
53
73
|
REDACTED_HEADERS = [
|
|
@@ -57,6 +77,31 @@ module Parse
|
|
|
57
77
|
"X-Parse-JavaScript-Key",
|
|
58
78
|
"Authorization",
|
|
59
79
|
"Cookie",
|
|
80
|
+
# Embedding-provider credentials (Parse::Embeddings::OpenAI and
|
|
81
|
+
# forthcoming Cohere/Voyage adapters). These never touch Parse
|
|
82
|
+
# Server itself, but they share the same Faraday log path when a
|
|
83
|
+
# caller mounts the embeddings connection through Parse logging.
|
|
84
|
+
# OpenAI's official auth header is `Authorization: Bearer …`
|
|
85
|
+
# (already covered above); Organization/Project are listed here
|
|
86
|
+
# since they're account-identifying metadata operators may not
|
|
87
|
+
# want to publish. `X-Api-Key` and `Anthropic-Api-Key` are
|
|
88
|
+
# reserved for forthcoming non-OpenAI providers.
|
|
89
|
+
"X-Api-Key",
|
|
90
|
+
"OpenAI-Organization",
|
|
91
|
+
"OpenAI-Project",
|
|
92
|
+
"Anthropic-Api-Key",
|
|
93
|
+
# Cohere, Voyage, Jina, and DashScope (Qwen) use Bearer auth
|
|
94
|
+
# (covered by "Authorization" above), but some operators front
|
|
95
|
+
# them with a proxy that rewrites to a vendor-specific header.
|
|
96
|
+
# These are listed defensively so a future header-form switch
|
|
97
|
+
# doesn't silently leak keys into Faraday logs. `Api-Key` is the
|
|
98
|
+
# bare form some vendor SDKs and proxies use; covered for parity.
|
|
99
|
+
"Cohere-Api-Key",
|
|
100
|
+
"Voyage-Api-Key",
|
|
101
|
+
"Jina-Api-Key",
|
|
102
|
+
"Api-Key",
|
|
103
|
+
"X-DashScope-Api-Key",
|
|
104
|
+
"DashScope-Api-Key",
|
|
60
105
|
].map(&:downcase).freeze
|
|
61
106
|
|
|
62
107
|
class << self
|
|
@@ -91,6 +136,7 @@ module Parse
|
|
|
91
136
|
after_structural = s
|
|
92
137
|
if (parsed = try_parse_json(s))
|
|
93
138
|
scrubbed = scrub_sensitive!(parsed)
|
|
139
|
+
compact_vectors!(scrubbed)
|
|
94
140
|
begin
|
|
95
141
|
after_structural = scrubbed.to_json
|
|
96
142
|
rescue StandardError
|
|
@@ -160,12 +206,60 @@ module Parse
|
|
|
160
206
|
node
|
|
161
207
|
end
|
|
162
208
|
|
|
209
|
+
# @!visibility private
|
|
210
|
+
# Recursively walk a parsed JSON structure replacing any
|
|
211
|
+
# numeric-only Array of length >= +LOG_VECTOR_COMPACT_THRESHOLD+
|
|
212
|
+
# with a compact placeholder string ("<vector dims=N>"). Mutates
|
|
213
|
+
# Hashes/Arrays in place; returns the node for chaining. Distinct
|
|
214
|
+
# pass from {scrub_sensitive!} because the criterion is shape
|
|
215
|
+
# (numeric array width), not key name.
|
|
216
|
+
#
|
|
217
|
+
# The walker does NOT descend into the replaced array — once a
|
|
218
|
+
# node is recognised as a vector its inner Numerics aren't of
|
|
219
|
+
# interest. Nested vectors (Array<Array<Numeric>>, e.g. a batched
|
|
220
|
+
# embedding response in a logged HTTP body) are caught at the
|
|
221
|
+
# inner array level on the next recursion.
|
|
222
|
+
def self.compact_vectors!(node)
|
|
223
|
+
case node
|
|
224
|
+
when Hash
|
|
225
|
+
node.each do |key, value|
|
|
226
|
+
if vector_shape?(value)
|
|
227
|
+
node[key] = "<vector dims=#{value.length}>"
|
|
228
|
+
elsif value.is_a?(Hash) || value.is_a?(Array)
|
|
229
|
+
compact_vectors!(value)
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
when Array
|
|
233
|
+
node.each_with_index do |item, i|
|
|
234
|
+
if vector_shape?(item)
|
|
235
|
+
node[i] = "<vector dims=#{item.length}>"
|
|
236
|
+
elsif item.is_a?(Hash) || item.is_a?(Array)
|
|
237
|
+
compact_vectors!(item)
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
node
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# @!visibility private
|
|
245
|
+
# An Array is "vector-shaped" if it meets the compaction threshold
|
|
246
|
+
# AND every element is Numeric. The numeric check prevents long
|
|
247
|
+
# tag arrays / role lists / mixed-type arrays from being mangled.
|
|
248
|
+
# Boolean is not Numeric in Ruby, so an array of booleans (rare
|
|
249
|
+
# but possible) is left alone — also fine.
|
|
250
|
+
def self.vector_shape?(val)
|
|
251
|
+
return false unless val.is_a?(Array)
|
|
252
|
+
return false if val.length < LOG_VECTOR_COMPACT_THRESHOLD
|
|
253
|
+
val.all? { |x| x.is_a?(Numeric) }
|
|
254
|
+
end
|
|
255
|
+
|
|
163
256
|
# @!visibility private
|
|
164
257
|
# If +str+ parses as JSON (object or array), scrub structurally and
|
|
165
258
|
# re-encode. Otherwise return the original string unchanged.
|
|
166
259
|
def self.maybe_scrub_embedded_json(str)
|
|
167
260
|
return str unless (inner = try_parse_json(str))
|
|
168
261
|
scrub_sensitive!(inner)
|
|
262
|
+
compact_vectors!(inner)
|
|
169
263
|
begin
|
|
170
264
|
inner.to_json
|
|
171
265
|
rescue StandardError
|
data/lib/parse/client/caching.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
require "faraday"
|
|
5
5
|
require "moneta"
|
|
6
|
+
require "connection_pool"
|
|
6
7
|
require "digest"
|
|
7
8
|
require_relative "protocol"
|
|
8
9
|
|
|
@@ -81,6 +82,14 @@ module Parse
|
|
|
81
82
|
@opts = { expires: 0 }
|
|
82
83
|
@opts.merge!(opts) if opts.is_a?(Hash)
|
|
83
84
|
@expires = @opts[:expires]
|
|
85
|
+
# Optional cache key namespace so two Parse apps sharing one Redis don't
|
|
86
|
+
# collide (e.g. `mk:/classes/Song/abc` is the same path for both apps).
|
|
87
|
+
# When set, keys become `<namespace>:<existing-prefix>:<url>`. Empty
|
|
88
|
+
# string is treated as nil. Trailing `:` is stripped once so users can
|
|
89
|
+
# pass either `"app_x"` or `"app_x:"`.
|
|
90
|
+
ns = @opts[:namespace].to_s
|
|
91
|
+
ns = ns.chomp(":")
|
|
92
|
+
@namespace = ns.empty? ? nil : ns
|
|
84
93
|
|
|
85
94
|
unless [:key?, :[], :delete, :store].all? { |method| @store.respond_to?(method) }
|
|
86
95
|
raise ArgumentError, "Caching store object must a Moneta key/value store."
|
|
@@ -134,21 +143,36 @@ module Parse
|
|
|
134
143
|
@cache_key = "mk:#{@cache_key}" # prefix for master key requests
|
|
135
144
|
end
|
|
136
145
|
|
|
146
|
+
# Namespace outermost so a SCAN over `<namespace>:*` evicts a whole
|
|
147
|
+
# tenant/app cleanly without touching another app's entries.
|
|
148
|
+
@cache_key = "#{@namespace}:#{@cache_key}" if @namespace
|
|
149
|
+
|
|
150
|
+
url_path = url.path
|
|
151
|
+
|
|
137
152
|
begin
|
|
138
153
|
# Skip cache read if write_only mode is enabled
|
|
139
154
|
if method == :get && @cache_key.present? && !@write_only && @store.key?(@cache_key)
|
|
140
|
-
|
|
155
|
+
# Debug-log the URL **path only** — `url.to_s` would include the
|
|
156
|
+
# query string, which Parse encodes JSON `where=` into and may
|
|
157
|
+
# contain PII. Same redaction discipline as the AS::N payload.
|
|
158
|
+
puts("[Parse::Cache] Hit >> #{url_path}") if self.class.logging.present?
|
|
141
159
|
response = Faraday::Response.new
|
|
142
160
|
begin
|
|
143
161
|
cache_data = @store[@cache_key] # previous cached response
|
|
144
162
|
rescue => e
|
|
145
|
-
|
|
163
|
+
# Log only the class name — some Moneta/Redis drivers echo the
|
|
164
|
+
# offending key in `e.message`, and our key contains a hashed
|
|
165
|
+
# session-token prefix that we treat as side-channel material.
|
|
166
|
+
puts "[Parse::Cache] Error: #{e.class.name}"
|
|
167
|
+
instrument_cache(:error, method: method, url_path: url_path, error: e.class.name)
|
|
146
168
|
cache_data = nil
|
|
147
169
|
end
|
|
148
170
|
|
|
149
171
|
# check if the store was from a legacy parse-stack cache value which
|
|
150
172
|
# is stored as Faraday::Env. The new system stores less content in a simple hash
|
|
151
173
|
# for improved interoperability and access time.
|
|
174
|
+
body = nil
|
|
175
|
+
response_headers = nil
|
|
152
176
|
if cache_data.is_a?(Faraday::Env)
|
|
153
177
|
body = cache_data.respond_to?(:body) ? cache_data.body : nil
|
|
154
178
|
response_headers = cache_data.response_headers || {}
|
|
@@ -160,24 +184,43 @@ module Parse
|
|
|
160
184
|
if cache_data.present? && body.present?
|
|
161
185
|
response_headers[CACHE_RESPONSE_HEADER] = "true"
|
|
162
186
|
response.finish({ status: 200, response_headers: response_headers, body: body })
|
|
187
|
+
instrument_cache(:hit, method: method, url_path: url_path)
|
|
163
188
|
return response
|
|
164
189
|
else
|
|
165
|
-
|
|
190
|
+
delete_cache_variants(url)
|
|
191
|
+
instrument_cache(:miss, method: method, url_path: url_path, reason: :empty_payload)
|
|
166
192
|
end
|
|
193
|
+
elsif method == :get && @cache_key.present? && !@write_only
|
|
194
|
+
# GET miss: opportunistically clear any sibling variants of the
|
|
195
|
+
# current namespace (anonymous `<url>` and master-key `mk:<url>`
|
|
196
|
+
# under the same namespace) so a stale variant from a prior
|
|
197
|
+
# request flavor doesn't linger until TTL.
|
|
198
|
+
#
|
|
199
|
+
# When @namespace is set we deliberately do NOT touch the bare
|
|
200
|
+
# un-namespaced `<url>` / `mk:<url>` keys — those could belong to
|
|
201
|
+
# another Parse app sharing the Redis DB, and cross-namespace
|
|
202
|
+
# eviction would be a blast-radius bug, not a fix. Operators
|
|
203
|
+
# upgrading an SDK that previously wrote un-namespaced keys
|
|
204
|
+
# should evict those once at upgrade time via SCAN.
|
|
205
|
+
delete_cache_variants(url)
|
|
206
|
+
instrument_cache(:miss, method: method, url_path: url_path)
|
|
207
|
+
elsif method == :get && @cache_key.present? && @write_only
|
|
208
|
+
delete_cache_variants(url)
|
|
209
|
+
instrument_cache(:miss, method: method, url_path: url_path, reason: :write_only)
|
|
167
210
|
elsif @cache_key.present?
|
|
168
211
|
#non GET requests should clear the cache for that same resource path.
|
|
169
212
|
#ex. a POST to /1/classes/Artist/<objectId> should delete the cache for a GET
|
|
170
213
|
# request for the same '/1/classes/Artist/<objectId>' where objectId are equivalent
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
@store.delete @cache_key # final key
|
|
214
|
+
delete_cache_variants(url)
|
|
215
|
+
instrument_cache(:delete, method: method, url_path: url_path)
|
|
174
216
|
end
|
|
175
|
-
rescue ::TypeError, Errno::EINVAL, Redis::CannotConnectError, Redis::TimeoutError => e
|
|
217
|
+
rescue ::TypeError, Errno::EINVAL, Redis::CannotConnectError, Redis::TimeoutError, ConnectionPool::TimeoutError => e
|
|
176
218
|
# if the cache store fails to connect, catch the exception but proceed
|
|
177
219
|
# with the regular request, but turn off caching for this request. It is possible
|
|
178
220
|
# that the cache connection resumes at a later point, so this is temporary.
|
|
179
221
|
@enabled = false
|
|
180
|
-
puts "[Parse::Cache] Error: #{e}"
|
|
222
|
+
puts "[Parse::Cache] Error: #{e.class.name}"
|
|
223
|
+
instrument_cache(:error, method: method, url_path: url_path, error: e.class.name)
|
|
181
224
|
end
|
|
182
225
|
|
|
183
226
|
@app.call(env).on_complete do |response_env|
|
|
@@ -186,18 +229,75 @@ module Parse
|
|
|
186
229
|
|
|
187
230
|
if @enabled && method == :get && CACHEABLE_HTTP_CODES.include?(response_env.status) &&
|
|
188
231
|
response_env.body.present? && response_env.response_headers[CONTENT_LENGTH_KEY].to_i.between?(20, 1_250_000)
|
|
232
|
+
store_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
189
233
|
begin
|
|
190
234
|
@store.store(@cache_key,
|
|
191
235
|
{ headers: response_env.response_headers, body: response_env.body },
|
|
192
236
|
expires: @expires)
|
|
237
|
+
duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - store_start) * 1000.0).round(3)
|
|
238
|
+
instrument_cache(:store, method: method, url_path: url_path, duration_ms: duration_ms)
|
|
193
239
|
rescue => e
|
|
194
|
-
puts "[Parse::Cache] Store Error: #{e}"
|
|
240
|
+
puts "[Parse::Cache] Store Error: #{e.class.name}"
|
|
241
|
+
instrument_cache(:error, method: method, url_path: url_path, error: e.class.name)
|
|
195
242
|
end
|
|
196
243
|
end # if
|
|
197
244
|
# do something with the response
|
|
198
245
|
# response_env[:response_headers].merge!(...)
|
|
199
246
|
end
|
|
200
247
|
end
|
|
248
|
+
|
|
249
|
+
private
|
|
250
|
+
|
|
251
|
+
# Emit an ActiveSupport::Notifications event under the `parse.cache.*`
|
|
252
|
+
# namespace.
|
|
253
|
+
#
|
|
254
|
+
# **Payload shape (stable):** `{ event:, namespace:, method:, url_path:,
|
|
255
|
+
# [reason:], [duration_ms:], [error:] }`.
|
|
256
|
+
#
|
|
257
|
+
# **Security invariants:**
|
|
258
|
+
# - The cache key is NEVER emitted. The key contains a hashed
|
|
259
|
+
# session-token prefix that would be a side-channel for "this user
|
|
260
|
+
# has data at this URL" enumeration.
|
|
261
|
+
# - `url_path` is `URI#path` only — query strings are stripped because
|
|
262
|
+
# Parse encodes query JSON there (potentially long or PII-bearing).
|
|
263
|
+
# - `error` is `Exception#class.name` only — never the exception
|
|
264
|
+
# message or backtrace.
|
|
265
|
+
# - `namespace` is whatever the SDK consumer configured at setup. Treat
|
|
266
|
+
# subscribers as you would your application log sink: they observe
|
|
267
|
+
# the namespace, the HTTP method, and the URL path of every cached
|
|
268
|
+
# GET / invalidating write.
|
|
269
|
+
#
|
|
270
|
+
# **Subscriber discipline:** ActiveSupport::Notifications runs
|
|
271
|
+
# subscribers **synchronously on the Faraday request thread**. A
|
|
272
|
+
# blocking subscriber (e.g. synchronous I/O to a slow sink) blocks
|
|
273
|
+
# every cached request for the duration of its work, and an exception
|
|
274
|
+
# raised inside a subscriber will surface as a request failure. Keep
|
|
275
|
+
# subscribers cheap — counter increments, in-memory accumulators, or
|
|
276
|
+
# non-blocking sinks like StatsD-over-UDP.
|
|
277
|
+
# @!visibility private
|
|
278
|
+
def instrument_cache(event, **extra)
|
|
279
|
+
return unless defined?(ActiveSupport::Notifications)
|
|
280
|
+
payload = { event: event, namespace: @namespace }.merge!(extra)
|
|
281
|
+
ActiveSupport::Notifications.instrument("parse.cache.#{event}", payload)
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
# Delete the canonical cache_key plus its plain and master-key-prefixed
|
|
285
|
+
# (`mk:`) variants, scoped to @namespace when one is set. Called on both
|
|
286
|
+
# GET misses (defensive cleanup of stale sibling variants) and non-GET
|
|
287
|
+
# writes (cache invalidation for the resource).
|
|
288
|
+
# @!visibility private
|
|
289
|
+
def delete_cache_variants(url)
|
|
290
|
+
if @namespace
|
|
291
|
+
# Namespaced: only delete our app's variants so a write through
|
|
292
|
+
# client A doesn't blow away client B's cache when both share Redis.
|
|
293
|
+
@store.delete "#{@namespace}:#{url.to_s}"
|
|
294
|
+
@store.delete "#{@namespace}:mk:#{url.to_s}"
|
|
295
|
+
else
|
|
296
|
+
@store.delete url.to_s # regular
|
|
297
|
+
@store.delete "mk:#{url.to_s}" # master key cache-key
|
|
298
|
+
end
|
|
299
|
+
@store.delete @cache_key # final key
|
|
300
|
+
end
|
|
201
301
|
end #Caching
|
|
202
302
|
end #Middleware
|
|
203
303
|
end
|