parse-stack-next 4.5.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.env.sample +17 -3
  3. data/.github/workflows/codeql.yml +44 -0
  4. data/.github/workflows/docs.yml +39 -0
  5. data/.github/workflows/ruby.yml +8 -6
  6. data/.gitignore +4 -0
  7. data/CHANGELOG.md +295 -72
  8. data/Gemfile.lock +10 -3
  9. data/LICENSE.txt +1 -1
  10. data/README.md +190 -219
  11. data/Rakefile +1 -1
  12. data/SECURITY.md +30 -0
  13. data/assets/parse-stack-next-avatar.png +0 -0
  14. data/assets/parse-stack-next-avatar.svg +37 -0
  15. data/assets/parse-stack-next-banner.png +0 -0
  16. data/assets/parse-stack-next-banner.svg +45 -0
  17. data/assets/parse-stack-next-social-preview.png +0 -0
  18. data/docs/atlas_vector_search_guide.md +511 -0
  19. data/docs/client_sdk_guide.md +1320 -0
  20. data/docs/mcp_guide.md +225 -104
  21. data/docs/mongodb_direct_guide.md +21 -4
  22. data/docs/usage_guide.md +585 -0
  23. data/examples/transaction_example.rb +28 -28
  24. data/lib/parse/acl_scope.rb +2 -2
  25. data/lib/parse/agent/mcp_rack_app.rb +184 -16
  26. data/lib/parse/agent/metadata_dsl.rb +16 -16
  27. data/lib/parse/agent/pipeline_validator.rb +28 -1
  28. data/lib/parse/agent/prompts.rb +5 -5
  29. data/lib/parse/agent/tools.rb +287 -14
  30. data/lib/parse/agent.rb +209 -12
  31. data/lib/parse/api/analytics.rb +27 -5
  32. data/lib/parse/api/files.rb +6 -2
  33. data/lib/parse/api/push.rb +21 -4
  34. data/lib/parse/api/server.rb +59 -0
  35. data/lib/parse/api/users.rb +26 -2
  36. data/lib/parse/atlas_search/index_manager.rb +84 -0
  37. data/lib/parse/atlas_search.rb +37 -9
  38. data/lib/parse/cache/pool.rb +73 -0
  39. data/lib/parse/cache/redis.rb +190 -0
  40. data/lib/parse/client/body_builder.rb +94 -0
  41. data/lib/parse/client/caching.rb +109 -9
  42. data/lib/parse/client/response.rb +27 -0
  43. data/lib/parse/client.rb +74 -3
  44. data/lib/parse/console.rb +203 -0
  45. data/lib/parse/embeddings/cohere.rb +484 -0
  46. data/lib/parse/embeddings/fixture.rb +130 -0
  47. data/lib/parse/embeddings/jina.rb +454 -0
  48. data/lib/parse/embeddings/local_http.rb +492 -0
  49. data/lib/parse/embeddings/openai.rb +520 -0
  50. data/lib/parse/embeddings/provider.rb +264 -0
  51. data/lib/parse/embeddings/qwen.rb +431 -0
  52. data/lib/parse/embeddings/voyage.rb +550 -0
  53. data/lib/parse/embeddings.rb +225 -0
  54. data/lib/parse/graphql/scalars.rb +53 -0
  55. data/lib/parse/graphql/type_generator.rb +264 -0
  56. data/lib/parse/graphql.rb +48 -0
  57. data/lib/parse/live_query/client.rb +24 -5
  58. data/lib/parse/live_query/subscription.rb +17 -6
  59. data/lib/parse/live_query.rb +9 -4
  60. data/lib/parse/model/associations/collection_proxy.rb +2 -2
  61. data/lib/parse/model/associations/has_many.rb +32 -1
  62. data/lib/parse/model/associations/has_one.rb +17 -0
  63. data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
  64. data/lib/parse/model/classes/user.rb +307 -11
  65. data/lib/parse/model/clp.rb +1 -1
  66. data/lib/parse/model/core/embed_managed.rb +296 -0
  67. data/lib/parse/model/core/fetching.rb +4 -4
  68. data/lib/parse/model/core/indexing.rb +53 -14
  69. data/lib/parse/model/core/parse_reference.rb +3 -3
  70. data/lib/parse/model/core/properties.rb +70 -1
  71. data/lib/parse/model/core/querying.rb +57 -1
  72. data/lib/parse/model/core/vector_searchable.rb +285 -0
  73. data/lib/parse/model/file.rb +16 -4
  74. data/lib/parse/model/model.rb +26 -10
  75. data/lib/parse/model/object.rb +63 -6
  76. data/lib/parse/model/pointer.rb +16 -2
  77. data/lib/parse/model/shortnames.rb +2 -0
  78. data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
  79. data/lib/parse/model/vector.rb +102 -0
  80. data/lib/parse/mongodb.rb +90 -8
  81. data/lib/parse/pipeline_security.rb +59 -2
  82. data/lib/parse/query/constraints.rb +16 -14
  83. data/lib/parse/query/ordering.rb +1 -1
  84. data/lib/parse/query.rb +137 -64
  85. data/lib/parse/stack/generators/templates/model.erb +2 -2
  86. data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
  87. data/lib/parse/stack/generators/templates/model_role.rb +1 -1
  88. data/lib/parse/stack/generators/templates/model_session.rb +1 -1
  89. data/lib/parse/stack/generators/templates/parse.rb +1 -1
  90. data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
  91. data/lib/parse/stack/version.rb +1 -1
  92. data/lib/parse/stack.rb +375 -73
  93. data/lib/parse/two_factor_auth/user_extension.rb +5 -2
  94. data/lib/parse/vector_search.rb +341 -0
  95. data/parse-stack-next.gemspec +10 -9
  96. data/scripts/docker/docker-compose.test.yml +18 -0
  97. data/scripts/start-parse.sh +6 -0
  98. data/scripts/vector_prototype/create_vector_index.js +105 -0
  99. data/scripts/vector_prototype/fetch_embeddings.py +241 -0
  100. data/scripts/vector_prototype/fixture_manifest.json +9 -0
  101. data/scripts/vector_prototype/query_prototype.rb +84 -0
  102. data/scripts/vector_prototype/run.sh +34 -0
  103. metadata +75 -5
  104. data/parse-stack.png +0 -0
@@ -243,6 +243,18 @@ module Parse
243
243
  # @option options [Hash] :sort sort specification (default: by relevance score)
244
244
  # @option options [Boolean] :raw return raw MongoDB documents (default: false)
245
245
  # @option options [String] :class_name Parse class name for object conversion
246
+ # @option options [String] :session_token Parse session token used to scope
247
+ # ACL/CLP enforcement to the owning user.
248
+ # @option options [Boolean] :master run with master-key semantics and bypass
249
+ # ACL/CLP enforcement (default: false).
250
+ # @option options [Parse::User, Parse::Pointer] :acl_user act as the given
251
+ # user pointer for ACL evaluation (no REST equivalent; mongo-direct only).
252
+ # @option options [String, Parse::Role] :acl_role act as the given role for
253
+ # ACL evaluation (no REST equivalent; mongo-direct only).
254
+ # @option options [Symbol] :read_preference MongoDB read preference applied
255
+ # to the underlying collection (e.g. +:secondary+).
256
+ # @option options [Integer] :max_time_ms maximum server-side execution time
257
+ # in milliseconds for the aggregate command.
246
258
  #
247
259
  # @return [Parse::AtlasSearch::SearchResult] search result object
248
260
  #
@@ -393,6 +405,19 @@ module Parse
393
405
  # @option options [Integer] :limit max suggestions to return (default: 10)
394
406
  # @option options [Hash] :filter additional constraints
395
407
  # @option options [Boolean] :raw return raw documents (default: false)
408
+ # @option options [String] :class_name Parse class name for object conversion.
409
+ # @option options [String] :session_token Parse session token used to scope
410
+ # ACL/CLP enforcement to the owning user.
411
+ # @option options [Boolean] :master run with master-key semantics and bypass
412
+ # ACL/CLP enforcement (default: false).
413
+ # @option options [Parse::User, Parse::Pointer] :acl_user act as the given
414
+ # user pointer for ACL evaluation (no REST equivalent; mongo-direct only).
415
+ # @option options [String, Parse::Role] :acl_role act as the given role for
416
+ # ACL evaluation (no REST equivalent; mongo-direct only).
417
+ # @option options [Symbol] :read_preference MongoDB read preference applied
418
+ # to the underlying collection (e.g. +:secondary+).
419
+ # @option options [Integer] :max_time_ms maximum server-side execution time
420
+ # in milliseconds for the aggregate command.
396
421
  #
397
422
  # @return [Parse::AtlasSearch::AutocompleteResult] autocomplete result
398
423
  #
@@ -509,7 +534,14 @@ module Parse
509
534
  # @param collection_name [String] the Parse collection name
510
535
  # @param query [String, nil] the search query text (nil for match-all)
511
536
  # @param facets [Hash] facet definitions
512
- # @param options [Hash] search options (same as #search)
537
+ # @param options [Hash] search options (same as {#search}; see that
538
+ # method for the full list of accepted +@option+ entries including
539
+ # +:index+, +:fields+, +:fuzzy+, +:limit+, +:filter+, +:read_preference+,
540
+ # +:max_time_ms+, and the scoping kwargs +:master+, +:session_token+,
541
+ # +:acl_user+, +:acl_role+). Note: scoped identity kwargs require
542
+ # +master: true+ to be passed explicitly — $searchMeta bucket counts
543
+ # cannot be filtered by ACL after the fact, so the method refuses
544
+ # to silently downgrade.
513
545
  #
514
546
  # @return [Parse::AtlasSearch::FacetedResult] faceted result
515
547
  #
@@ -944,19 +976,15 @@ module Parse
944
976
  objects = parse_results.each_with_index.map do |doc, idx|
945
977
  obj = build_parse_object(doc, class_name)
946
978
  raw_doc = raw_results[idx]
947
- # Attach search metadata from original raw document (scores are stripped during conversion)
979
+ # Attach search metadata from original raw document. `search_score`
980
+ # and `search_highlights` readers are defined once on Parse::Object
981
+ # (see lib/parse/model/object.rb) so we only set the ivars here —
982
+ # no per-row singleton method definition.
948
983
  if obj && raw_doc["_score"]
949
984
  obj.instance_variable_set(:@_search_score, raw_doc["_score"])
950
- # Define accessor if not already defined
951
- unless obj.respond_to?(:search_score)
952
- obj.define_singleton_method(:search_score) { @_search_score }
953
- end
954
985
  end
955
986
  if obj && raw_doc["_highlights"]
956
987
  obj.instance_variable_set(:@_search_highlights, raw_doc["_highlights"])
957
- unless obj.respond_to?(:search_highlights)
958
- obj.define_singleton_method(:search_highlights) { @_search_highlights }
959
- end
960
988
  end
961
989
  obj
962
990
  end.compact
@@ -0,0 +1,73 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ require "connection_pool"
5
+ require "moneta"
6
+
7
module Parse
  module Cache
    # A Moneta-style store whose operations are each served by a backend
    # checked out of a ConnectionPool. Only the methods the Faraday caching
    # middleware relies on (`[]`, `key?`, `delete`, `store`) are exposed,
    # plus `clear` and `close` for lifecycle management.
    #
    # Motivation: one Moneta-Redis store wraps one Redis connection, so
    # concurrent callers (e.g. the threads of a Puma worker) end up
    # serialized on that connection. With N pooled stores, up to N cache
    # operations can proceed at the same time.
    #
    # Every public operation performs its own checkout, so a cache hit
    # (`key?` followed by `[]`) costs two checkouts. That keeps behavior
    # identical to a plain Moneta store; size the pool accordingly (the
    # default of 5 matches the Puma default thread count).
    class Pool
      # @return [ConnectionPool] the underlying pool, exposed for callers
      #   that need to check a backend out directly.
      attr_reader :pool

      # @param size [Integer] how many backend stores the pool may hold.
      # @param timeout [Numeric] seconds a checkout may wait before
      #   `ConnectionPool::TimeoutError` is raised.
      # @yield Builds one backend store. The block must return a Moneta
      #   store responding to `[]`, `key?`, `delete`, `store`.
      # @raise [ArgumentError] when no builder block is supplied.
      def initialize(size: 5, timeout: 5, &block)
        unless block_given?
          raise ArgumentError, "Parse::Cache::Pool requires a block that builds a Moneta store"
        end

        @closed = false
        @pool = ConnectionPool.new(size: size, timeout: timeout, &block)
      end

      # @param key [Object] cache key.
      # @return [Object, nil] whatever the backend returns for +key+.
      def [](key)
        with_backend { |backend| backend[key] }
      end

      # @param key [Object] cache key.
      # @return [Boolean] the backend's answer to `key?`.
      def key?(key)
        with_backend { |backend| backend.key?(key) }
      end

      # @param key [Object] cache key.
      # @return [Object, nil] whatever the backend's `delete` returns.
      def delete(key)
        with_backend { |backend| backend.delete(key) }
      end

      # @param key [Object] cache key.
      # @param value [Object] value to cache.
      # @param options [Hash] per-call options forwarded verbatim to the
      #   backend (e.g. `expires:`).
      # @return [Object] whatever the backend's `store` returns.
      def store(key, value, options = {})
        with_backend { |backend| backend.store(key, value, options) }
      end

      # Clears the backend through a single checkout. This assumes every
      # pooled store fronts the same backing database, so clearing through
      # one of them clears it for all. Backends without `clear` are left
      # untouched.
      #
      # @return [Parse::Cache::Pool] self, for chaining.
      def clear
        with_backend do |backend|
          backend.clear if backend.respond_to?(:clear)
        end
        self
      end

      # Shuts the pool down, closing each backend that responds to `close`.
      # Only the first call does any work: `ConnectionPool#shutdown` raises
      # `ConnectionPool::PoolShuttingDownError` when invoked a second time,
      # so repeat calls are short-circuited by the `@closed` flag.
      def close
        unless @closed
          @closed = true
          @pool.shutdown do |backend|
            backend.close if backend.respond_to?(:close)
          end
        end
      end

      private

      # Checks one backend out of the pool, yields it, and returns the
      # block's result.
      def with_backend(&work)
        @pool.with(&work)
      end
    end
  end
end
@@ -0,0 +1,190 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ require "moneta"
5
+ require_relative "pool"
6
+
7
module Parse
  module Cache
    # Ergonomic Redis cache builder for Parse Stack. Composes a
    # ConnectionPool of Moneta-Redis stores and carries an optional
    # `namespace` that `Parse::Client` will pick up automatically — there
    # is no need to also pass `cache_namespace:` to `Parse.setup` when
    # using this wrapper.
    #
    # Usage:
    #   Parse.setup(
    #     cache: Parse::Cache::Redis.new(
    #       url: "redis://localhost:6379/0",
    #       namespace: "app_x",
    #       pool_size: 10,
    #     ),
    #     expires: 60,
    #     ...
    #   )
    #
    # The instance is a Moneta-compatible store (it delegates the four
    # methods the Faraday caching middleware uses — `[]`, `key?`,
    # `delete`, `store` — to a pooled backend), so it can be passed
    # directly to `Parse.setup(cache:)` / `Parse::Client.new(cache:)`.
    class Redis
      # Characters that carry special meaning in a Redis glob pattern
      # (`SCAN ... MATCH`). They are backslash-escaped when the namespace
      # is turned into a pattern so that it is always matched literally.
      GLOB_METACHARACTERS = /[\\*?\[\]]/.freeze
      private_constant :GLOB_METACHARACTERS

      # @return [String, nil] cache key namespace prefix (or nil if not set).
      attr_reader :namespace

      # @return [Integer] pool size.
      attr_reader :pool_size

      # @return [String] Redis connection URL.
      attr_reader :url

      # @param url [String] Redis URL (e.g. `"redis://localhost:6379/0"`).
      # @param namespace [String, nil] optional key prefix so multiple Parse
      #   apps can share one Redis without colliding. A single trailing `:`
      #   is stripped and an empty value is treated as nil. When non-nil,
      #   the namespace is automatically forwarded to the caching
      #   middleware as `cache_namespace:`.
      # @param pool_size [Integer] number of pooled Moneta-Redis stores.
      #   Defaults to 5 (the Puma default thread count).
      #
      #   **Sizing math (per Faraday request):**
      #   - cache hit: `key?` + `[]` = **2 checkouts**
      #   - GET miss + successful store: `key?` + 3 variant deletes
      #     (anonymous + master-key sibling + final key) + 1 `store` in
      #     `on_complete` = **up to 5 checkouts**
      #   - non-GET write (POST/PUT/DELETE): 3 variant deletes =
      #     **3 checkouts**
      #
      #   The worst case (5) is on the write-through-after-miss path, not
      #   the hit path. Rule of thumb: start at `pool_size = RAILS_MAX_THREADS`,
      #   then bump it up if you observe `ConnectionPool::TimeoutError` in
      #   `parse.cache.error` notifications (the middleware swallows that
      #   error into a passthrough request rather than raising to the caller).
      # @param pool_timeout [Numeric] seconds to wait for a backend
      #   checkout before raising `ConnectionPool::TimeoutError`. Defaults
      #   to 5s. The caching middleware catches that error and falls back
      #   to a passthrough request rather than raising to the caller.
      # @param moneta_options [Hash] extra options passed through to
      #   `Moneta.new(:Redis, ...)` (e.g. `:db`, `:connect_timeout`).
      #   `expires: true` is set automatically so per-key TTLs supplied
      #   by the caching middleware (the `:expires` Faraday option) are
      #   honored by Redis. Pass `expires: false` here to opt out — but
      #   note that doing so causes cached responses to live forever,
      #   which is rarely what you want for a session-token-scoped
      #   response cache.
      def initialize(url:, namespace: nil, pool_size: 5, pool_timeout: 5, **moneta_options)
        @url = url
        @namespace = normalize_namespace(namespace)
        @pool_size = pool_size
        @pool_timeout = pool_timeout
        # Caller-supplied options win over the `expires: true` default, so
        # `expires: false` remains an explicit opt-out.
        merged_options = { expires: true }.merge(moneta_options)
        @moneta_options = merged_options
        @closed = false
        @pool = Pool.new(size: pool_size, timeout: pool_timeout) do
          Moneta.new(:Redis, { url: url }.merge(merged_options))
        end
      end

      # @param key [String] cache key.
      # @return [Object, nil] the cached value, as returned by the backend.
      def [](key)
        @pool[key]
      end

      # @param key [String] cache key.
      # @return [Boolean] the backend's answer to `key?`.
      def key?(key)
        @pool.key?(key)
      end

      # @param key [String] cache key.
      # @return [Object, nil] whatever the backend's `delete` returns.
      def delete(key)
        @pool.delete(key)
      end

      # @param key [String] cache key.
      # @param value [Object] value to cache.
      # @param options [Hash] per-call options (e.g. `expires:`), forwarded
      #   verbatim to the pooled backend.
      def store(key, value, options = {})
        @pool.store(key, value, options)
      end

      # Clear cached entries belonging to this wrapper. Required for
      # `Parse::Client#clear_cache!` compatibility.
      #
      # **Namespace-scoped when a namespace is set:** the wrapper walks
      # `<namespace>:*` via Redis SCAN and DELs the matching keys,
      # leaving other tenants on the same DB untouched. The namespace is
      # glob-escaped first, so a namespace containing `*`, `?`, `[`, `]`
      # or `\` only ever matches itself — never a sibling tenant's keys.
      # When no namespace is configured the wrapper falls back to clearing
      # the whole backing DB. To opt into the wide flush explicitly (e.g.
      # ops tooling), call {#flush_db!}.
      #
      # @return [Parse::Cache::Redis] self, for chaining.
      # @raise [RuntimeError] when a namespace is set but no SCAN-capable
      #   Redis client can be reached behind the Moneta store.
      def clear
        if @namespace
          delete_keys_matching!("#{escape_glob(@namespace)}:*")
        else
          @pool.clear
        end
        self
      end

      # Clear the entire backing Redis DB, regardless of whether a
      # namespace is configured. Evicts every key on the selected DB,
      # including unrelated tenants — use only for ops tooling that
      # owns the whole DB.
      #
      # @return [Parse::Cache::Redis] self, for chaining.
      def flush_db!
        @pool.clear
        self
      end

      # Close all pooled connections. Safe to call multiple times; only
      # the first call reaches the pool.
      def close
        return if @closed
        @closed = true
        @pool.close
      end

      private

      # Deletes every key matching the Redis glob +pattern+ using a
      # SCAN/DEL loop on a single checked-out backend.
      def delete_keys_matching!(pattern)
        @pool.pool.with do |store|
          redis = backend_client(store)
          # `count:` is only a hint to the server; batches vary in size and
          # may be empty. Iterate until the cursor wraps back to zero. The
          # cursor is compared as a String so a client that hands back an
          # Integer 0 still terminates the loop.
          cursor = "0"
          loop do
            cursor, keys = redis.scan(cursor, match: pattern, count: 1000)
            redis.del(*keys) unless keys.empty?
            break if cursor.to_s == "0"
          end
        end
      end

      # Walks down the Moneta proxy chain until it reaches an object that
      # quacks like the redis-rb client (i.e. responds to `#scan`). Each
      # hop follows, in order of preference, a public `#backend` reader,
      # the `@backend` ivar, or the `@adapter` ivar. The walk is capped at
      # 12 hops so a cyclic chain cannot loop forever.
      #
      # @raise [RuntimeError] when the chain ends without a SCAN-capable
      #   client, instead of letting a bare NoMethodError surface later.
      def backend_client(moneta_store)
        node = moneta_store
        12.times do
          break if node.nil? || node.respond_to?(:scan)
          node =
            if node.respond_to?(:backend)
              node.backend
            elsif node.instance_variable_defined?(:@backend)
              node.instance_variable_get(:@backend)
            elsif node.instance_variable_defined?(:@adapter)
              node.instance_variable_get(:@adapter)
            end
        end
        return node if node.respond_to?(:scan)

        raise "Parse::Cache::Redis could not locate a SCAN-capable Redis client behind " \
              "#{moneta_store.class}; namespace-scoped clear is unavailable"
      end

      # Backslash-escapes Redis glob metacharacters so +text+ is matched
      # literally inside a SCAN pattern.
      def escape_glob(text)
        text.gsub(GLOB_METACHARACTERS) { |char| "\\#{char}" }
      end

      # Strips one trailing `:` and maps blank input (nil or "") to nil.
      def normalize_namespace(ns)
        s = ns.to_s.chomp(":")
        s.empty? ? nil : s
      end
    end
  end
end
@@ -48,6 +48,26 @@ module Parse
48
48
  SENSITIVE_FIELDS_SET = SENSITIVE_FIELDS.map(&:downcase).to_set.freeze
49
49
  # Placeholder used in place of redacted values.
50
50
  REDACTED_PLACEHOLDER = "[FILTERED]"
51
+ # Minimum length at which a numeric-only Array in a logged JSON
52
+ # body is compacted to a single placeholder string instead of
53
+ # printed verbatim. Two concerns drive this:
54
+ #
55
+ # 1. **Noise.** A 1536-float OpenAI embedding inlines as ~25 KB of
56
+ # JSON per logged row. Aggregation pipelines with
57
+ # `$vectorSearch.queryVector` and any save/fetch carrying a
58
+ # `:vector` field would otherwise drown operator logs.
59
+ # 2. **Sensitivity.** Embeddings are reversible-by-similarity:
60
+ # an attacker who scrapes operator logs can reconstruct
61
+ # high-level features of the source text (topic, sentiment,
62
+ # sometimes near-verbatim phrases for short inputs) by
63
+ # nearest-neighbor lookup against a public model.
64
+ #
65
+ # Threshold rationale: 32 is well below every common embedding
66
+ # width (BGE-small 384, Cohere 1024, OpenAI small 1536, OpenAI
67
+ # large 3072) and well above any normal Parse Array property
68
+ # (tags, role lists, etc.). Numeric-only check additionally
69
+ # protects normal long arrays of strings/objects.
70
+ LOG_VECTOR_COMPACT_THRESHOLD = 32
51
71
  # Request headers that must never be printed verbatim in debug logs.
52
72
  # Matched case-insensitively against Faraday header keys.
53
73
  REDACTED_HEADERS = [
@@ -57,6 +77,31 @@ module Parse
57
77
  "X-Parse-JavaScript-Key",
58
78
  "Authorization",
59
79
  "Cookie",
80
+ # Embedding-provider credentials (Parse::Embeddings::OpenAI and
81
+ # forthcoming Cohere/Voyage adapters). These never touch Parse
82
+ # Server itself, but they share the same Faraday log path when a
83
+ # caller mounts the embeddings connection through Parse logging.
84
+ # OpenAI's official auth header is `Authorization: Bearer …`
85
+ # (already covered above); Organization/Project are listed here
86
+ # since they're account-identifying metadata operators may not
87
+ # want to publish. `X-Api-Key` and `Anthropic-Api-Key` are
88
+ # reserved for forthcoming non-OpenAI providers.
89
+ "X-Api-Key",
90
+ "OpenAI-Organization",
91
+ "OpenAI-Project",
92
+ "Anthropic-Api-Key",
93
+ # Cohere, Voyage, Jina, and DashScope (Qwen) use Bearer auth
94
+ # (covered by "Authorization" above), but some operators front
95
+ # them with a proxy that rewrites to a vendor-specific header.
96
+ # These are listed defensively so a future header-form switch
97
+ # doesn't silently leak keys into Faraday logs. `Api-Key` is the
98
+ # bare form some vendor SDKs and proxies use; covered for parity.
99
+ "Cohere-Api-Key",
100
+ "Voyage-Api-Key",
101
+ "Jina-Api-Key",
102
+ "Api-Key",
103
+ "X-DashScope-Api-Key",
104
+ "DashScope-Api-Key",
60
105
  ].map(&:downcase).freeze
61
106
 
62
107
  class << self
@@ -91,6 +136,7 @@ module Parse
91
136
  after_structural = s
92
137
  if (parsed = try_parse_json(s))
93
138
  scrubbed = scrub_sensitive!(parsed)
139
+ compact_vectors!(scrubbed)
94
140
  begin
95
141
  after_structural = scrubbed.to_json
96
142
  rescue StandardError
@@ -160,12 +206,60 @@ module Parse
160
206
  node
161
207
  end
162
208
 
209
# @!visibility private
# Walks a parsed JSON structure and swaps every vector-shaped child (as
# judged by {vector_shape?}) for the placeholder string
# "<vector dims=N>", where N is the child's length. Hashes and Arrays
# are mutated in place. This is a separate pass from {scrub_sensitive!}
# because the trigger is the value's shape, not the name of its key.
#
# A child that has been replaced is not descended into. Children that
# are Hashes or Arrays but not vector-shaped are recursed into, so a
# batch of vectors (Array<Array<Numeric>>) is compacted one inner array
# at a time. Only children are ever replaced: +node+ itself is returned
# as the same object even if it is vector-shaped.
#
# @param node [Object] parsed JSON value; anything other than a Hash or
#   Array is returned untouched.
# @return [Object] +node+, for chaining.
def self.compact_vectors!(node)
  return node unless node.is_a?(Hash) || node.is_a?(Array)

  # Snapshot the slots (keys or indices) up front; only existing slots
  # are reassigned, so the container never changes size while we walk it.
  slots = node.is_a?(Hash) ? node.keys : (0...node.length).to_a
  slots.each do |slot|
    child = node[slot]
    if vector_shape?(child)
      node[slot] = "<vector dims=#{child.length}>"
    elsif child.is_a?(Hash) || child.is_a?(Array)
      compact_vectors!(child)
    end
  end
  node
end
243
+
244
# @!visibility private
# Decides whether +val+ should be treated as an embedding vector for
# log compaction. Three conditions must all hold: it is an Array, its
# length is at least +LOG_VECTOR_COMPACT_THRESHOLD+, and every element
# is Numeric. The element check keeps long arrays of strings, hashes,
# or mixed types from being collapsed; booleans are not Numeric in
# Ruby, so boolean arrays are left alone as well.
#
# @param val [Object] any parsed JSON value.
# @return [Boolean] true when +val+ is vector-shaped.
def self.vector_shape?(val)
  val.is_a?(Array) &&
    val.length >= LOG_VECTOR_COMPACT_THRESHOLD &&
    val.all?(Numeric)
end
255
+
163
256
  # @!visibility private
164
257
  # If +str+ parses as JSON (object or array), scrub structurally and
165
258
  # re-encode. Otherwise return the original string unchanged.
166
259
  def self.maybe_scrub_embedded_json(str)
167
260
  return str unless (inner = try_parse_json(str))
168
261
  scrub_sensitive!(inner)
262
+ compact_vectors!(inner)
169
263
  begin
170
264
  inner.to_json
171
265
  rescue StandardError
@@ -3,6 +3,7 @@
3
3
 
4
4
  require "faraday"
5
5
  require "moneta"
6
+ require "connection_pool"
6
7
  require "digest"
7
8
  require_relative "protocol"
8
9
 
@@ -81,6 +82,14 @@ module Parse
81
82
  @opts = { expires: 0 }
82
83
  @opts.merge!(opts) if opts.is_a?(Hash)
83
84
  @expires = @opts[:expires]
85
+ # Optional cache key namespace so two Parse apps sharing one Redis don't
86
+ # collide (e.g. `mk:/classes/Song/abc` is the same path for both apps).
87
+ # When set, keys become `<namespace>:<existing-prefix>:<url>`. Empty
88
+ # string is treated as nil. Trailing `:` is stripped once so users can
89
+ # pass either `"app_x"` or `"app_x:"`.
90
+ ns = @opts[:namespace].to_s
91
+ ns = ns.chomp(":")
92
+ @namespace = ns.empty? ? nil : ns
84
93
 
85
94
  unless [:key?, :[], :delete, :store].all? { |method| @store.respond_to?(method) }
86
95
  raise ArgumentError, "Caching store object must a Moneta key/value store."
@@ -134,21 +143,36 @@ module Parse
134
143
  @cache_key = "mk:#{@cache_key}" # prefix for master key requests
135
144
  end
136
145
 
146
+ # Namespace outermost so a SCAN over `<namespace>:*` evicts a whole
147
+ # tenant/app cleanly without touching another app's entries.
148
+ @cache_key = "#{@namespace}:#{@cache_key}" if @namespace
149
+
150
+ url_path = url.path
151
+
137
152
  begin
138
153
  # Skip cache read if write_only mode is enabled
139
154
  if method == :get && @cache_key.present? && !@write_only && @store.key?(@cache_key)
140
- puts("[Parse::Cache] Hit >> #{url}") if self.class.logging.present?
155
+ # Debug-log the URL **path only** — `url.to_s` would include the
156
+ # query string, which Parse encodes JSON `where=` into and may
157
+ # contain PII. Same redaction discipline as the AS::N payload.
158
+ puts("[Parse::Cache] Hit >> #{url_path}") if self.class.logging.present?
141
159
  response = Faraday::Response.new
142
160
  begin
143
161
  cache_data = @store[@cache_key] # previous cached response
144
162
  rescue => e
145
- puts "[Parse::Cache] Error: #{e}"
163
+ # Log only the class name — some Moneta/Redis drivers echo the
164
+ # offending key in `e.message`, and our key contains a hashed
165
+ # session-token prefix that we treat as side-channel material.
166
+ puts "[Parse::Cache] Error: #{e.class.name}"
167
+ instrument_cache(:error, method: method, url_path: url_path, error: e.class.name)
146
168
  cache_data = nil
147
169
  end
148
170
 
149
171
  # check if the store was from a legacy parse-stack cache value which
150
172
  # is stored as Faraday::Env. The new system stores less content in a simple hash
151
173
  # for improved interoperability and access time.
174
+ body = nil
175
+ response_headers = nil
152
176
  if cache_data.is_a?(Faraday::Env)
153
177
  body = cache_data.respond_to?(:body) ? cache_data.body : nil
154
178
  response_headers = cache_data.response_headers || {}
@@ -160,24 +184,43 @@ module Parse
160
184
  if cache_data.present? && body.present?
161
185
  response_headers[CACHE_RESPONSE_HEADER] = "true"
162
186
  response.finish({ status: 200, response_headers: response_headers, body: body })
187
+ instrument_cache(:hit, method: method, url_path: url_path)
163
188
  return response
164
189
  else
165
- @store.delete @cache_key
190
+ delete_cache_variants(url)
191
+ instrument_cache(:miss, method: method, url_path: url_path, reason: :empty_payload)
166
192
  end
193
+ elsif method == :get && @cache_key.present? && !@write_only
194
+ # GET miss: opportunistically clear any sibling variants of the
195
+ # current namespace (anonymous `<url>` and master-key `mk:<url>`
196
+ # under the same namespace) so a stale variant from a prior
197
+ # request flavor doesn't linger until TTL.
198
+ #
199
+ # When @namespace is set we deliberately do NOT touch the bare
200
+ # un-namespaced `<url>` / `mk:<url>` keys — those could belong to
201
+ # another Parse app sharing the Redis DB, and cross-namespace
202
+ # eviction would be a blast-radius bug, not a fix. Operators
203
+ # upgrading an SDK that previously wrote un-namespaced keys
204
+ # should evict those once at upgrade time via SCAN.
205
+ delete_cache_variants(url)
206
+ instrument_cache(:miss, method: method, url_path: url_path)
207
+ elsif method == :get && @cache_key.present? && @write_only
208
+ delete_cache_variants(url)
209
+ instrument_cache(:miss, method: method, url_path: url_path, reason: :write_only)
167
210
  elsif @cache_key.present?
168
211
  # non-GET requests should clear the cache for that same resource path.
169
212
  #ex. a POST to /1/classes/Artist/<objectId> should delete the cache for a GET
170
213
  # request for the same '/1/classes/Artist/<objectId>' where objectId are equivalent
171
- @store.delete url.to_s # regular
172
- @store.delete "mk:#{url.to_s}" # master key cache-key
173
- @store.delete @cache_key # final key
214
+ delete_cache_variants(url)
215
+ instrument_cache(:delete, method: method, url_path: url_path)
174
216
  end
175
- rescue ::TypeError, Errno::EINVAL, Redis::CannotConnectError, Redis::TimeoutError => e
217
+ rescue ::TypeError, Errno::EINVAL, Redis::CannotConnectError, Redis::TimeoutError, ConnectionPool::TimeoutError => e
176
218
  # if the cache store fails to connect, catch the exception but proceed
177
219
  # with the regular request, but turn off caching for this request. It is possible
178
220
  # that the cache connection resumes at a later point, so this is temporary.
179
221
  @enabled = false
180
- puts "[Parse::Cache] Error: #{e}"
222
+ puts "[Parse::Cache] Error: #{e.class.name}"
223
+ instrument_cache(:error, method: method, url_path: url_path, error: e.class.name)
181
224
  end
182
225
 
183
226
  @app.call(env).on_complete do |response_env|
@@ -186,18 +229,75 @@ module Parse
186
229
 
187
230
  if @enabled && method == :get && CACHEABLE_HTTP_CODES.include?(response_env.status) &&
188
231
  response_env.body.present? && response_env.response_headers[CONTENT_LENGTH_KEY].to_i.between?(20, 1_250_000)
232
+ store_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
189
233
  begin
190
234
  @store.store(@cache_key,
191
235
  { headers: response_env.response_headers, body: response_env.body },
192
236
  expires: @expires)
237
+ duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - store_start) * 1000.0).round(3)
238
+ instrument_cache(:store, method: method, url_path: url_path, duration_ms: duration_ms)
193
239
  rescue => e
194
- puts "[Parse::Cache] Store Error: #{e}"
240
+ puts "[Parse::Cache] Store Error: #{e.class.name}"
241
+ instrument_cache(:error, method: method, url_path: url_path, error: e.class.name)
195
242
  end
196
243
  end # if
197
244
  # do something with the response
198
245
  # response_env[:response_headers].merge!(...)
199
246
  end
200
247
  end
248
+
249
+ private
250
+
251
# Publishes a `parse.cache.<event>` notification through
# ActiveSupport::Notifications. Does nothing (and returns nil) when
# ActiveSupport::Notifications is not loaded.
#
# **Payload shape (stable):** `{ event:, namespace:, method:, url_path:,
# [reason:], [duration_ms:], [error:] }`. The payload starts from
# `event` and `namespace`; the caller-supplied keyword arguments are
# merged on top.
#
# **Security invariants (upheld by the call sites, not enforced here):**
# - The cache key is NEVER part of the payload. It contains a hashed
#   session-token prefix, which would act as a side channel for "this
#   user has data at this URL" enumeration.
# - `url_path` is `URI#path` only — the query string is dropped because
#   Parse encodes query JSON there (potentially long or PII-bearing).
# - `error` is the exception's class name only — never its message or
#   backtrace.
# - `namespace` is whatever the SDK consumer configured at setup. Treat
#   subscribers like an application log sink: they see the namespace,
#   HTTP method, and URL path of every cached GET / invalidating write.
#
# **Subscriber discipline:** ActiveSupport::Notifications runs
# subscribers **synchronously on the Faraday request thread**. A slow
# subscriber delays every cached request for as long as it runs, and an
# exception raised inside one surfaces as a request failure. Keep
# subscribers cheap — counters, in-memory accumulators, or non-blocking
# sinks such as StatsD-over-UDP.
#
# @param event [Symbol] event suffix appended to `parse.cache.`.
# @param extra [Hash] additional payload entries.
# @!visibility private
def instrument_cache(event, **extra)
  if defined?(ActiveSupport::Notifications)
    base = { event: event, namespace: @namespace }
    ActiveSupport::Notifications.instrument("parse.cache.#{event}", base.merge(extra))
  end
end
283
+
284
# Evicts every cache entry this middleware may hold for +url+: the
# anonymous variant, the master-key (`mk:`) variant, and finally the
# key computed for the current request (`@cache_key`).
#
# When a namespace is configured, both sibling variants are addressed
# under `<namespace>:` only, so a write through one client never evicts
# entries belonging to another app sharing the same store. Without a
# namespace the bare `<url>` and `mk:<url>` keys are deleted.
#
# Used for GET misses (sibling cleanup) and for non-GET requests
# (invalidating the resource).
#
# @param url [#to_s] the request URL.
# @return [Object] whatever the store returns for the final delete.
# @!visibility private
def delete_cache_variants(url)
  scope = @namespace ? "#{@namespace}:" : ""
  # Anonymous variant first, then the master-key sibling.
  ["", "mk:"].each do |flavor|
    @store.delete "#{scope}#{flavor}#{url.to_s}"
  end
  @store.delete @cache_key # final key
end
201
301
  end #Caching
202
302
  end #Middleware
203
303
  end