parse-stack-next 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. checksums.yaml +7 -0
  2. data/.bundle/config +2 -0
  3. data/.env.sample +112 -0
  4. data/.env.test +10 -0
  5. data/.github/workflows/ruby.yml +36 -0
  6. data/.gitignore +49 -0
  7. data/.ruby-version +1 -0
  8. data/.solargraph.yml +22 -0
  9. data/CHANGELOG.md +5816 -0
  10. data/Gemfile +30 -0
  11. data/Gemfile.lock +175 -0
  12. data/LICENSE.txt +23 -0
  13. data/Makefile +63 -0
  14. data/README.md +5655 -0
  15. data/Rakefile +573 -0
  16. data/bin/console +38 -0
  17. data/bin/parse-console +136 -0
  18. data/bin/server +17 -0
  19. data/bin/setup +7 -0
  20. data/config/parse-config.json +12 -0
  21. data/docs/TEST_SERVER.md +271 -0
  22. data/docs/_config.yml +1 -0
  23. data/docs/mcp_guide.md +3484 -0
  24. data/docs/mongodb_direct_guide.md +1348 -0
  25. data/docs/mongodb_index_optimization_guide.md +631 -0
  26. data/examples/transaction_example.rb +219 -0
  27. data/lib/parse/acl_scope.rb +728 -0
  28. data/lib/parse/agent/cancellation_token.rb +80 -0
  29. data/lib/parse/agent/constraint_translator.rb +480 -0
  30. data/lib/parse/agent/describe.rb +420 -0
  31. data/lib/parse/agent/errors.rb +133 -0
  32. data/lib/parse/agent/mcp_client.rb +557 -0
  33. data/lib/parse/agent/mcp_dispatcher.rb +1023 -0
  34. data/lib/parse/agent/mcp_rack_app.rb +1143 -0
  35. data/lib/parse/agent/mcp_server.rb +376 -0
  36. data/lib/parse/agent/metadata_audit.rb +259 -0
  37. data/lib/parse/agent/metadata_dsl.rb +733 -0
  38. data/lib/parse/agent/metadata_registry.rb +794 -0
  39. data/lib/parse/agent/pipeline_validator.rb +82 -0
  40. data/lib/parse/agent/prompts.rb +351 -0
  41. data/lib/parse/agent/rate_limiter.rb +158 -0
  42. data/lib/parse/agent/relation_graph.rb +162 -0
  43. data/lib/parse/agent/result_formatter.rb +453 -0
  44. data/lib/parse/agent/tools.rb +5489 -0
  45. data/lib/parse/agent.rb +3249 -0
  46. data/lib/parse/api/aggregate.rb +79 -0
  47. data/lib/parse/api/all.rb +26 -0
  48. data/lib/parse/api/analytics.rb +18 -0
  49. data/lib/parse/api/batch.rb +33 -0
  50. data/lib/parse/api/cloud_functions.rb +58 -0
  51. data/lib/parse/api/config.rb +125 -0
  52. data/lib/parse/api/files.rb +29 -0
  53. data/lib/parse/api/hooks.rb +117 -0
  54. data/lib/parse/api/objects.rb +146 -0
  55. data/lib/parse/api/path_segment.rb +75 -0
  56. data/lib/parse/api/push.rb +20 -0
  57. data/lib/parse/api/schema.rb +49 -0
  58. data/lib/parse/api/server.rb +50 -0
  59. data/lib/parse/api/sessions.rb +24 -0
  60. data/lib/parse/api/users.rb +250 -0
  61. data/lib/parse/atlas_search/index_manager.rb +353 -0
  62. data/lib/parse/atlas_search/result.rb +204 -0
  63. data/lib/parse/atlas_search/search_builder.rb +604 -0
  64. data/lib/parse/atlas_search/session.rb +253 -0
  65. data/lib/parse/atlas_search.rb +995 -0
  66. data/lib/parse/client/authentication.rb +97 -0
  67. data/lib/parse/client/batch.rb +234 -0
  68. data/lib/parse/client/body_builder.rb +240 -0
  69. data/lib/parse/client/caching.rb +203 -0
  70. data/lib/parse/client/logging.rb +293 -0
  71. data/lib/parse/client/profiling.rb +181 -0
  72. data/lib/parse/client/protocol.rb +91 -0
  73. data/lib/parse/client/request.rb +233 -0
  74. data/lib/parse/client/response.rb +208 -0
  75. data/lib/parse/client.rb +1104 -0
  76. data/lib/parse/clp_scope.rb +361 -0
  77. data/lib/parse/live_query/circuit_breaker.rb +256 -0
  78. data/lib/parse/live_query/client.rb +1001 -0
  79. data/lib/parse/live_query/configuration.rb +224 -0
  80. data/lib/parse/live_query/event.rb +115 -0
  81. data/lib/parse/live_query/event_queue.rb +272 -0
  82. data/lib/parse/live_query/health_monitor.rb +214 -0
  83. data/lib/parse/live_query/logging.rb +149 -0
  84. data/lib/parse/live_query/subscription.rb +294 -0
  85. data/lib/parse/live_query.rb +163 -0
  86. data/lib/parse/lookup_rewriter.rb +445 -0
  87. data/lib/parse/model/acl.rb +968 -0
  88. data/lib/parse/model/associations/belongs_to.rb +275 -0
  89. data/lib/parse/model/associations/collection_proxy.rb +435 -0
  90. data/lib/parse/model/associations/has_many.rb +597 -0
  91. data/lib/parse/model/associations/has_one.rb +158 -0
  92. data/lib/parse/model/associations/pointer_collection_proxy.rb +134 -0
  93. data/lib/parse/model/associations/relation_collection_proxy.rb +177 -0
  94. data/lib/parse/model/bytes.rb +62 -0
  95. data/lib/parse/model/classes/audience.rb +262 -0
  96. data/lib/parse/model/classes/installation.rb +363 -0
  97. data/lib/parse/model/classes/job_schedule.rb +153 -0
  98. data/lib/parse/model/classes/job_status.rb +264 -0
  99. data/lib/parse/model/classes/product.rb +75 -0
  100. data/lib/parse/model/classes/push_status.rb +263 -0
  101. data/lib/parse/model/classes/role.rb +751 -0
  102. data/lib/parse/model/classes/session.rb +201 -0
  103. data/lib/parse/model/classes/user.rb +943 -0
  104. data/lib/parse/model/clp.rb +544 -0
  105. data/lib/parse/model/core/actions.rb +1268 -0
  106. data/lib/parse/model/core/builder.rb +139 -0
  107. data/lib/parse/model/core/create_lock.rb +386 -0
  108. data/lib/parse/model/core/describe.rb +382 -0
  109. data/lib/parse/model/core/enhanced_change_tracking.rb +159 -0
  110. data/lib/parse/model/core/errors.rb +38 -0
  111. data/lib/parse/model/core/fetching.rb +566 -0
  112. data/lib/parse/model/core/field_guards.rb +220 -0
  113. data/lib/parse/model/core/indexing.rb +382 -0
  114. data/lib/parse/model/core/parse_reference.rb +407 -0
  115. data/lib/parse/model/core/properties.rb +809 -0
  116. data/lib/parse/model/core/querying.rb +491 -0
  117. data/lib/parse/model/core/schema.rb +202 -0
  118. data/lib/parse/model/core/search_indexing.rb +174 -0
  119. data/lib/parse/model/date.rb +88 -0
  120. data/lib/parse/model/email.rb +213 -0
  121. data/lib/parse/model/file.rb +527 -0
  122. data/lib/parse/model/geojson.rb +271 -0
  123. data/lib/parse/model/geopoint.rb +261 -0
  124. data/lib/parse/model/model.rb +260 -0
  125. data/lib/parse/model/object.rb +2068 -0
  126. data/lib/parse/model/phone.rb +520 -0
  127. data/lib/parse/model/pointer.rb +443 -0
  128. data/lib/parse/model/polygon.rb +406 -0
  129. data/lib/parse/model/push.rb +975 -0
  130. data/lib/parse/model/shortnames.rb +8 -0
  131. data/lib/parse/model/time_zone.rb +141 -0
  132. data/lib/parse/model/validations/uniqueness_validator.rb +97 -0
  133. data/lib/parse/model/validations.rb +96 -0
  134. data/lib/parse/mongodb.rb +2300 -0
  135. data/lib/parse/pipeline_security.rb +554 -0
  136. data/lib/parse/query/constraint.rb +198 -0
  137. data/lib/parse/query/constraints.rb +3279 -0
  138. data/lib/parse/query/cursor.rb +434 -0
  139. data/lib/parse/query/n_plus_one_detector.rb +445 -0
  140. data/lib/parse/query/operation.rb +104 -0
  141. data/lib/parse/query/ordering.rb +66 -0
  142. data/lib/parse/query.rb +7028 -0
  143. data/lib/parse/schema/index_migrator.rb +291 -0
  144. data/lib/parse/schema/search_index_migrator.rb +289 -0
  145. data/lib/parse/schema.rb +494 -0
  146. data/lib/parse/stack/generators/rails.rb +40 -0
  147. data/lib/parse/stack/generators/templates/model.erb +51 -0
  148. data/lib/parse/stack/generators/templates/model_installation.rb +4 -0
  149. data/lib/parse/stack/generators/templates/model_role.rb +4 -0
  150. data/lib/parse/stack/generators/templates/model_session.rb +4 -0
  151. data/lib/parse/stack/generators/templates/model_user.rb +11 -0
  152. data/lib/parse/stack/generators/templates/parse.rb +12 -0
  153. data/lib/parse/stack/generators/templates/webhooks.rb +10 -0
  154. data/lib/parse/stack/railtie.rb +18 -0
  155. data/lib/parse/stack/tasks.rb +563 -0
  156. data/lib/parse/stack/version.rb +11 -0
  157. data/lib/parse/stack.rb +455 -0
  158. data/lib/parse/two_factor_auth/user_extension.rb +449 -0
  159. data/lib/parse/two_factor_auth.rb +310 -0
  160. data/lib/parse/webhooks/payload.rb +360 -0
  161. data/lib/parse/webhooks/registration.rb +199 -0
  162. data/lib/parse/webhooks/replay_protection.rb +189 -0
  163. data/lib/parse/webhooks.rb +510 -0
  164. data/lib/parse-stack-next.rb +5 -0
  165. data/lib/parse-stack.rb +5 -0
  166. data/parse-stack-next.gemspec +82 -0
  167. data/parse-stack.png +0 -0
  168. data/scripts/debug-ips.js +35 -0
  169. data/scripts/docker/Dockerfile.parse +13 -0
  170. data/scripts/docker/atlas-init.js +284 -0
  171. data/scripts/docker/docker-compose.atlas.yml +76 -0
  172. data/scripts/docker/docker-compose.test.yml +106 -0
  173. data/scripts/docker/mongo-init.js +21 -0
  174. data/scripts/eval_mcp_with_lm_studio.rb +274 -0
  175. data/scripts/start-parse.sh +90 -0
  176. data/scripts/start_mcp_server.rb +78 -0
  177. data/scripts/test_server_connection.rb +82 -0
  178. metadata +377 -0
@@ -0,0 +1,554 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ module Parse
5
+ # Canonical security validator for MongoDB aggregation pipelines and
6
+ # filter hashes that the SDK forwards to the driver or to Parse Server.
7
+ #
8
+ # Previously the codebase had three different validators with three
9
+ # different rule sets:
10
+ #
11
+ # - `Parse::Agent::PipelineValidator` — strict allowlist for the Agent
12
+ # (read-only paths only)
13
+ # - `Parse::Query#validate_pipeline!` — outer-stage-only denylist
14
+ # - `Parse::MongoDB.assert_no_denied_operators!` — recursive denylist of
15
+ # server-side JS operators
16
+ #
17
+ # `Parse::AtlasSearch.convert_filter_for_mongodb` was a complete
18
+ # passthrough that bypassed all three. A user-supplied filter containing
19
+ # `$where`/`$expr`/`$function`/`$regex` was injected straight into the
20
+ # pipeline `$match` stage, bypassing every existing constraint guard.
21
+ #
22
+ # This module consolidates the rules. Every entry point that forwards a
23
+ # caller-supplied pipeline or filter to MongoDB now routes through one
24
+ # of the two public methods here:
25
+ #
26
+ # - {validate_pipeline!} — strict mode (allowlist + size/depth caps).
27
+ # Used by `Parse::Agent` for user-facing aggregation entry points.
28
+ # `Parse::Query#aggregate` goes through {validate_filter!} instead (see the caveat below).
29
+ #
30
+ # - {validate_filter!} — permissive mode (recursive denylist only).
31
+ # Used by `Parse::MongoDB.find/aggregate` and Atlas Search filter
32
+ # passthrough where the pipeline is constructed by SDK code but a
33
+ # user-controlled filter hash is interpolated. Refuses
34
+ # `$where`/`$function`/`$accumulator` and the data-mutating stages
35
+ # at any nesting depth.
36
+ #
37
+ # == Policy: allowlist top-level, denylist recursive
38
+ #
39
+ # Strict mode enforces {ALLOWED_STAGES} ONLY at the top-level stage
40
+ # key — nested sub-pipelines (inside `$lookup.pipeline`,
41
+ # `$unionWith.pipeline`, `$facet.*`, `$graphLookup`) are walked with
42
+ # the operator denylist but NOT with the stage allowlist. This is
43
+ # intentional: Atlas Search and uncommon-but-legitimate read stages
44
+ # like `$densify` and `$fill` must be allowed inside sub-pipelines
45
+ # even when the outer pipeline is strict-validated. The denylist is
46
+ # the security boundary; the allowlist is a shape check.
47
+ #
48
+ # == Caveat for {Parse::Query#aggregate} callers
49
+ #
50
+ # `Parse::Query#aggregate` routes through {validate_filter!}, not
51
+ # {validate_pipeline!}, so user-supplied pipelines are checked
52
+ # against the denylist only. Permissive mode does NOT block
53
+ # `$lookup`, `$graphLookup`, or `$unionWith` reading from arbitrary
54
+ # collections — these are legitimate read stages but powerful enough
55
+ # to cross Parse ACL/CLP boundaries when the source collection lacks
56
+ # row-level enforcement. **Never pass raw attacker-controlled input
57
+ # into `Parse::Query#aggregate`.** Construct the pipeline in SDK code
58
+ # and interpolate only validated values.
59
+ #
60
+ # == Capability gap: `$expr`
61
+ #
62
+ # `$expr` itself is not in {DENIED_OPERATORS}. The recursive walker
63
+ # catches `$function`/`$accumulator` nested inside `$expr`, so the
64
+ # immediate JavaScript-execution risk is closed. A future Atlas
65
+ # operator gated under `$expr` would slip until {DENIED_OPERATORS}
66
+ # is extended. Defense-in-depth callers concerned about expensive
67
+ # aggregation expressions (`$regexMatch` ReDoS, large `$reduce`
68
+ # loops) should validate user input shape before reaching this
69
+ # module.
70
+ module PipelineSecurity
# Error type for every rejection issued by this validator: a forbidden
# stage, a forbidden operator, or a malformed pipeline. Because it
# descends from `Parse::Error`, one `rescue Parse::Error` clause also
# catches it.
#
# The three readers expose the keyword arguments given to the
# constructor; each is nil when the raiser did not supply it.
class Error < Parse::Error
  attr_reader :stage, :operator, :reason

  # @param message [String] human-readable description of the refusal.
  # @param stage [Object, nil] value supplied by the raiser for the
  #   offending stage (the raisers in this module pass a stage index).
  # @param operator [Object, nil] the offending operator, field or
  #   collection name.
  # @param reason [Symbol, nil] machine-readable refusal code.
  def initialize(message, stage: nil, operator: nil, reason: nil)
    @stage, @operator, @reason = stage, operator, reason
    super(message)
  end
end
84
+
# Operators rejected wherever they occur, however deeply nested. The
# list covers server-side JavaScript execution (`$where`, `$function`,
# `$accumulator`), writes to the database (`$out`, `$merge`), and
# changes to the server itself (`$collMod`, `$createIndex`,
# `$dropIndex`, `$planCacheSetFilter`, `$planCacheClear`). Read
# queries need none of them.
DENIED_OPERATORS = %w[
  $where
  $function
  $accumulator
  $out
  $merge
  $collMod
  $createIndex
  $dropIndex
  $planCacheSetFilter
  $planCacheClear
].freeze

# `$<column>` field-reference strings that name server-internal
# columns. A user-influenced pipeline must never be able to read
# them: a boolean expression over any of these is a
# one-bit-per-query side channel that can bisect a bcrypt hash,
# session token, or password-reset token. The names follow Parse
# Server's internal column layout (cf. MongoStorageAdapter).
# `walk_for_denied!` screens string values against this list
# wherever they appear, not only underneath `$expr`.
DENIED_FIELD_REFS = %w[
  $_hashed_password
  $_password_history
  $_session_token
  $_sessionToken
  $_email_verify_token
  $_perishable_token
  $_failed_login_count
  $_account_lockout_expires_at
  $_rperm
  $_wperm
  $_auth_data
].freeze

# Prefix form of {DENIED_FIELD_REFS} for per-provider auth data.
# Parse Server keeps one column per provider (`_auth_data_facebook`,
# `_auth_data_google`, ...); matching on the `$_auth_data_` prefix
# covers all of them without maintaining an exhaustive list.
DENIED_FIELD_REF_PREFIXES = %w[$_auth_data_].freeze

# The only `_`-prefixed MongoDB collections an SDK aggregation may
# name in `from:`/`coll:`. These four hold Parse SDK data classes.
# Every other `_`-prefixed collection is treated as server-managed
# state and refused: `_SCHEMA` discloses class-level permissions,
# `_Hooks` discloses Cloud Code webhook URLs and secret keys,
# `_GraphQLConfig` discloses GraphQL schema state, `_Audit` holds
# operational telemetry, and `_Idempotency`/`_PushStatus`/
# `_JobStatus`/`_JobSchedule`/`_GlobalConfig`/`_Audience` hold
# internal Parse Server bookkeeping.
ALLOWED_UNDERSCORE_COLLECTIONS = %w[
  _User
  _Role
  _Installation
  _Session
].freeze

# Storage-internal field names that must never show up in returned
# documents. `Parse::MongoDB.convert_document_to_parse` strips most
# of them, but a raw-result path (`raw: true`) skips that conversion
# and would otherwise surface the bcrypt hash, session token, or
# reset token.
#
# `sessionToken` / `session_token` (without the leading underscore)
# are the credential column on `_Session` rows. The Session class
# declares it as a regular property, unlike the `_User`-side
# `_session_token`. Without these entries a master-key agent for
# which the class has been explicitly unhidden would receive raw
# bearer tokens in every row of a `query_class("_Session")`
# response. This list is the process-level floor, independent of
# class-visibility state, so neither a deliberate `agent_unhidden`
# on `_Session` nor a compromised superadmin tool can exfiltrate
# active tokens.
INTERNAL_FIELDS_DENYLIST = %w[
  _hashed_password
  _password_history
  _session_token
  _sessionToken
  sessionToken
  session_token
  _email_verify_token
  _perishable_token
  _failed_login_count
  _account_lockout_expires_at
  _rperm
  _wperm
  _tombstone
  _auth_data
].freeze

# Prefix form of {INTERNAL_FIELDS_DENYLIST} for the per-provider
# auth-data columns (`_auth_data_facebook`, `_auth_data_google`, ...).
# Consulted by `strip_internal_fields` and by the field-name screen
# in `walk_for_denied!`.
INTERNAL_FIELDS_PREFIX_DENYLIST = %w[_auth_data_].freeze

# String-introspection ("forensic") operators. Used INSIDE `$expr`
# against a field reference they turn a query into a per-character
# oracle, even though each operator is otherwise legitimate. They
# are refused inside `$expr` whatever their operands are: the
# validator does not inspect operand shapes deeply, and an SDK
# aggregation has no legitimate use for them against
# Parse-Server-managed columns.
FORENSIC_OPERATORS = %w[
  $regexMatch
  $regexFind
  $regexFindAll
  $substr
  $substrBytes
  $substrCP
  $indexOfBytes
  $indexOfCP
  $strLenBytes
  $strLenCP
  $strcasecmp
].freeze

# Stages the strict validator accepts as a top-level stage key. The
# set covers Parse-Stack's own aggregation use plus the Atlas Search
# entry points (`$search`, `$searchMeta`, `$listSearchIndexes`), so
# `Parse::AtlasSearch` calls keep working.
ALLOWED_STAGES = %w[
  $match
  $group
  $sort
  $project
  $limit
  $skip
  $unwind
  $lookup
  $count
  $addFields
  $set
  $unset
  $bucket
  $bucketAuto
  $facet
  $sample
  $sortByCount
  $replaceRoot
  $replaceWith
  $redact
  $graphLookup
  $unionWith
  $search
  $searchMeta
  $listSearchIndexes
].freeze

# Upper bound on the number of top-level stages in a
# strict-validated pipeline.
MAX_PIPELINE_STAGES = 20

# Upper bound on nested Hash/Array depth during recursive walks, so
# a caller cannot force a near-infinite traversal. Legitimate
# Parse-generated pipelines (`$facet` containing `$lookup` with
# `let` and correlated sub-pipelines such as `$match.$expr.$and.[…]`)
# can reach depth 12+ on a normal read, hence the headroom above
# that ceiling.
MAX_DEPTH = 20
201
+
202
+ module_function
203
+
# Strict validation of a whole aggregation pipeline.
#
# The pipeline has to be a non-empty Array holding at most
# {MAX_PIPELINE_STAGES} entries. Each entry is then handed, together
# with its index, to `validate_stage!`, which applies the stage
# allowlist and the recursive operator denylist.
#
# @param pipeline [Array<Hash>] the aggregation pipeline.
# @raise [Error] with reason `:invalid_type`, `:empty_pipeline` or
#   `:too_many_stages`, or whatever `validate_stage!` raises.
# @return [true]
def validate_pipeline!(pipeline)
  unless pipeline.is_a?(Array)
    raise Error.new("Pipeline must be an Array, got #{pipeline.class}", reason: :invalid_type)
  end
  raise Error.new("Pipeline cannot be empty", reason: :empty_pipeline) if pipeline.empty?

  stage_count = pipeline.size
  if stage_count > MAX_PIPELINE_STAGES
    raise Error.new(
      "Pipeline exceeds maximum of #{MAX_PIPELINE_STAGES} stages (got #{stage_count})",
      reason: :too_many_stages,
    )
  end

  pipeline.each.with_index { |stage, idx| validate_stage!(stage, idx) }
  true
end
230
+
# Permissive validation of a filter or pipeline fragment.
#
# Hands +node+ to the recursive `walk_for_denied!` walker starting
# at depth 0. The walker descends into Hashes and Arrays (other
# values are leaves) and raises on:
#
# - any Hash key listed in {DENIED_OPERATORS};
# - any non-operator Hash key naming an internal Parse Server field
#   ({INTERNAL_FIELDS_DENYLIST} / {INTERNAL_FIELDS_PREFIX_DENYLIST}),
#   unless +allow_internal_fields+ is true;
# - any {FORENSIC_OPERATORS} key at or below a `$expr` key;
# - any String value naming an internal column reference
#   ({DENIED_FIELD_REFS} / {DENIED_FIELD_REF_PREFIXES});
# - nesting deeper than {MAX_DEPTH}.
#
# The top-level stage allowlist and the stage-count cap are NOT
# applied here. Intended for direct-MongoDB sinks where the caller
# chooses the stages deliberately but server-side JS and
# data-mutating operators must still be refused.
#
# @param node [Hash, Array, Object] the structure to walk.
# @param allow_internal_fields [Boolean] when true, skip the
#   internal field-name check (e.g. for SDK-generated ACL filters
#   that legitimately reference +_rperm+/+_wperm+ via
#   {Parse::Query#readable_by_role} and friends). All other checks
#   still apply. Leave at +false+ when forwarding raw,
#   user-influenced pipelines (e.g. Agent MCP tools).
# @raise [Error] if the walker refuses anything at any depth.
# @return [true]
def validate_filter!(node, allow_internal_fields: false)
  walker_options = { depth: 0, allow_internal_fields: allow_internal_fields }
  walk_for_denied!(node, **walker_options)
  true
end
253
+
# Non-raising counterpart of {validate_pipeline!}.
#
# @param pipeline [Object] candidate aggregation pipeline.
# @return [Boolean] true when strict validation passes, false when
#   it raises {Error}. Any other exception propagates.
def valid_pipeline?(pipeline)
  begin
    validate_pipeline!(pipeline)
  rescue Error
    return false
  end
  true
end
261
+
# Non-raising counterpart of {validate_filter!}.
#
# Accepts the same +allow_internal_fields+ switch as
# {validate_filter!} and forwards it unchanged, so the predicate can
# also be used on SDK-generated ACL filters that reference
# +_rperm+/+_wperm+. The default (+false+) keeps the previous
# behaviour for existing one-argument callers.
#
# @param node [Hash, Array, Object] the structure to walk.
# @param allow_internal_fields [Boolean] forwarded to
#   {validate_filter!}.
# @return [Boolean] true when permissive validation passes, false
#   when it raises {Error}. Any other exception propagates.
def valid_filter?(node, allow_internal_fields: false)
  validate_filter!(node, allow_internal_fields: allow_internal_fields)
  true
rescue Error
  false
end
269
+
# Guard for collection names used in `from:`/`coll:`.
#
# Names that do not begin with `_` always pass, as do the four
# SDK-data system classes in {ALLOWED_UNDERSCORE_COLLECTIONS}
# (`_User`, `_Role`, `_Installation`, `_Session`). Any other
# `_`-prefixed name is treated as Parse Server internal state and
# refused. Used by `LookupRewriter` and by the Agent's pipeline
# walker as a hard floor that does not depend on any per-Agent
# `MetadataRegistry.hidden?` policy.
#
# @param name [String, Symbol, nil] the collection name. `nil` (and
#   the empty string) mean "no collection named" and pass through.
# @raise [Error] with reason `:denied_internal_collection` when
#   +name+ is `_`-prefixed and not in
#   {ALLOWED_UNDERSCORE_COLLECTIONS}.
# @return [nil]
def assert_collection_allowed!(name)
  # nil.to_s is "", which fails the prefix test below just like an
  # explicitly empty name does.
  collection = name.to_s
  return unless collection.start_with?("_")
  return if ALLOWED_UNDERSCORE_COLLECTIONS.include?(collection)

  message = "SECURITY: Collection '#{collection}' is reserved for Parse Server's internal " \
            "state and is not reachable from an SDK aggregation pipeline."
  raise Error.new(message, operator: collection, reason: :denied_internal_collection)
end
295
+
# Returns a copy of +doc+ without its internal Parse Server fields.
#
# A key is dropped when its string form is listed in
# {INTERNAL_FIELDS_DENYLIST} or starts with one of the
# {INTERNAL_FIELDS_PREFIX_DENYLIST} prefixes. Only the top level is
# inspected (raw search documents are flat); nested values are
# copied by reference. The input is never mutated.
#
# @param doc [Hash, Object] a result document; anything that is not
#   a Hash is returned unchanged so arbitrary cursor entries can be
#   piped through.
# @return [Hash, Object] a new plain Hash, or +doc+ itself for
#   non-Hash input.
def strip_internal_fields(doc)
  return doc unless doc.is_a?(Hash)

  kept = {}
  doc.each_pair do |key, value|
    field = key.to_s
    internal = INTERNAL_FIELDS_DENYLIST.include?(field) ||
               INTERNAL_FIELDS_PREFIX_DENYLIST.any? { |prefix| field.start_with?(prefix) }
    kept[key] = value unless internal
  end
  kept
end
309
+
# Wave-3 TRACK-CLP-4: rejects caller-supplied pipelines that read a
# protected field through a `$<field>` reference, e.g. on the
# right-hand side of `$project` / `$addFields` / `$set` / `$group` /
# `$bucket` / `$replaceWith` / `$lookup.let`.
#
# Why this exists: the protectedFields enforcement layer
# (CLPScope.redact_protected_fields!) removes protected fields from
# result rows by NAME. A pipeline can copy the value under a
# different name first:
#
#   { "$addFields" => { "ssn_copy" => "$ssn" } }
#   { "$project"   => { "renamed" => "$ssn", "objectId" => 1 } }
#   { "$group"     => { "_id" => "$ssn", "n" => { "$sum" => 1 } } }
#
# After that the value lives under `ssn_copy` / `renamed` / `_id`
# and the name-based strip no longer finds it. This scanner
# therefore runs BEFORE the pipeline reaches Mongo and raises
# {Parse::CLPScope::Denied}, so the caller learns the request was
# refused instead of silently receiving the renamed value.
#
# The method is a no-op when the scope can read everything or
# nothing can be resolved: nil resolution, master resolution,
# nil/empty pipeline, a resolution without usable permission
# strings, or a class without protected fields for those
# permissions.
#
# @param pipeline [Array<Hash>] the caller-supplied pipeline,
#   before SDK-side ACL stages are prepended.
# @param collection_name [String] the queried collection / class.
# @param resolution [Parse::ACLScope::Resolution, nil] the resolved
#   scope; nil-or-master short-circuits.
# @raise [Parse::CLPScope::Denied] when any nested string in the
#   pipeline names a protected field via `$<name>` syntax.
# @return [void]
def refuse_protected_field_references!(pipeline, collection_name, resolution)
  return if resolution.nil?
  return if resolution.respond_to?(:master?) && resolution.master?
  return if pipeline.nil? || pipeline.empty?
  return unless resolution.respond_to?(:permission_strings)

  perms = resolution.permission_strings
  return if perms.nil?

  # Loaded on demand so CLPScope's load order is not forced on
  # callers that never reach this point.
  require_relative "clp_scope" unless defined?(Parse::CLPScope)

  protected_set = Parse::CLPScope.protected_fields_for(collection_name, perms)
  return if protected_set.nil? || protected_set.empty?

  pipeline.each.with_index do |stage, idx|
    walk_for_protected_ref!(stage, protected_set, collection_name, "pipeline[#{idx}]")
  end
  nil
end
365
+
# @!visibility private
#
# Recursively scans +node+ for string values that are aggregation
# field references (`$<path>`) to a protected field and raises as
# soon as one is found.
#
# Recognised reference forms:
#
# - `"$ssn"` / `"$ssn.area"`: a plain field path. Only the first
#   dotted segment is compared, because the protectedFields list is
#   a set of top-level column names per Parse Server's CLP schema.
# - `"$$ROOT.ssn"` / `"$$CURRENT.ssn"`: a path through one of the
#   two system variables that stand for the document itself. These
#   reach the same column as `"$ssn"`, so they are screened the same
#   way; skipping them would let a rename such as
#   `{ "$addFields" => { "copy" => "$$ROOT.ssn" } }` through.
#
# Every other `$$…` string (a bare `$$ROOT`, `$$NOW`, or a
# user-defined `let` variable) is skipped: it is a variable, and
# what a user variable is bound to is checked where the binding
# string itself is walked.
#
# Hash keys are not screened. They are output field NAMES in most
# contexts, and a key literally named after a protected field is
# still removed by the post-fetch redaction; the bypass is the
# value-side reference string.
#
# @param node [Object] pipeline fragment (String, Array, Hash, or
#   any other value, which is ignored).
# @param protected_set [#include?] protected top-level field names.
# @param class_name [String] class reported in the raised error.
# @param path [String] human-readable location of +node+, extended
#   on each recursion step and used in the error message.
# @raise [Parse::CLPScope::Denied] on the first protected reference.
# @return [nil]
def walk_for_protected_ref!(node, protected_set, class_name, path)
  case node
  when String
    return unless node.start_with?("$")
    # Fold `$$ROOT.<path>` / `$$CURRENT.<path>` into the equivalent
    # `$<path>` form; anything still starting with `$$` afterwards is
    # a variable reference rather than a field path.
    field_path = node.sub(/\A\$\$(?:ROOT|CURRENT)\./, "$")
    return if field_path.start_with?("$$")
    head = field_path[1..-1].split(".").first
    return if head.nil? || head.empty?
    # `$_id` is the canonical primary-key reference; it is never on
    # the protected list, and refusing it would break common
    # aggregations like `{$group: {_id: "$_id"}}`.
    return if head == "_id"
    if protected_set.include?(head)
      raise Parse::CLPScope::Denied.new(
        class_name, :read,
        "Pipeline at #{path} references protectedField '#{head}' " \
        "via field-reference '#{node}'. ProtectedFields cannot be " \
        "laundered through a $project/$addFields/$group rename — " \
        "the post-fetch strip walks by name and would miss the " \
        "renamed value, leaking the protected column.",
      )
    end
  when Array
    node.each_with_index do |child, i|
      walk_for_protected_ref!(child, protected_set, class_name, "#{path}[#{i}]")
    end
  when Hash
    node.each do |key, value|
      walk_for_protected_ref!(value, protected_set, class_name, "#{path}.#{key}")
    end
  end
  nil
end
411
+ private_class_method :walk_for_protected_ref!
412
+
# @!visibility private
#
# Strict check of a single top-level pipeline stage.
#
# For every key of the stage Hash, in order:
#
# 1. a key listed in {DENIED_OPERATORS} is refused;
# 2. a `$`-prefixed key missing from {ALLOWED_STAGES} is refused;
# 3. the value is walked by `walk_for_denied!` starting at depth 1.
#
# NOTE(review): a key that does not start with `$` skips the
# allowlist in step 2 and only has its value walked. Confirm that
# this is intentional before relying on the allowlist as a shape
# check for such keys.
#
# @param stage [Object] the stage; must be a Hash.
# @param idx [Integer] position of the stage, used in messages and
#   as the error's +stage+ attribute.
# @raise [Error] with reason `:invalid_stage_type`,
#   `:denied_operator` or `:unknown_stage`, or whatever the
#   recursive walk raises.
# @return [Hash] the stage (result of iterating it).
def validate_stage!(stage, idx)
  unless stage.is_a?(Hash)
    message = "Pipeline stage #{idx} must be a Hash, got #{stage.class}"
    raise Error.new(message, stage: idx, reason: :invalid_stage_type)
  end

  stage.each_pair do |raw_key, body|
    stage_name = raw_key.to_s

    if DENIED_OPERATORS.include?(stage_name)
      message = "SECURITY: Pipeline stage #{idx} uses denied operator '#{stage_name}'. " \
                "This operator either executes server-side JavaScript or mutates data, " \
                "and is refused at any nesting depth."
      raise Error.new(message, stage: idx, operator: stage_name, reason: :denied_operator)
    end

    looks_like_stage = stage_name.start_with?("$")
    if looks_like_stage && !ALLOWED_STAGES.include?(stage_name)
      message = "SECURITY: Unknown aggregation stage '#{stage_name}' at index #{idx} is not in the " \
                "allowed stage list. Allowed: #{ALLOWED_STAGES.join(", ")}."
      raise Error.new(message, stage: idx, operator: stage_name, reason: :unknown_stage)
    end

    walk_for_denied!(body, depth: 1, stage_idx: idx)
  end
end
450
+ private_class_method :validate_stage!
451
+
# @!visibility private
#
# Recursive security walk shared by the strict and the permissive
# validator. Descends into Hashes and Arrays, inspects String
# leaves, and treats every other value as safe.
#
# Checks, in the order they are applied to a Hash entry:
#
# 1. the key is not in {DENIED_OPERATORS};
# 2. unless +allow_internal_fields+ is set, a non-operator key does
#    not name an internal Parse Server column. The key is split on
#    `.` and EVERY segment is screened against
#    {INTERNAL_FIELDS_DENYLIST} / {INTERNAL_FIELDS_PREFIX_DENYLIST}.
#    Screening only the whole key would let dotted paths through
#    (`"_password_history.0"`, or `"owner._session_token"` after a
#    `$lookup`), and those give the same match/count oracle as the
#    bare column name;
# 3. the key is not one of {FORENSIC_OPERATORS} at or below `$expr`.
#
# String values starting with `$` are treated as field or variable
# paths. The leading `$`/`$$` is dropped, the rest is split on `.`,
# and every segment is screened against {DENIED_FIELD_REFS} /
# {DENIED_FIELD_REF_PREFIXES}. This refuses `"$_hashed_password"`,
# `"$owner._hashed_password"` and `"$$ROOT._hashed_password"`
# alike, whether or not the string sits under `$expr`. The check is
# NOT relaxed by +allow_internal_fields+.
#
# NOTE(review): strings without a leading `$` are not screened, so
# an operator that takes a literal field name as a plain string
# (for example `$getField`) is outside this check.
#
# @param node [Object] the structure being walked.
# @param depth [Integer] nesting level of +node+; exceeding
#   {MAX_DEPTH} raises.
# @param stage_idx [Integer, nil] top-level stage index, if known;
#   only used for messages and the error's +stage+ attribute.
# @param inside_expr [Boolean] true once a `$expr` key has been
#   passed on the way down.
# @param allow_internal_fields [Boolean] skip check 2 above.
# @raise [Error] with reason `:max_depth_exceeded`,
#   `:nested_denied_operator`, `:denied_internal_field`,
#   `:forensic_operator_in_expr` or `:denied_field_ref_in_expr`.
# @return [nil]
def walk_for_denied!(node, depth:, stage_idx: nil, inside_expr: false, allow_internal_fields: false)
  if depth > MAX_DEPTH
    raise Error.new(
      "Pipeline nesting depth exceeded (#{MAX_DEPTH}). " \
      "Refusing to walk pathologically nested structures.",
      stage: stage_idx,
      reason: :max_depth_exceeded,
    )
  end

  case node
  when Hash
    node.each do |key, value|
      key_str = key.to_s
      if DENIED_OPERATORS.include?(key_str)
        raise Error.new(
          "SECURITY: Nested denied operator '#{key_str}' found at nesting depth #{depth}" \
          "#{stage_idx ? " inside stage #{stage_idx}" : ""}. " \
          "This operator either executes server-side JavaScript or mutates data, " \
          "and is refused at any depth.",
          stage: stage_idx,
          operator: key_str,
          reason: :nested_denied_operator,
        )
      end
      # Operators ($-prefixed keys) are excluded here because
      # DENIED_OPERATORS already covers them.
      if !allow_internal_fields && !key_str.start_with?("$")
        names_internal_field = key_str.split(".").any? do |segment|
          INTERNAL_FIELDS_DENYLIST.include?(segment) ||
            INTERNAL_FIELDS_PREFIX_DENYLIST.any? { |prefix| segment.start_with?(prefix) }
        end
        if names_internal_field
          raise Error.new(
            "SECURITY: Pipeline references internal Parse Server field " \
            "'#{key_str}' at nesting depth #{depth}" \
            "#{stage_idx ? " inside stage #{stage_idx}" : ""}. " \
            "This column (password hash, session token, auth data, or ACL " \
            "pointer) must not appear in a user-influenced pipeline — " \
            "it enables credential exfiltration via count/match oracles.",
            stage: stage_idx,
            operator: key_str,
            reason: :denied_internal_field,
          )
        end
      end
      child_inside_expr = inside_expr || key_str == "$expr"
      if child_inside_expr && FORENSIC_OPERATORS.include?(key_str)
        raise Error.new(
          "SECURITY: Forensic operator '#{key_str}' inside $expr at nesting depth #{depth}" \
          "#{stage_idx ? " inside stage #{stage_idx}" : ""}. " \
          "String-introspection operators inside $expr enable per-character " \
          "side-channel exfiltration of password hashes, session tokens, and " \
          "reset tokens.",
          stage: stage_idx,
          operator: key_str,
          reason: :forensic_operator_in_expr,
        )
      end
      walk_for_denied!(
        value,
        depth: depth + 1,
        stage_idx: stage_idx,
        inside_expr: child_inside_expr,
        allow_internal_fields: allow_internal_fields,
      )
    end
  when Array
    node.each do |item|
      walk_for_denied!(
        item,
        depth: depth + 1,
        stage_idx: stage_idx,
        inside_expr: inside_expr,
        allow_internal_fields: allow_internal_fields,
      )
    end
  when String
    if node.start_with?("$")
      # Re-prefix each path segment with `$` so it can be compared
      # with the `$`-prefixed entries of the deny lists.
      names_internal_ref = node.sub(/\A\$+/, "").split(".").any? do |segment|
        ref = "$#{segment}"
        DENIED_FIELD_REFS.include?(ref) ||
          DENIED_FIELD_REF_PREFIXES.any? { |prefix| ref.start_with?(prefix) }
      end
      if names_internal_ref
        raise Error.new(
          "SECURITY: Field-reference '#{node}' at nesting depth #{depth}" \
          "#{stage_idx ? " inside stage #{stage_idx}" : ""}. " \
          "This column is internal to Parse Server (password hash, session " \
          "token, reset token, auth data, or ACL pointer) and must not appear " \
          "in a user-influenced pipeline.",
          stage: stage_idx,
          operator: node,
          reason: :denied_field_ref_in_expr,
        )
      end
    end
  end
  # Other primitives (Integer, etc.) are always safe.
  nil
end
552
+ private_class_method :walk_for_denied!
553
+ end
554
+ end