parse-stack-next 5.1.1 → 5.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.env.sample +12 -0
  3. data/.env.test +4 -4
  4. data/CHANGELOG.md +630 -0
  5. data/Gemfile +3 -0
  6. data/Gemfile.lock +6 -1
  7. data/README.md +226 -39
  8. data/Rakefile +56 -10
  9. data/docs/atlas_vector_search_guide.md +110 -9
  10. data/docs/mcp_guide.md +504 -0
  11. data/docs/mongodb_direct_guide.md +66 -1
  12. data/docs/mongodb_index_optimization_guide.md +22 -1
  13. data/docs/usage_guide.md +15 -0
  14. data/lib/parse/agent/approval_gate.rb +0 -0
  15. data/lib/parse/agent/constraint_translator.rb +90 -19
  16. data/lib/parse/agent/describe.rb +1 -0
  17. data/lib/parse/agent/errors.rb +16 -0
  18. data/lib/parse/agent/mcp_client.rb +9 -0
  19. data/lib/parse/agent/mcp_dispatcher.rb +139 -7
  20. data/lib/parse/agent/mcp_rack_app.rb +621 -17
  21. data/lib/parse/agent/mcp_subscriptions.rb +607 -0
  22. data/lib/parse/agent/metadata_dsl.rb +58 -0
  23. data/lib/parse/agent/metadata_registry.rb +141 -1
  24. data/lib/parse/agent/prompt_hardening.rb +213 -0
  25. data/lib/parse/agent/result_formatter.rb +18 -3
  26. data/lib/parse/agent/tools.rb +167 -24
  27. data/lib/parse/agent.rb +692 -21
  28. data/lib/parse/client/request.rb +55 -4
  29. data/lib/parse/client/response.rb +4 -0
  30. data/lib/parse/client.rb +205 -7
  31. data/lib/parse/model/classes/installation.rb +27 -10
  32. data/lib/parse/model/classes/user.rb +8 -0
  33. data/lib/parse/model/core/actions.rb +65 -13
  34. data/lib/parse/model/core/embed_managed.rb +19 -14
  35. data/lib/parse/model/core/indexing.rb +108 -16
  36. data/lib/parse/model/core/querying.rb +29 -0
  37. data/lib/parse/model/model.rb +34 -3
  38. data/lib/parse/model/object.rb +42 -0
  39. data/lib/parse/query.rb +90 -24
  40. data/lib/parse/retrieval/agent_tool.rb +369 -0
  41. data/lib/parse/retrieval/chunk.rb +74 -0
  42. data/lib/parse/retrieval/chunker.rb +208 -0
  43. data/lib/parse/retrieval/retriever.rb +274 -0
  44. data/lib/parse/retrieval.rb +10 -0
  45. data/lib/parse/schema.rb +69 -20
  46. data/lib/parse/stack/version.rb +2 -2
  47. data/lib/parse/webhooks/payload.rb +62 -34
  48. data/lib/parse/webhooks.rb +15 -3
  49. data/parse-stack-next.gemspec +1 -1
  50. data/scripts/docker/docker-compose.atlas.yml +14 -10
  51. data/scripts/docker/docker-compose.test.yml +24 -20
  52. data/scripts/docker/mongo-init.js +3 -3
  53. data/scripts/start-parse.sh +10 -0
  54. data/scripts/start_mcp_server.rb +1 -1
  55. data/scripts/test_server_connection.rb +1 -1
  56. data/scripts/vector_prototype/create_vector_index.js +1 -1
  57. data/scripts/vector_prototype/fetch_embeddings.py +2 -2
  58. data/scripts/vector_prototype/query_prototype.rb +1 -1
  59. data/scripts/vector_prototype/run.sh +4 -4
  60. metadata +10 -2
@@ -0,0 +1,274 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "chunker"
5
+ require_relative "chunk"
6
+
7
+ module Parse
8
+ # Retrieval-augmented-generation (RAG) helpers. `Parse::RAG` is a
9
+ # discoverability alias for this module.
10
+ #
11
+ # {.retrieve} is the agent-agnostic core: it embeds a natural-language
12
+ # query, runs Atlas `$vectorSearch` through the existing
13
+ # `Class.find_similar` (which enforces ACL/CLP mongo-direct), then
14
+ # splits each retrieved document's text field into scored
15
+ # {Parse::Retrieval::Chunk}s for presentation.
16
+ #
17
+ # The agent-facing `semantic_search` tool (see
18
+ # `lib/parse/retrieval/agent_tool.rb`) wraps {.retrieve} with the
19
+ # agent security envelope (tenant scope, `field_allowlist` projection,
20
+ # score quantization).
21
+ #
22
+ # == ACL model
23
+ #
24
+ # {.retrieve} does NOT implement a REST "two-stage" re-query. The
25
+ # vector path is mongo-direct only (Parse Server's REST `/aggregate`
26
+ # is master-key-only and bypasses ACL — see the project notes), and
27
+ # `acl_user:` / `acl_role:` scopes have no REST equivalent. ACL is
28
+ # enforced inside `find_similar` via a post-`$vectorSearch` `_rperm`
29
+ # `$match`. Scope kwargs (`session_token:` / `acl_user:` /
30
+ # `acl_role:` / `master:`) pass straight through `**scope_opts`.
31
+ module Retrieval
32
# Signals a scope-spoofing attempt: the tenant-scope value disagrees with
# a constraint the caller already placed on the same field through
# `vector_filter`. Mirrors the agent layer's tenant-scope refusal.
class TenantScopeConflict < ArgumentError
end
36
+
37
# Signals that no single text field could be picked for chunking: the
# class's `embed` declarations did not yield exactly one candidate and
# the caller did not pass one explicitly.
class AmbiguousTextField < ArgumentError
end
40
+
41
+ module_function
42
+
43
# Walk a caller-supplied filter and reject every key that starts with an
# underscore, no matter how deeply it is nested. Hashes are descended
# through their values and Arrays through their elements; operators such
# as `$or` / `$elemMatch` get no special treatment, so a reserved column
# name (`_rperm`, `_p_*`, `_auth_data_*`) cannot hide inside them.
#
# @param obj [Object] the filter (Hash or Array); any other value is
#   accepted untouched.
# @param path [Array<String>] internal — key path accumulated so far,
#   used only to build the error message.
# @return [Object] `obj` itself, unchanged.
# @raise [ArgumentError] when any key (after `to_s`) begins with `_`.
def assert_no_underscore_keys!(obj, path = [])
  if obj.is_a?(Hash)
    obj.each_pair do |key, value|
      trail = path + [key.to_s]
      if trail.last.start_with?("_")
        raise ArgumentError,
              "filter key '#{trail.join(".")}' is reserved (underscore-prefixed)."
      end
      assert_no_underscore_keys!(value, trail)
    end
  elsif obj.is_a?(Array)
    obj.each_with_index do |element, index|
      # Array positions show up in the reported path as "[i]".
      assert_no_underscore_keys!(element, path + ["[#{index}]"])
    end
  end
  obj
end
71
+
72
# Retrieve and chunk documents semantically similar to `query`.
#
# The query is handed to `klass.find_similar` (with `raw: true`) and each
# returned raw document is split into chunks by `build_chunks_for`.
#
# @param query [String] natural-language query; must be non-blank.
# @param klass [Class, String] a Parse::Object subclass (or its class
#   name) declaring a `:vector` property. `class:` is accepted as an
#   alias; when both are given, `klass:` wins and `class:` is discarded.
# @param field [Symbol, nil] the `:vector` property to search; forwarded
#   to `find_similar` as-is.
# @param text_field [Symbol, nil] the text property to chunk for
#   presentation. When nil it is inferred via `infer_text_field!`, which
#   raises {AmbiguousTextField} if it cannot pick exactly one.
# @param k [Integer] number of documents to retrieve. Default 10.
# @param filter [Hash, nil] post-`$vectorSearch` `$match` filter.
# @param vector_filter [Hash, nil] Atlas-native pre-search filter.
# @param chunker [#chunk_with_meta, #chunk, nil] chunking strategy.
#   Defaults to the result of `default_chunker`.
# @param tenant_scope [Hash, nil] `{ field:, value: }` merged into
#   `vector_filter` by `fold_tenant_scope` before the search runs.
# @param score_quantize [Boolean] round scores to 1 decimal.
# @param source_transform [#call, nil] optional callable applied to each
#   raw source record before it is stored on a Chunk. It is not rescued
#   here, so an error it raises aborts the whole call.
# @param hybrid [Object, nil] reserved — raises {NotImplementedError}
#   if truthy.
# @param rerank [Object, nil] reserved — raises {NotImplementedError}
#   if truthy (note: `false` is accepted, same as `nil`).
# @param scope_opts [Hash] remaining kwargs, forwarded verbatim to
#   `find_similar` (e.g. `session_token:` / `acl_user:` / `acl_role:` /
#   `master:`). The `class:` alias is always stripped first.
# @return [Array<Parse::Retrieval::Chunk>] chunks in the order the
#   documents were returned by `find_similar`; chunk order within a
#   document is positional. Empty when there are no hits.
# @raise [ArgumentError] on a blank/non-String query or an unusable class.
def retrieve(query:, klass: nil, field: nil, text_field: nil, k: 10,
             filter: nil, vector_filter: nil, chunker: nil,
             tenant_scope: nil, score_quantize: false,
             source_transform: nil, hybrid: nil, rerank: nil,
             **scope_opts)
  raise NotImplementedError,
        "Parse::Retrieval.retrieve: `hybrid:` is reserved for a future release." if hybrid
  raise NotImplementedError,
        "Parse::Retrieval.retrieve: `rerank:` is reserved for a future release." if rerank

  # `class:` is a reserved word, so the alias can only arrive through
  # **scope_opts. Remove it unconditionally: if it were left in place
  # when `klass:` is also given, it would be forwarded to `find_similar`
  # as an unexpected keyword.
  class_alias = scope_opts.delete(:class)
  klass = resolve_class!(klass || class_alias)

  unless query.is_a?(String) && !query.strip.empty?
    raise ArgumentError, "Parse::Retrieval.retrieve: `query:` must be a non-empty String."
  end

  resolved_text_field = (text_field || infer_text_field!(klass)).to_sym
  merged_vector_filter = fold_tenant_scope(klass, vector_filter, tenant_scope)
  chunker ||= default_chunker

  raw_hits = klass.find_similar(
    text: query,
    k: k,
    field: field,
    filter: filter,
    vector_filter: merged_vector_filter,
    raw: true,
    **scope_opts,
  )
  return [] if raw_hits.nil? || raw_hits.empty?

  text_wire = wire_name(klass, resolved_text_field)

  raw_hits.flat_map do |doc|
    build_chunks_for(doc, klass, text_wire, score_quantize, source_transform, chunker)
  end
end
150
+
151
# @!visibility private
# Turn the `klass:` argument into a usable class. A Class (or nil) is
# taken as given; anything else is stringified and looked up through
# `Parse::Model.find_class`. The result must be a Class that responds to
# `find_similar`.
#
# @param klass [Class, String, Symbol, nil] class or class name.
# @return [Class] the resolved class.
# @raise [ArgumentError] when nothing usable was resolved.
def resolve_class!(klass)
  candidate =
    if klass.nil? || klass.is_a?(Class)
      klass
    else
      Parse::Model.find_class(klass.to_s)
    end
  return candidate if candidate.is_a?(Class) && candidate.respond_to?(:find_similar)

  raise ArgumentError,
        "Parse::Retrieval.retrieve: `klass:`/`class:` must be a Parse::Object " \
        "subclass with a :vector property (got #{klass.inspect})."
end
169
+
170
# @!visibility private
# Work out which text field to chunk by looking at the class's `embed`
# directives. Directives that report `image?` are ignored; the `sources`
# of the rest are pooled and de-duplicated. Exactly one remaining source
# is the answer; zero or several means the caller has to pass
# `text_field:` explicitly.
#
# @param klass [Class] class that may respond to `embed_directives`.
# @return [Object] the single source field found.
# @raise [AmbiguousTextField] when there is not exactly one candidate.
def infer_text_field!(klass)
  declared = klass.respond_to?(:embed_directives) ? klass.embed_directives.values : []
  textual = declared.select do |directive|
    !(directive.respond_to?(:image?) && directive.image?)
  end
  sources = textual.flat_map { |directive| directive.sources }.uniq

  if sources.length != 1
    raise AmbiguousTextField,
          "Parse::Retrieval.retrieve: cannot infer the text field to chunk for " \
          "#{klass} (candidates: #{sources.inspect}); pass `text_field:` explicitly."
  end
  sources.first
end
183
+
184
# @!visibility private
# Chunker used when the caller supplies none: a fresh
# `Chunker::FixedSizeOverlap` built with size 800 and overlap 100.
#
# @return [Chunker::FixedSizeOverlap]
def default_chunker
  window = { size: 800, overlap: 100 }
  Chunker::FixedSizeOverlap.new(**window)
end
188
+
189
# @!visibility private
# Merge the tenant scope into the pre-search filter, keyed by the
# field's wire/storage column name (see `wire_name`).
#
# Every existing top-level key that spells the same column — as a String
# or a Symbol — is checked: any one of them holding a different value is
# treated as a spoof attempt and refused. Agreeing spellings are then
# collapsed into the single String key so the merged filter never carries
# two entries for the same column.
#
# NOTE(review): a `tenant_scope` Hash without a `field` returns the
# filter unscoped rather than raising — confirm callers never rely on a
# partially-filled scope for isolation.
#
# @param klass [Class] class used to resolve the wire column name.
# @param vector_filter [Hash, nil] caller-supplied filter; never mutated.
# @param tenant_scope [Hash, nil] `{ field:, value: }` with Symbol or
#   String keys.
# @return [Hash, nil] the merged filter, or `vector_filter` unchanged when
#   there is no tenant scope / no field.
# @raise [TenantScopeConflict] when the filter already pins the column to
#   a different value.
def fold_tenant_scope(klass, vector_filter, tenant_scope)
  return vector_filter if tenant_scope.nil?

  field = tenant_scope[:field] || tenant_scope["field"]
  # `key?` (not `||`) so an explicit `value: nil` / `false` is honoured.
  value = tenant_scope.key?(:value) ? tenant_scope[:value] : tenant_scope["value"]
  return vector_filter if field.nil?

  wire = wire_name(klass, field)
  merged = vector_filter ? vector_filter.dup : {}
  same_column_keys = merged.keys.select { |key| key.to_s == wire }

  same_column_keys.each do |key|
    next if merged[key] == value

    raise TenantScopeConflict,
          "Parse::Retrieval.retrieve: vector_filter pins #{wire.inspect} to " \
          "#{merged[key].inspect} but the tenant scope requires #{value.inspect}."
  end

  # Drop alternate spellings (e.g. a Symbol key) before writing the
  # canonical String key.
  same_column_keys.each { |key| merged.delete(key) }
  merged[wire] = value
  merged
end
210
+
211
# @!visibility private
# Map a Ruby property name to its wire/storage column name. An explicit
# entry in the class's `field_map` takes priority; otherwise the name is
# passed through `String#columnize`.
#
# @param klass [Class] class that may respond to `field_map`.
# @param field [Symbol, String] property name.
# @return [String] the column name.
def wire_name(klass, field)
  property = field.to_sym
  if klass.respond_to?(:field_map)
    explicit = klass.field_map[property]
    return explicit.to_s if explicit
  end
  property.to_s.columnize.to_s
end
221
+
222
# @!visibility private
# Read a field from a raw document, trying several key spellings in a
# fixed order: `wire`, `wire` as a Symbol, `sym` as a String, and finally
# `sym` itself. The first key the document actually contains wins, even
# if its value is nil.
#
# @param doc [Hash] raw document.
# @param wire [String] wire/storage column name.
# @param sym [Symbol, String] alternate (property) name.
# @return [Object, nil] the stored value, or whatever `doc[sym]` yields
#   when none of the earlier keys is present.
def fetch_field(doc, wire, sym)
  # Lambdas keep each conversion lazy: later spellings are only computed
  # if the earlier ones are absent from the document.
  spellings = [-> { wire }, -> { wire.to_sym }, -> { sym.to_s }]
  spellings.each do |spelling|
    key = spelling.call
    return doc[key] if doc.key?(key)
  end
  doc[sym]
end
229
+
230
# @!visibility private
# Turn one raw search hit into zero or more Chunks.
#
# The document id is read from `_id` / `objectId` (String or Symbol key)
# and the score from `_vscore`. The text under `text_wire` is split by
# the chunker — through `chunk_with_meta` when the chunker offers it,
# otherwise through `chunk`. A hit that produces no chunks is skipped
# (empty Array) rather than emitted as an empty-content chunk.
#
# @param doc [Hash] raw document returned by the search.
# @param klass [Class] class the hit belongs to; `parse_class` is
#   recorded in the chunk metadata.
# @param text_wire [String] wire column holding the text to chunk.
# @param score_quantize [Boolean] passed through to `quantize_score`.
# @param source_transform [#call, nil] applied once per document to
#   produce the `source` stored on every chunk; not rescued here.
# @param chunker [#chunk_with_meta, #chunk] chunking strategy.
# @return [Array<Chunk>] chunks in positional order.
def build_chunks_for(doc, klass, text_wire, score_quantize, source_transform, chunker)
  # Named `doc_id` (not `object_id`) so the local does not shadow
  # Kernel#object_id.
  doc_id = (doc["_id"] || doc[:_id] || doc["objectId"] || doc[:objectId]).to_s
  score = quantize_score(doc["_vscore"] || doc[:_vscore], score_quantize)

  # NOTE(review): only the wire name is available here, so it is passed
  # as both lookup names — the property-name fallback in `fetch_field`
  # cannot differ from the wire name on this path.
  text = fetch_field(doc, text_wire, text_wire)
  meta = chunker.respond_to?(:chunk_with_meta) ? chunker.chunk_with_meta(text) : nil
  # Coerce on both paths so a chunker that reports no chunks as nil is
  # treated the same as one returning an empty Array.
  chunks = Array(meta ? meta[:chunks] : chunker.chunk(text))
  truncated = meta ? meta[:truncated] : false
  return [] if chunks.empty?

  # Transform after the empty check so skipped documents never invoke it.
  source = source_transform ? source_transform.call(doc) : doc
  count = chunks.length
  chunks.each_with_index.map do |content, idx|
    Chunk.new(
      id: "#{doc_id}##{idx}",
      content: content,
      score: score,
      source: source,
      metadata: {
        chunk_index: idx,
        chunk_count: count,
        chunks_truncated: truncated,
        object_id: doc_id,
        class: klass.parse_class,
      },
    )
  end
end
262
+
263
# @!visibility private
# Normalise a raw similarity score to a Float, optionally rounding it to
# one decimal place.
#
# @param score [Numeric, String, nil] raw score; nil is passed through.
# @param quantize [Boolean] when truthy, round to 1 decimal.
# @return [Float, nil] nil for a nil score, otherwise the (possibly
#   rounded) Float. NaN / ±Infinity are returned as-is even when
#   quantizing, because rounding them raises FloatDomainError.
def quantize_score(score, quantize)
  return score if score.nil?

  f = score.to_f
  return f unless quantize && f.finite?

  (f * 10).round / 10.0
end
269
+ end
270
+
271
+ # Discoverability alias. "RAG" ages badly as a term; `Retrieval` is
272
+ # the canonical name.
273
+ RAG = Retrieval
274
+ end
@@ -0,0 +1,10 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ # Parse::Retrieval — retrieval-augmented-generation (RAG) helpers.
5
+ #
6
+ # Entry point that loads the chunker, the {Parse::Retrieval::Chunk}
7
+ # value object, and the {Parse::Retrieval.retrieve} core. The
8
+ # `semantic_search` agent tool (which depends on the agent layer) is
9
+ # loaded separately from `lib/parse/agent.rb`.
10
+ require_relative "retrieval/retriever"
data/lib/parse/schema.rb CHANGED
@@ -231,16 +231,23 @@ module Parse
231
231
  end
232
232
 
233
233
  # Fields defined locally but missing on server.
234
- # @return [Hash] field name => type pairs
234
+ #
235
+ # Iterates the model's `field_map` (one entry per canonical property,
236
+ # canonical name => wire column name) rather than `fields` (which carries
237
+ # both the snake_case and camelCase keys for every property and therefore
238
+ # double-counts multi-word fields). The wire name resolved from
239
+ # `field_map` is the authoritative server column — including custom
240
+ # `field:` mappings — so this both dedupes and fixes custom-column
241
+ # detection. Result is keyed by the CANONICAL (snake) name with the type
242
+ # taken from `fields[name]`.
243
+ # @return [Hash] canonical field name => type pairs
235
244
  def missing_on_server
236
- return local_fields unless server_exists?
237
-
238
- local = local_fields
239
- server = server_field_names
245
+ server = server_exists? ? server_field_names : []
240
246
  missing = {}
241
- local.each do |name, type|
242
- name_str = name.to_s.camelize(:lower)
243
- missing[name] = type unless server.include?(name_str) || core_field?(name)
247
+ @model_class.field_map.each do |name, wire|
248
+ next if core_field?(name)
249
+ next if server.include?(wire.to_s)
250
+ missing[name] = @model_class.fields[name]
244
251
  end
245
252
  missing
246
253
  end
@@ -262,15 +269,25 @@ module Parse
262
269
  end
263
270
 
264
271
  # Fields with type mismatches.
265
- # @return [Hash] field name => { local: type, server: type }
272
+ #
273
+ # Iterates `field_map` (canonical name => wire column) rather than
274
+ # deriving the server key with `camelize(:lower)`, so a property with a
275
+ # custom `field:` wire column (e.g. `property :post_id, field:
276
+ # "postIdentifier"`) resolves to its real server column instead of a
277
+ # camelized guess. This both dedupes multi-word fields (which appear
278
+ # under two keys in `fields`) and matches the `missing_on_server`
279
+ # resolution path, so type drift on custom-mapped columns is no longer
280
+ # silently skipped.
281
+ # @return [Hash] canonical field name => { local: type, server: type }
266
282
  def type_mismatches
267
283
  return {} unless server_exists?
268
284
 
269
285
  mismatches = {}
270
- local_fields.each do |name, local_type|
286
+ @model_class.field_map.each do |name, wire|
271
287
  next if core_field?(name)
272
- name_str = name.to_s.camelize(:lower)
273
- server_type = @server_schema.field_type(name_str)
288
+ local_type = @model_class.fields[name]
289
+ next if local_type.nil?
290
+ server_type = @server_schema.field_type(wire.to_s)
274
291
  next unless server_type
275
292
 
276
293
  # Normalize types for comparison
@@ -285,11 +302,30 @@ module Parse
285
302
  end
286
303
 
287
304
  # Check if schemas are in sync.
305
+ #
306
+ # Strict / bidirectional: requires the local and server schemas to match
307
+ # in BOTH directions — no fields missing on the server, no fields missing
308
+ # locally, and no type mismatches. A server that is a strict superset of
309
+ # the local model is NOT "in sync" by this measure (use
310
+ # {#server_covers_local?} for the one-way local ⊆ server check).
288
311
  # @return [Boolean]
289
312
  def in_sync?
290
313
  missing_on_server.empty? && missing_locally.empty? && type_mismatches.empty?
291
314
  end
292
315
 
316
# One-way coverage check (local ⊆ server).
#
# True when `missing_on_server` is empty and `type_mismatches` is empty.
# Server-only columns are not consulted, so a server schema that is a
# strict superset of the local model still passes — unlike {#in_sync?},
# which also requires `missing_locally` to be empty.
# @return [Boolean]
def server_covers_local?
  return false unless missing_on_server.empty?

  type_mismatches.empty?
end
328
+
293
329
  # Generate a human-readable summary.
294
330
  # @return [String]
295
331
  def summary
@@ -355,9 +391,17 @@ module Parse
355
391
  end
356
392
 
357
393
  # Check if migration is needed.
394
+ #
395
+ # A migration is needed when the class does not yet exist on the server,
396
+ # or when the server does not already cover every locally-defined field.
397
+ # Defined in terms of the one-way {SchemaDiff#server_covers_local?} rather
398
+ # than the strict bidirectional {SchemaDiff#in_sync?} so that a server
399
+ # which is a strict superset of the local model (extra server-only
400
+ # columns the migrator would never add) does not report a "needed"
401
+ # migration with zero operations.
358
402
  # @return [Boolean]
359
403
  def needed?
360
- !@diff.in_sync? || !@diff.server_exists?
404
+ !@diff.server_exists? || !@diff.server_covers_local?
361
405
  end
362
406
 
363
407
  # Get the operations that would be performed.
@@ -372,7 +416,7 @@ module Parse
372
416
  @diff.missing_on_server.each do |name, type|
373
417
  ops << {
374
418
  action: :add_field,
375
- field: name.to_s.camelize(:lower),
419
+ field: @model_class.field_map[name].to_s,
376
420
  type: REVERSE_TYPE_MAP[type] || "String",
377
421
  }
378
422
  end
@@ -424,7 +468,7 @@ module Parse
424
468
 
425
469
  # Add missing fields
426
470
  @diff.missing_on_server.each do |name, type|
427
- field_name = name.to_s.camelize(:lower)
471
+ field_name = @model_class.field_map[name].to_s
428
472
  field_schema = { "fields" => { field_name => field_definition(type) } }
429
473
 
430
474
  response = @client.update_schema(@model_class.parse_class, field_schema)
@@ -444,15 +488,20 @@ module Parse
444
488
 
445
489
  def build_schema
446
490
  fields = {}
447
- @model_class.fields.each do |name, type|
491
+ # Iterate `field_map` (canonical name => wire column) rather than
492
+ # `fields`, which carries both the snake_case and camelCase keys for
493
+ # every property and would emit a duplicate/phantom column for each
494
+ # multi-word or custom-`field:` property.
495
+ @model_class.field_map.each do |name, wire|
448
496
  next if %i[id object_id created_at updated_at acl objectId createdAt updatedAt ACL].include?(name)
449
- field_name = name.to_s.camelize(:lower)
450
- fields[field_name] = field_definition(type)
497
+ fields[wire.to_s] = field_definition(@model_class.fields[name])
451
498
  end
452
499
 
453
- # Add pointer targets
500
+ # Add pointer targets. `references` is keyed by the wire column name
501
+ # (the `parse_field`), so use it as-is — do not re-camelize, which
502
+ # would corrupt custom `field:` pointer columns.
454
503
  @model_class.references.each do |name, target_class|
455
- field_name = name.to_s.camelize(:lower)
504
+ field_name = name.to_s
456
505
  fields[field_name] = {
457
506
  "type" => "Pointer",
458
507
  "targetClass" => target_class.to_s,
@@ -2,10 +2,10 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  module Parse
5
- # @author Anthony Persaud, Henry Spindell, Adrian Curtin
5
+ # @author Adrian Curtin, Anthony Persaud, Henry Spindell
6
6
  # The Parse Server SDK for Ruby
7
7
  module Stack
8
8
  # The current version.
9
- VERSION = "5.1.1"
9
+ VERSION = "5.2.1"
10
10
  end
11
11
  end
@@ -78,24 +78,35 @@ module Parse
78
78
  hash = Hash[hash.map { |k, v| [k.to_s.underscore.to_sym, v] }]
79
79
  @raw = hash
80
80
  @master = hash[:master]
81
- # Strip protected mass-assignment keys (sessionToken, _rperm, _wperm,
82
- # _hashed_password, authData, roles, etc.) BEFORE constructing the
83
- # user object. Without this, an attacker reaching the webhook
84
- # endpoint with a valid key (or with the optional unauthenticated
85
- # mode enabled) can forge any of these fields on +payload.user+
86
- # via the +objectId+-present hydration branch that bypasses the
87
- # +Parse::Object#apply_attributes!+ protected-key filter.
81
+ # Webhook trigger payloads (beforeSave/afterSave/etc.) are delivered by
82
+ # Parse Server and, when a webhook key is configured (the default; see
83
+ # Parse::Webhooks.allow_unauthenticated for the opt-out used in tests /
84
+ # local dev), authenticated by it -- so they are treated as trusted,
85
+ # server-authoritative state. A handler is meant to receive the full
86
+ # object -- createdAt/updatedAt, ACL, internal fields and all. The only
87
+ # thing stripped here is genuine credential material a handler never
88
+ # legitimately needs to read (live session tokens, offline-crackable
89
+ # password hashes); see WEBHOOK_TRIGGER_CREDENTIAL_KEYS. Protection
90
+ # against *persisting* forged privileged fields lives on the write path
91
+ # (changes_payload emits only declared, dirty-tracked properties), not on
92
+ # this read path.
88
93
  if hash[:user].present?
89
- @user = Parse::User.new(self.class.scrub_protected_keys(hash[:user]))
94
+ # Trusted hydration via .build (not .new) so server-sent timestamps and
95
+ # data fields remain readable; credentials are removed first. Note
96
+ # Parse::User applies its own protections, so `payload.user.auth_data`
97
+ # is not exposed here. The built object is pristine, so a handler that
98
+ # saves payload.user transmits nothing (no dirty changes) and cannot
99
+ # persist forgeries.
100
+ @user = Parse::User.build(self.class.scrub_credentials(hash[:user]))
90
101
  end
91
102
  @installation_id = hash[:installation_id]
92
103
  @params = hash[:params]
93
104
  @params = @params.with_indifferent_access if @params.is_a?(Hash)
94
105
  @function_name = hash[:function_name]
95
- @object = self.class.scrub_protected_keys(hash[:object])
106
+ @object = self.class.scrub_credentials(hash[:object])
96
107
  @trigger_name = hash[:trigger_name]
97
- @original = self.class.scrub_protected_keys(hash[:original])
98
- @update = self.class.scrub_protected_keys(hash[:update]) || {}
108
+ @original = self.class.scrub_credentials(hash[:original])
109
+ @update = self.class.scrub_credentials(hash[:update]) || {}
99
110
  # Added for beforeFind and afterFind triggers
100
111
  @query = hash[:query]
101
112
  @objects = hash[:objects] || []
@@ -103,25 +114,32 @@ module Parse
103
114
  end
104
115
 
105
116
  # @!visibility private
106
- # Routing metadata that must be preserved on payload hashes even
107
- # though the general mass-assignment denylist forbids it. Stripping
108
- # +className+ here breaks +parse_class+/+parse_object+ resolution and
109
- # silently disables +payload_class_mismatch?+. The denylist still
110
- # protects +Parse::Object#apply_attributes!+ at hydration time.
111
- PAYLOAD_PRESERVED_KEYS = %w[className __type].freeze
117
+ # Genuine credential material that is stripped from every webhook trigger
118
+ # payload before a handler can see it, even though the rest of the
119
+ # (trusted, server-authoritative) payload passes through untouched. A
120
+ # session token is a live bearer credential; a password hash is
121
+ # offline-crackable. A handler has no legitimate reason to read either,
122
+ # and removing them keeps them out of logs and out of any object a handler
123
+ # might persist. Everything else Parse Server sends -- createdAt/updatedAt,
124
+ # ACL, authData, roles, _rperm/_wperm, internal fields -- is preserved so
125
+ # the handler observes the full object. Write-side protection
126
+ # (changes_payload emits only declared, dirty-tracked properties) is what
127
+ # prevents persisting forged privileged fields.
128
+ WEBHOOK_TRIGGER_CREDENTIAL_KEYS = %w[
129
+ sessionToken session_token
130
+ _hashed_password _password_history
131
+ ].freeze
112
132
 
113
133
  # @!visibility private
114
- # Returns a copy of +obj+ with the +PROTECTED_MASS_ASSIGNMENT_KEYS+
115
- # removed, except for routing metadata in +PAYLOAD_PRESERVED_KEYS+.
116
- # Operates on string and symbol keys (Parse Server uses camelCase
134
+ # Returns a copy of +obj+ with only +WEBHOOK_TRIGGER_CREDENTIAL_KEYS+
135
+ # removed. Operates on string and symbol keys (Parse Server uses camelCase
117
136
  # strings on the wire; downstream code may have already symbolized).
118
137
  # Pass-through for non-Hash input.
119
- def self.scrub_protected_keys(obj)
138
+ def self.scrub_credentials(obj)
120
139
  return obj unless obj.is_a?(Hash)
121
- denied = Parse::Properties::PROTECTED_MASS_ASSIGNMENT_KEYS
140
+ denied = WEBHOOK_TRIGGER_CREDENTIAL_KEYS
122
141
  obj.reject do |k, _|
123
142
  name = k.to_s
124
- next false if PAYLOAD_PRESERVED_KEYS.include?(name)
125
143
  denied.include?(name) || denied.include?(name.underscore)
126
144
  end
127
145
  end
@@ -278,24 +296,34 @@ module Parse
278
296
  if @original.present? && @original.is_a?(Hash)
279
297
  o = Parse::Object.build @original, parse_class
280
298
  o.apply_attributes! @object, dirty_track: true
281
-
282
- if o.is_a?(Parse::User) && @update.present? && @update["authData"].present?
283
- o.auth_data = @update["authData"]
284
- end
285
299
  return o
286
300
  else #else the object must be new
287
301
  klass = Parse::Object.find_class parse_class
288
302
  # if we have a class, return that with updated changes, otherwise
289
303
  # default to regular object
290
- if klass.present?
291
- o = klass.new(@object || {})
292
- if o.is_a?(Parse::User) && @update.present? && @update["authData"].present?
293
- o.auth_data = @update["authData"]
294
- end
295
- return o
296
- end # if klass.present?
304
+ return klass.new(@object || {}) if klass.present?
297
305
  end # if we have original
298
306
  end # if before_trigger?
307
+
308
+ # afterSave on an UPDATE: build the prior state, then overlay the final
309
+ # state with dirty tracking so `*_changed?` / `changes` work inside
310
+ # afterSave handlers (symmetric with the beforeSave path above). The
311
+ # filter uses the timestamp-preserving INITIALIZE key set rather than the
312
+ # wide mass-assignment set: the wide set would strip the incoming
313
+ # `updatedAt` from the overlay, leaving the prior `updatedAt` and breaking
314
+ # `existed?`. The diff still excludes credentials / _rperm / _wperm /
315
+ # authData / roles, and an after-trigger response is only true/false, so
316
+ # there is no path for a forged privileged field to be persisted.
317
+ if after_save? && @original.present? && @original.is_a?(Hash)
318
+ o = Parse::Object.build @original, parse_class
319
+ o.apply_attributes! @object, dirty_track: true,
320
+ protected_set: Parse::Properties::PROTECTED_INITIALIZE_KEYS
321
+ return o
322
+ end
323
+
324
+ # afterSave on a CREATE (and every other trigger): the full object as the
325
+ # server sent it. createdAt/updatedAt survive (only credentials are
326
+ # scrubbed), so `new?` / `existed?` read correctly.
299
327
  Parse::Object.build(@object, parse_class)
300
328
  end
301
329
 
@@ -233,11 +233,23 @@ module Parse
233
233
  # ran ActiveModel before_save callbacks locally. A client-spoofed
234
234
  # `_RB_` without master falls through and runs them here.
235
235
  unless trusted_ruby_initiated
236
- prepare_result = result.prepare_save!
237
- # If prepare_save! returns false (callback chain was halted), throw an error
238
- if prepare_result == false
236
+ before_save_result = result.run_before_save_callbacks
237
+ # If a before_save callback halted the chain (returned false), reject the save.
238
+ if before_save_result == false
239
239
  raise Parse::Webhooks::ResponseError, "Save halted by before_save callback"
240
240
  end
241
+ # Parse Server exposes no separate beforeCreate trigger, so the
242
+ # beforeSave hook is the single point at which before_create must
243
+ # run for a client-initiated create. Run it AFTER before_save, for
244
+ # new objects only -- matching ActiveModel order (before_save wraps
245
+ # before_create) and mirroring the afterSave hook, which runs
246
+ # after_create then after_save. `original.nil?` marks a create.
247
+ if payload && payload.original.nil?
248
+ create_result = result.run_before_create_callbacks
249
+ if create_result == false
250
+ raise Parse::Webhooks::ResponseError, "Save halted by before_create callback"
251
+ end
252
+ end
241
253
  end
242
254
  # For before_save, return the changes payload (what Parse Server expects)
243
255
  result = result.changes_payload
@@ -6,7 +6,7 @@ require "parse/stack/version"
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "parse-stack-next"
8
8
  spec.version = Parse::Stack::VERSION
9
- spec.authors = ["Anthony Persaud", "Henry Spindell", "Adrian Curtin"]
9
+ spec.authors = ["Adrian Curtin", "Anthony Persaud", "Henry Spindell"]
10
10
  spec.email = ["adrian+parse-stack@neurosynq.net"]
11
11
 
12
12
  spec.summary = %q{Parse Server SDK for Ruby — ORM, queries, auth, and MongoDB-direct access}