parse-stack-next 5.1.1 → 5.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.sample +12 -0
- data/.env.test +4 -4
- data/CHANGELOG.md +630 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +6 -1
- data/README.md +226 -39
- data/Rakefile +56 -10
- data/docs/atlas_vector_search_guide.md +110 -9
- data/docs/mcp_guide.md +504 -0
- data/docs/mongodb_direct_guide.md +66 -1
- data/docs/mongodb_index_optimization_guide.md +22 -1
- data/docs/usage_guide.md +15 -0
- data/lib/parse/agent/approval_gate.rb +0 -0
- data/lib/parse/agent/constraint_translator.rb +90 -19
- data/lib/parse/agent/describe.rb +1 -0
- data/lib/parse/agent/errors.rb +16 -0
- data/lib/parse/agent/mcp_client.rb +9 -0
- data/lib/parse/agent/mcp_dispatcher.rb +139 -7
- data/lib/parse/agent/mcp_rack_app.rb +621 -17
- data/lib/parse/agent/mcp_subscriptions.rb +607 -0
- data/lib/parse/agent/metadata_dsl.rb +58 -0
- data/lib/parse/agent/metadata_registry.rb +141 -1
- data/lib/parse/agent/prompt_hardening.rb +213 -0
- data/lib/parse/agent/result_formatter.rb +18 -3
- data/lib/parse/agent/tools.rb +167 -24
- data/lib/parse/agent.rb +692 -21
- data/lib/parse/client/request.rb +55 -4
- data/lib/parse/client/response.rb +4 -0
- data/lib/parse/client.rb +205 -7
- data/lib/parse/model/classes/installation.rb +27 -10
- data/lib/parse/model/classes/user.rb +8 -0
- data/lib/parse/model/core/actions.rb +65 -13
- data/lib/parse/model/core/embed_managed.rb +19 -14
- data/lib/parse/model/core/indexing.rb +108 -16
- data/lib/parse/model/core/querying.rb +29 -0
- data/lib/parse/model/model.rb +34 -3
- data/lib/parse/model/object.rb +42 -0
- data/lib/parse/query.rb +90 -24
- data/lib/parse/retrieval/agent_tool.rb +369 -0
- data/lib/parse/retrieval/chunk.rb +74 -0
- data/lib/parse/retrieval/chunker.rb +208 -0
- data/lib/parse/retrieval/retriever.rb +274 -0
- data/lib/parse/retrieval.rb +10 -0
- data/lib/parse/schema.rb +69 -20
- data/lib/parse/stack/version.rb +2 -2
- data/lib/parse/webhooks/payload.rb +62 -34
- data/lib/parse/webhooks.rb +15 -3
- data/parse-stack-next.gemspec +1 -1
- data/scripts/docker/docker-compose.atlas.yml +14 -10
- data/scripts/docker/docker-compose.test.yml +24 -20
- data/scripts/docker/mongo-init.js +3 -3
- data/scripts/start-parse.sh +10 -0
- data/scripts/start_mcp_server.rb +1 -1
- data/scripts/test_server_connection.rb +1 -1
- data/scripts/vector_prototype/create_vector_index.js +1 -1
- data/scripts/vector_prototype/fetch_embeddings.py +2 -2
- data/scripts/vector_prototype/query_prototype.rb +1 -1
- data/scripts/vector_prototype/run.sh +4 -4
- metadata +10 -2
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require_relative "chunker"
|
|
5
|
+
require_relative "chunk"
|
|
6
|
+
|
|
7
|
+
module Parse
|
|
8
|
+
# Retrieval-augmented-generation (RAG) helpers. `Parse::RAG` is a
|
|
9
|
+
# discoverability alias for this module.
|
|
10
|
+
#
|
|
11
|
+
# {.retrieve} is the agent-agnostic core: it embeds a natural-language
|
|
12
|
+
# query, runs Atlas `$vectorSearch` through the existing
|
|
13
|
+
# `Class.find_similar` (which enforces ACL/CLP mongo-direct), then
|
|
14
|
+
# splits each retrieved document's text field into scored
|
|
15
|
+
# {Parse::Retrieval::Chunk}s for presentation.
|
|
16
|
+
#
|
|
17
|
+
# The agent-facing `semantic_search` tool (see
|
|
18
|
+
# `lib/parse/retrieval/agent_tool.rb`) wraps {.retrieve} with the
|
|
19
|
+
# agent security envelope (tenant scope, `field_allowlist` projection,
|
|
20
|
+
# score quantization).
|
|
21
|
+
#
|
|
22
|
+
# == ACL model
|
|
23
|
+
#
|
|
24
|
+
# {.retrieve} does NOT implement a REST "two-stage" re-query. The
|
|
25
|
+
# vector path is mongo-direct only (Parse Server's REST `/aggregate`
|
|
26
|
+
# is master-key-only and bypasses ACL — see the project notes), and
|
|
27
|
+
# `acl_user:` / `acl_role:` scopes have no REST equivalent. ACL is
|
|
28
|
+
# enforced inside `find_similar` via a post-`$vectorSearch` `_rperm`
|
|
29
|
+
# `$match`. Scope kwargs (`session_token:` / `acl_user:` /
|
|
30
|
+
# `acl_role:` / `master:`) pass straight through `**scope_opts`.
|
|
31
|
+
module Retrieval
|
|
32
|
+
# Raised when a tenant-scope value conflicts with a caller-supplied
|
|
33
|
+
# `vector_filter` constraint on the same field — a scope-spoofing
|
|
34
|
+
# attempt. Mirrors the agent layer's tenant-scope refusal.
|
|
35
|
+
class TenantScopeConflict < ArgumentError
  # Marker subclass only: adds no behavior beyond ArgumentError, so callers
  # can rescue either this class or ArgumentError.
end
|
|
36
|
+
|
|
37
|
+
# Raised when the text field to chunk cannot be inferred from the
|
|
38
|
+
# class's `embed` declarations and was not passed explicitly.
|
|
39
|
+
class AmbiguousTextField < ArgumentError
  # Marker subclass only: adds no behavior beyond ArgumentError, so callers
  # can rescue either this class or ArgumentError.
end
|
|
40
|
+
|
|
41
|
+
module_function
|
|
42
|
+
|
|
43
|
+
# Recursively refuse any underscore-prefixed key, at any depth, in a
|
|
44
|
+
# caller-supplied filter. This is distinct from (and stricter than)
|
|
45
|
+
# the agent layer's flat `validate_keys!`: a Mongo-style filter is a
|
|
46
|
+
# nested structure, and an underscore key buried inside `$or` /
|
|
47
|
+
# `$elemMatch` / a hash value could clobber tenant scope or reach a
|
|
48
|
+
# reserved column (`_rperm`, `_p_*`, `_auth_data_*`). The walk is
|
|
49
|
+
# unconditional — it does not special-case operators.
|
|
50
|
+
#
|
|
51
|
+
# @param obj [Object] a filter Hash/Array (or anything; scalars pass).
|
|
52
|
+
# @param path [Array<String>] internal — accumulates the key path for
|
|
53
|
+
# the error message.
|
|
54
|
+
# @raise [ArgumentError] on any `_`-prefixed key.
|
|
55
|
+
def assert_no_underscore_keys!(obj, path = [])
  # Depth-first walk over Hashes and Arrays. Anything else is a leaf and is
  # accepted as-is. Returns `obj` unchanged so the call can be chained.
  if obj.is_a?(Hash)
    obj.each_pair do |key, nested|
      # `trail` is the full key path so far; it doubles as the error location.
      trail = path + [key.to_s]
      if trail.last.start_with?("_")
        raise ArgumentError,
              "filter key '#{trail.join(".")}' is reserved (underscore-prefixed)."
      end
      assert_no_underscore_keys!(nested, trail)
    end
  elsif obj.is_a?(Array)
    obj.each_with_index do |element, position|
      # Array positions appear in the path as "[n]" segments.
      assert_no_underscore_keys!(element, path + ["[#{position}]"])
    end
  end
  obj
end
|
|
71
|
+
|
|
72
|
+
# Retrieve and chunk documents semantically similar to `query`.
|
|
73
|
+
#
|
|
74
|
+
# @param query [String] natural-language query.
|
|
75
|
+
# @param klass [Class, String] a Parse::Object subclass (or its
|
|
76
|
+
# class name) declaring a `:vector` property. `class:` is accepted
|
|
77
|
+
# as an alias.
|
|
78
|
+
# @param field [Symbol, nil] the `:vector` property to search.
|
|
79
|
+
# Auto-resolved by `find_similar` when the class has exactly one.
|
|
80
|
+
# @param text_field [Symbol, nil] the text property to chunk for
|
|
81
|
+
# presentation. Defaults to the sole text source of the class's
|
|
82
|
+
# `embed` declaration; raises {AmbiguousTextField} when it can't be
|
|
83
|
+
# inferred.
|
|
84
|
+
# @param k [Integer] number of documents to retrieve. Default 10.
|
|
85
|
+
# @param filter [Hash, nil] post-`$vectorSearch` `$match` filter.
|
|
86
|
+
# @param vector_filter [Hash, nil] Atlas-native pre-search filter.
|
|
87
|
+
# @param chunker [#chunk_with_meta, #chunk, nil] chunking strategy.
|
|
88
|
+
# Defaults to {Chunker::FixedSizeOverlap}.
|
|
89
|
+
# @param tenant_scope [Hash, nil] `{ field:, value: }` merged into
|
|
90
|
+
# `vector_filter` (closing the cross-tenant existence side
|
|
91
|
+
# channel) — not just a post-stage match.
|
|
92
|
+
# @param score_quantize [Boolean] round scores to 1 decimal (limits
|
|
93
|
+
# membership-inference probing in non-admin contexts).
|
|
94
|
+
# @param source_transform [#call, nil] optional callable applied to
|
|
95
|
+
# each raw source record before it is stored on a Chunk. The agent
|
|
96
|
+
# tool injects tenant-scope assertion + `field_allowlist`
|
|
97
|
+
# projection here; a `StandardError` raised by the callable
|
|
98
|
+
# propagates and aborts the whole call (fail-closed). Kept as an
|
|
99
|
+
# injection point so this model-layer method stays free of any
|
|
100
|
+
# agent-layer dependency.
|
|
101
|
+
# @param hybrid [Object, nil] reserved — raises {NotImplementedError}
|
|
102
|
+
# if truthy. Hybrid (vector + lexical) retrieval lands in a later
|
|
103
|
+
# release; the kwarg locks the API shape now.
|
|
104
|
+
# @param rerank [Object, nil] reserved — raises {NotImplementedError}
|
|
105
|
+
#   if truthy. Cross-encoder rerank lands in a later release.
|
|
106
|
+
# @param scope_opts [Hash] ACL/CLP scope kwargs forwarded verbatim to
|
|
107
|
+
# `find_similar`: `session_token:` / `acl_user:` / `acl_role:` /
|
|
108
|
+
# `master:`.
|
|
109
|
+
# @return [Array<Parse::Retrieval::Chunk>] descending by score; chunk
|
|
110
|
+
# order within a document is positional.
|
|
111
|
+
def retrieve(query:, klass: nil, field: nil, text_field: nil, k: 10,
             filter: nil, vector_filter: nil, chunker: nil,
             tenant_scope: nil, score_quantize: false,
             source_transform: nil, hybrid: nil, rerank: nil,
             **scope_opts)
  # Passes `query` as `text:` to `klass.find_similar` (raw mode), then turns
  # every returned document into Chunks via `build_chunks_for`. Returns []
  # when the search yields nil or no hits.
  #
  # Both reserved kwargs are checked by truthiness, so `hybrid: false` /
  # `rerank: false` are accepted exactly like nil.
  if hybrid
    raise NotImplementedError,
          "Parse::Retrieval.retrieve: `hybrid:` is reserved for a future release."
  end
  if rerank
    raise NotImplementedError,
          "Parse::Retrieval.retrieve: `rerank:` is reserved for a future release."
  end

  # `class` is a reserved word, so the `class:` alias arrives via
  # **scope_opts. Remove it unconditionally: if it were only deleted when
  # `klass:` is absent, a caller passing both would have `class:` forwarded
  # to `find_similar` as a bogus scope kwarg. `klass:` wins when both are set.
  class_alias = scope_opts.delete(:class)
  klass = resolve_class!(klass || class_alias)

  unless query.is_a?(String) && !query.strip.empty?
    raise ArgumentError, "Parse::Retrieval.retrieve: `query:` must be a non-empty String."
  end

  resolved_text_field = (text_field || infer_text_field!(klass)).to_sym
  # The tenant scope is folded into the pre-search filter before the query runs.
  merged_vector_filter = fold_tenant_scope(klass, vector_filter, tenant_scope)
  chunker ||= default_chunker

  raw_hits = klass.find_similar(
    text: query,
    k: k,
    field: field,
    filter: filter,
    vector_filter: merged_vector_filter,
    raw: true,
    **scope_opts,
  )
  return [] if raw_hits.nil? || raw_hits.empty?

  # Raw hits are keyed by wire/storage column names, not Ruby property names.
  text_wire = wire_name(klass, resolved_text_field)

  raw_hits.flat_map do |doc|
    build_chunks_for(doc, klass, text_wire, score_quantize, source_transform, chunker)
  end
end
|
|
150
|
+
|
|
151
|
+
# @!visibility private
|
|
152
|
+
def resolve_class!(klass)
  # nil and Class values are taken as-is; anything else is looked up by its
  # string form through Parse::Model.find_class.
  candidate =
    if klass.nil? || klass.is_a?(Class)
      klass
    else
      Parse::Model.find_class(klass.to_s)
    end

  # Accept only a Class that exposes `find_similar`.
  return candidate if candidate.is_a?(Class) && candidate.respond_to?(:find_similar)

  raise ArgumentError,
        "Parse::Retrieval.retrieve: `klass:`/`class:` must be a Parse::Object " \
        "subclass with a :vector property (got #{klass.inspect})."
end
|
|
169
|
+
|
|
170
|
+
# @!visibility private
|
|
171
|
+
# Infer the text field to chunk from the class's `embed` directives:
|
|
172
|
+
# the sole text (non-image) source field. Raises when zero or more
|
|
173
|
+
# than one candidate exists — the caller must then pass `text_field:`.
|
|
174
|
+
def infer_text_field!(klass)
  # Classes without `embed_directives` contribute no candidates.
  declared = klass.respond_to?(:embed_directives) ? klass.embed_directives.values : []
  # Keep every directive except those that report themselves as image directives.
  text_only = declared.select do |directive|
    !(directive.respond_to?(:image?) && directive.image?)
  end
  candidates = text_only.flat_map { |directive| directive.sources }.uniq

  # Exactly one distinct source field is required; zero or several is ambiguous.
  if candidates.length != 1
    raise AmbiguousTextField,
          "Parse::Retrieval.retrieve: cannot infer the text field to chunk for " \
          "#{klass} (candidates: #{candidates.inspect}); pass `text_field:` explicitly."
  end
  candidates.first
end
|
|
183
|
+
|
|
184
|
+
# @!visibility private
|
|
185
|
+
def default_chunker
  # A new chunker instance per call, built with size: 800 and overlap: 100.
  strategy = Chunker::FixedSizeOverlap
  strategy.new(size: 800, overlap: 100)
end
|
|
188
|
+
|
|
189
|
+
# @!visibility private
|
|
190
|
+
# Merge the tenant scope into the Atlas pre-search filter using the
|
|
191
|
+
# field's wire/storage column name. A pre-existing constraint on the
|
|
192
|
+
# same field with a different value is a spoof attempt and is refused.
|
|
193
|
+
def fold_tenant_scope(klass, vector_filter, tenant_scope)
  # Returns a copy of `vector_filter` with the tenant column pinned to the
  # scope value under its String wire name. The caller's Hash is never
  # mutated. Raises TenantScopeConflict when the filter already constrains
  # that column to anything other than the scope value.
  return vector_filter if tenant_scope.nil?

  # Both Symbol and String keys are accepted. `value` goes through `key?`
  # so an explicit `value: nil` / `value: false` is honored.
  field = tenant_scope[:field] || tenant_scope["field"]
  value = tenant_scope.key?(:value) ? tenant_scope[:value] : tenant_scope["value"]
  # NOTE(review): a scope Hash without a field leaves the filter unscoped
  # (existing behavior, kept for compatibility) — confirm this is intended.
  return vector_filter if field.nil?

  wire = wire_name(klass, field)
  base = vector_filter ? vector_filter.dup : {}

  # Inspect EVERY key that names the tenant column, whether Symbol or
  # String. Checking only the first match would let a filter carrying both
  # `"tenantId"` and `:tenantId` keep a conflicting value past the check.
  same_field_keys = base.keys.select { |key| key.to_s == wire }
  clashing = same_field_keys.reject { |key| base[key] == value }
  unless clashing.empty?
    raise TenantScopeConflict,
          "Parse::Retrieval.retrieve: vector_filter pins #{wire.inspect} to " \
          "#{base[clashing.first].inspect} but the tenant scope requires #{value.inspect}."
  end

  # Collapse all spellings of the column into the single String wire key so
  # the merged filter never carries a duplicate Symbol/String pair.
  same_field_keys.each { |key| base.delete(key) }
  base[wire] = value
  base
end
|
|
210
|
+
|
|
211
|
+
# @!visibility private
|
|
212
|
+
# Ruby property symbol -> wire/storage column name. Prefers the
|
|
213
|
+
# class's explicit field_map alias; falls back to lowerCamelCase
|
|
214
|
+
# columnization. Matches the resolution MetadataRegistry uses.
|
|
215
|
+
def wire_name(klass, field)
  property = field.to_sym
  # An explicit alias from the class's field_map wins when the class has one.
  aliases = klass.respond_to?(:field_map) ? klass.field_map : {}
  column = aliases[property]
  # Otherwise fall back to `String#columnize` on the property name.
  column ||= property.to_s.columnize
  column.to_s
end
|
|
221
|
+
|
|
222
|
+
# @!visibility private
|
|
223
|
+
def fetch_field(doc, wire, sym)
  # Key lookup order: `wire` as given, `wire` as a Symbol, `sym` as a
  # String, then `sym` as given. Presence is tested with `key?`, so a key
  # that exists with a nil value still wins over the later candidates.
  if doc.key?(wire)
    doc[wire]
  elsif doc.key?(wire.to_sym)
    doc[wire.to_sym]
  elsif doc.key?(sym.to_s)
    doc[sym.to_s]
  else
    # Last resort: plain lookup, which yields nil for a plain Hash miss.
    doc[sym]
  end
end
|
|
229
|
+
|
|
230
|
+
# @!visibility private
|
|
231
|
+
def build_chunks_for(doc, klass, text_wire, score_quantize, source_transform, chunker)
  # Turns one raw hit into zero or more Chunks. The id is read from `_id`
  # first, then `objectId`, under String or Symbol keys.
  doc_id = (doc["_id"] || doc[:_id] || doc["objectId"] || doc[:objectId]).to_s
  score = quantize_score(doc["_vscore"] || doc[:_vscore], score_quantize)

  # NOTE(review): the wire name is passed for both lookup arguments, so the
  # Ruby property name is never tried as a fallback — confirm this is intended.
  text = fetch_field(doc, text_wire, text_wire)

  # Prefer `chunk_with_meta` (which also reports truncation). Use plain
  # `chunk` when the chunker lacks it or when it returns nil/false.
  meta = chunker.chunk_with_meta(text) if chunker.respond_to?(:chunk_with_meta)
  if meta
    pieces = meta[:chunks]
    truncated = meta[:truncated]
  else
    pieces = Array(chunker.chunk(text))
    truncated = false
  end
  # A hit that produces no chunks is skipped entirely; `source_transform`
  # is not invoked for it.
  return [] if pieces.empty?

  # The transform runs once per document; every chunk shares its result.
  source = source_transform ? source_transform.call(doc) : doc
  total = pieces.length
  pieces.each_with_index.map do |content, index|
    Chunk.new(
      id: "#{doc_id}##{index}",
      content: content,
      score: score,
      source: source,
      metadata: {
        chunk_index: index,
        chunk_count: total,
        chunks_truncated: truncated,
        object_id: doc_id,
        class: klass.parse_class,
      },
    )
  end
end
|
|
262
|
+
|
|
263
|
+
# @!visibility private
|
|
264
|
+
def quantize_score(score, quantize)
  # A missing score stays missing rather than being coerced to 0.0.
  return score if score.nil?

  value = score.to_f
  return value unless quantize

  # Scale, round to the nearest integer, scale back: one decimal place.
  (value * 10).round / 10.0
end
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# Discoverability alias. "RAG" ages badly as a term; `Retrieval` is
|
|
272
|
+
# the canonical name.
|
|
273
|
+
RAG = Retrieval
|
|
274
|
+
end
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Parse::Retrieval — retrieval-augmented-generation (RAG) helpers.
|
|
5
|
+
#
|
|
6
|
+
# Entry point that loads the chunker, the {Parse::Retrieval::Chunk}
|
|
7
|
+
# value object, and the {Parse::Retrieval.retrieve} core. The
|
|
8
|
+
# `semantic_search` agent tool (which depends on the agent layer) is
|
|
9
|
+
# loaded separately from `lib/parse/agent.rb`.
|
|
10
|
+
require_relative "retrieval/retriever"
|
data/lib/parse/schema.rb
CHANGED
|
@@ -231,16 +231,23 @@ module Parse
|
|
|
231
231
|
end
|
|
232
232
|
|
|
233
233
|
# Fields defined locally but missing on server.
|
|
234
|
-
#
|
|
234
|
+
#
|
|
235
|
+
# Iterates the model's `field_map` (one entry per canonical property,
|
|
236
|
+
# canonical name => wire column name) rather than `fields` (which carries
|
|
237
|
+
# both the snake_case and camelCase keys for every property and therefore
|
|
238
|
+
# double-counts multi-word fields). The wire name resolved from
|
|
239
|
+
# `field_map` is the authoritative server column — including custom
|
|
240
|
+
# `field:` mappings — so this both dedupes and fixes custom-column
|
|
241
|
+
# detection. Result is keyed by the CANONICAL (snake) name with the type
|
|
242
|
+
# taken from `fields[name]`.
|
|
243
|
+
# @return [Hash] canonical field name => type pairs
|
|
235
244
|
def missing_on_server
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
local = local_fields
|
|
239
|
-
server = server_field_names
|
|
245
|
+
server = server_exists? ? server_field_names : []
|
|
240
246
|
missing = {}
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
247
|
+
@model_class.field_map.each do |name, wire|
|
|
248
|
+
next if core_field?(name)
|
|
249
|
+
next if server.include?(wire.to_s)
|
|
250
|
+
missing[name] = @model_class.fields[name]
|
|
244
251
|
end
|
|
245
252
|
missing
|
|
246
253
|
end
|
|
@@ -262,15 +269,25 @@ module Parse
|
|
|
262
269
|
end
|
|
263
270
|
|
|
264
271
|
# Fields with type mismatches.
|
|
265
|
-
#
|
|
272
|
+
#
|
|
273
|
+
# Iterates `field_map` (canonical name => wire column) rather than
|
|
274
|
+
# deriving the server key with `camelize(:lower)`, so a property with a
|
|
275
|
+
# custom `field:` wire column (e.g. `property :post_id, field:
|
|
276
|
+
# "postIdentifier"`) resolves to its real server column instead of a
|
|
277
|
+
# camelized guess. This both dedupes multi-word fields (which appear
|
|
278
|
+
# under two keys in `fields`) and matches the `missing_on_server`
|
|
279
|
+
# resolution path, so type drift on custom-mapped columns is no longer
|
|
280
|
+
# silently skipped.
|
|
281
|
+
# @return [Hash] canonical field name => { local: type, server: type }
|
|
266
282
|
def type_mismatches
|
|
267
283
|
return {} unless server_exists?
|
|
268
284
|
|
|
269
285
|
mismatches = {}
|
|
270
|
-
|
|
286
|
+
@model_class.field_map.each do |name, wire|
|
|
271
287
|
next if core_field?(name)
|
|
272
|
-
|
|
273
|
-
|
|
288
|
+
local_type = @model_class.fields[name]
|
|
289
|
+
next if local_type.nil?
|
|
290
|
+
server_type = @server_schema.field_type(wire.to_s)
|
|
274
291
|
next unless server_type
|
|
275
292
|
|
|
276
293
|
# Normalize types for comparison
|
|
@@ -285,11 +302,30 @@ module Parse
|
|
|
285
302
|
end
|
|
286
303
|
|
|
287
304
|
# Check if schemas are in sync.
|
|
305
|
+
#
|
|
306
|
+
# Strict / bidirectional: requires the local and server schemas to match
|
|
307
|
+
# in BOTH directions — no fields missing on the server, no fields missing
|
|
308
|
+
# locally, and no type mismatches. A server that is a strict superset of
|
|
309
|
+
# the local model is NOT "in sync" by this measure (use
|
|
310
|
+
# {#server_covers_local?} for the one-way local ⊆ server check).
|
|
288
311
|
# @return [Boolean]
|
|
289
312
|
def in_sync?
|
|
290
313
|
missing_on_server.empty? && missing_locally.empty? && type_mismatches.empty?
|
|
291
314
|
end
|
|
292
315
|
|
|
316
|
+
# Check whether the server schema covers every locally-defined field.
|
|
317
|
+
#
|
|
318
|
+
# One-way (local ⊆ server): true when nothing the model declares is
|
|
319
|
+
# missing on the server and there are no type mismatches. Unlike
|
|
320
|
+
# {#in_sync?}, this ignores server-only columns, so a server that is a
|
|
321
|
+
# strict superset of the local model still satisfies it. This is the
|
|
322
|
+
# predicate that determines whether a migration has any work to do —
|
|
323
|
+
# extra server columns are not something the migrator would add.
|
|
324
|
+
# @return [Boolean]
|
|
325
|
+
def server_covers_local?
|
|
326
|
+
missing_on_server.empty? && type_mismatches.empty?
|
|
327
|
+
end
|
|
328
|
+
|
|
293
329
|
# Generate a human-readable summary.
|
|
294
330
|
# @return [String]
|
|
295
331
|
def summary
|
|
@@ -355,9 +391,17 @@ module Parse
|
|
|
355
391
|
end
|
|
356
392
|
|
|
357
393
|
# Check if migration is needed.
|
|
394
|
+
#
|
|
395
|
+
# A migration is needed when the class does not yet exist on the server,
|
|
396
|
+
# or when the server does not already cover every locally-defined field.
|
|
397
|
+
# Defined in terms of the one-way {SchemaDiff#server_covers_local?} rather
|
|
398
|
+
# than the strict bidirectional {SchemaDiff#in_sync?} so that a server
|
|
399
|
+
# which is a strict superset of the local model (extra server-only
|
|
400
|
+
# columns the migrator would never add) does not report a "needed"
|
|
401
|
+
# migration with zero operations.
|
|
358
402
|
# @return [Boolean]
|
|
359
403
|
def needed?
|
|
360
|
-
!@diff.
|
|
404
|
+
!@diff.server_exists? || !@diff.server_covers_local?
|
|
361
405
|
end
|
|
362
406
|
|
|
363
407
|
# Get the operations that would be performed.
|
|
@@ -372,7 +416,7 @@ module Parse
|
|
|
372
416
|
@diff.missing_on_server.each do |name, type|
|
|
373
417
|
ops << {
|
|
374
418
|
action: :add_field,
|
|
375
|
-
field: name.to_s
|
|
419
|
+
field: @model_class.field_map[name].to_s,
|
|
376
420
|
type: REVERSE_TYPE_MAP[type] || "String",
|
|
377
421
|
}
|
|
378
422
|
end
|
|
@@ -424,7 +468,7 @@ module Parse
|
|
|
424
468
|
|
|
425
469
|
# Add missing fields
|
|
426
470
|
@diff.missing_on_server.each do |name, type|
|
|
427
|
-
field_name = name.to_s
|
|
471
|
+
field_name = @model_class.field_map[name].to_s
|
|
428
472
|
field_schema = { "fields" => { field_name => field_definition(type) } }
|
|
429
473
|
|
|
430
474
|
response = @client.update_schema(@model_class.parse_class, field_schema)
|
|
@@ -444,15 +488,20 @@ module Parse
|
|
|
444
488
|
|
|
445
489
|
def build_schema
|
|
446
490
|
fields = {}
|
|
447
|
-
|
|
491
|
+
# Iterate `field_map` (canonical name => wire column) rather than
|
|
492
|
+
# `fields`, which carries both the snake_case and camelCase keys for
|
|
493
|
+
# every property and would emit a duplicate/phantom column for each
|
|
494
|
+
# multi-word or custom-`field:` property.
|
|
495
|
+
@model_class.field_map.each do |name, wire|
|
|
448
496
|
next if %i[id object_id created_at updated_at acl objectId createdAt updatedAt ACL].include?(name)
|
|
449
|
-
|
|
450
|
-
fields[field_name] = field_definition(type)
|
|
497
|
+
fields[wire.to_s] = field_definition(@model_class.fields[name])
|
|
451
498
|
end
|
|
452
499
|
|
|
453
|
-
# Add pointer targets
|
|
500
|
+
# Add pointer targets. `references` is keyed by the wire column name
|
|
501
|
+
# (the `parse_field`), so use it as-is — do not re-camelize, which
|
|
502
|
+
# would corrupt custom `field:` pointer columns.
|
|
454
503
|
@model_class.references.each do |name, target_class|
|
|
455
|
-
field_name = name.to_s
|
|
504
|
+
field_name = name.to_s
|
|
456
505
|
fields[field_name] = {
|
|
457
506
|
"type" => "Pointer",
|
|
458
507
|
"targetClass" => target_class.to_s,
|
data/lib/parse/stack/version.rb
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
4
|
module Parse
|
|
5
|
-
# @author Anthony Persaud, Henry Spindell
|
|
5
|
+
# @author Adrian Curtin, Anthony Persaud, Henry Spindell
|
|
6
6
|
# The Parse Server SDK for Ruby
|
|
7
7
|
module Stack
|
|
8
8
|
# The current version.
|
|
9
|
-
VERSION = "5.
|
|
9
|
+
VERSION = "5.2.1"
|
|
10
10
|
end
|
|
11
11
|
end
|
|
@@ -78,24 +78,35 @@ module Parse
|
|
|
78
78
|
hash = Hash[hash.map { |k, v| [k.to_s.underscore.to_sym, v] }]
|
|
79
79
|
@raw = hash
|
|
80
80
|
@master = hash[:master]
|
|
81
|
-
#
|
|
82
|
-
#
|
|
83
|
-
#
|
|
84
|
-
#
|
|
85
|
-
#
|
|
86
|
-
#
|
|
87
|
-
#
|
|
81
|
+
# Webhook trigger payloads (beforeSave/afterSave/etc.) are delivered by
|
|
82
|
+
# Parse Server and, when a webhook key is configured (the default; see
|
|
83
|
+
# Parse::Webhooks.allow_unauthenticated for the opt-out used in tests /
|
|
84
|
+
# local dev), authenticated by it -- so they are treated as trusted,
|
|
85
|
+
# server-authoritative state. A handler is meant to receive the full
|
|
86
|
+
# object -- createdAt/updatedAt, ACL, internal fields and all. The only
|
|
87
|
+
# thing stripped here is genuine credential material a handler never
|
|
88
|
+
# legitimately needs to read (live session tokens, offline-crackable
|
|
89
|
+
# password hashes); see WEBHOOK_TRIGGER_CREDENTIAL_KEYS. Protection
|
|
90
|
+
# against *persisting* forged privileged fields lives on the write path
|
|
91
|
+
# (changes_payload emits only declared, dirty-tracked properties), not on
|
|
92
|
+
# this read path.
|
|
88
93
|
if hash[:user].present?
|
|
89
|
-
|
|
94
|
+
# Trusted hydration via .build (not .new) so server-sent timestamps and
|
|
95
|
+
# data fields remain readable; credentials are removed first. Note
|
|
96
|
+
# Parse::User applies its own protections, so `payload.user.auth_data`
|
|
97
|
+
# is not exposed here. The built object is pristine, so a handler that
|
|
98
|
+
# saves payload.user transmits nothing (no dirty changes) and cannot
|
|
99
|
+
# persist forgeries.
|
|
100
|
+
@user = Parse::User.build(self.class.scrub_credentials(hash[:user]))
|
|
90
101
|
end
|
|
91
102
|
@installation_id = hash[:installation_id]
|
|
92
103
|
@params = hash[:params]
|
|
93
104
|
@params = @params.with_indifferent_access if @params.is_a?(Hash)
|
|
94
105
|
@function_name = hash[:function_name]
|
|
95
|
-
@object = self.class.
|
|
106
|
+
@object = self.class.scrub_credentials(hash[:object])
|
|
96
107
|
@trigger_name = hash[:trigger_name]
|
|
97
|
-
@original = self.class.
|
|
98
|
-
@update = self.class.
|
|
108
|
+
@original = self.class.scrub_credentials(hash[:original])
|
|
109
|
+
@update = self.class.scrub_credentials(hash[:update]) || {}
|
|
99
110
|
# Added for beforeFind and afterFind triggers
|
|
100
111
|
@query = hash[:query]
|
|
101
112
|
@objects = hash[:objects] || []
|
|
@@ -103,25 +114,32 @@ module Parse
|
|
|
103
114
|
end
|
|
104
115
|
|
|
105
116
|
# @!visibility private
|
|
106
|
-
#
|
|
107
|
-
#
|
|
108
|
-
#
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
|
|
117
|
+
# Genuine credential material that is stripped from every webhook trigger
|
|
118
|
+
# payload before a handler can see it, even though the rest of the
|
|
119
|
+
# (trusted, server-authoritative) payload passes through untouched. A
|
|
120
|
+
# session token is a live bearer credential; a password hash is
|
|
121
|
+
# offline-crackable. A handler has no legitimate reason to read either,
|
|
122
|
+
# and removing them keeps them out of logs and out of any object a handler
|
|
123
|
+
# might persist. Everything else Parse Server sends -- createdAt/updatedAt,
|
|
124
|
+
# ACL, authData, roles, _rperm/_wperm, internal fields -- is preserved so
|
|
125
|
+
# the handler observes the full object. Write-side protection
|
|
126
|
+
# (changes_payload emits only declared, dirty-tracked properties) is what
|
|
127
|
+
# prevents persisting forged privileged fields.
|
|
128
|
+
WEBHOOK_TRIGGER_CREDENTIAL_KEYS = %w[
|
|
129
|
+
sessionToken session_token
|
|
130
|
+
_hashed_password _password_history
|
|
131
|
+
].freeze
|
|
112
132
|
|
|
113
133
|
# @!visibility private
|
|
114
|
-
# Returns a copy of +obj+ with
|
|
115
|
-
# removed
|
|
116
|
-
# Operates on string and symbol keys (Parse Server uses camelCase
|
|
134
|
+
# Returns a copy of +obj+ with only +WEBHOOK_TRIGGER_CREDENTIAL_KEYS+
|
|
135
|
+
# removed. Operates on string and symbol keys (Parse Server uses camelCase
|
|
117
136
|
# strings on the wire; downstream code may have already symbolized).
|
|
118
137
|
# Pass-through for non-Hash input.
|
|
119
|
-
def self.
|
|
138
|
+
def self.scrub_credentials(obj)
|
|
120
139
|
return obj unless obj.is_a?(Hash)
|
|
121
|
-
denied =
|
|
140
|
+
denied = WEBHOOK_TRIGGER_CREDENTIAL_KEYS
|
|
122
141
|
obj.reject do |k, _|
|
|
123
142
|
name = k.to_s
|
|
124
|
-
next false if PAYLOAD_PRESERVED_KEYS.include?(name)
|
|
125
143
|
denied.include?(name) || denied.include?(name.underscore)
|
|
126
144
|
end
|
|
127
145
|
end
|
|
@@ -278,24 +296,34 @@ module Parse
|
|
|
278
296
|
if @original.present? && @original.is_a?(Hash)
|
|
279
297
|
o = Parse::Object.build @original, parse_class
|
|
280
298
|
o.apply_attributes! @object, dirty_track: true
|
|
281
|
-
|
|
282
|
-
if o.is_a?(Parse::User) && @update.present? && @update["authData"].present?
|
|
283
|
-
o.auth_data = @update["authData"]
|
|
284
|
-
end
|
|
285
299
|
return o
|
|
286
300
|
else #else the object must be new
|
|
287
301
|
klass = Parse::Object.find_class parse_class
|
|
288
302
|
# if we have a class, return that with updated changes, otherwise
|
|
289
303
|
# default to regular object
|
|
290
|
-
if klass.present?
|
|
291
|
-
o = klass.new(@object || {})
|
|
292
|
-
if o.is_a?(Parse::User) && @update.present? && @update["authData"].present?
|
|
293
|
-
o.auth_data = @update["authData"]
|
|
294
|
-
end
|
|
295
|
-
return o
|
|
296
|
-
end # if klass.present?
|
|
304
|
+
return klass.new(@object || {}) if klass.present?
|
|
297
305
|
end # if we have original
|
|
298
306
|
end # if before_trigger?
|
|
307
|
+
|
|
308
|
+
# afterSave on an UPDATE: build the prior state, then overlay the final
|
|
309
|
+
# state with dirty tracking so `*_changed?` / `changes` work inside
|
|
310
|
+
# afterSave handlers (symmetric with the beforeSave path above). The
|
|
311
|
+
# filter uses the timestamp-preserving INITIALIZE key set rather than the
|
|
312
|
+
# wide mass-assignment set: the wide set would strip the incoming
|
|
313
|
+
# `updatedAt` from the overlay, leaving the prior `updatedAt` and breaking
|
|
314
|
+
# `existed?`. The diff still excludes credentials / _rperm / _wperm /
|
|
315
|
+
# authData / roles, and an after-trigger response is only true/false, so
|
|
316
|
+
# there is no path for a forged privileged field to be persisted.
|
|
317
|
+
if after_save? && @original.present? && @original.is_a?(Hash)
|
|
318
|
+
o = Parse::Object.build @original, parse_class
|
|
319
|
+
o.apply_attributes! @object, dirty_track: true,
|
|
320
|
+
protected_set: Parse::Properties::PROTECTED_INITIALIZE_KEYS
|
|
321
|
+
return o
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
# afterSave on a CREATE (and every other trigger): the full object as the
|
|
325
|
+
# server sent it. createdAt/updatedAt survive (only credentials are
|
|
326
|
+
# scrubbed), so `new?` / `existed?` read correctly.
|
|
299
327
|
Parse::Object.build(@object, parse_class)
|
|
300
328
|
end
|
|
301
329
|
|
data/lib/parse/webhooks.rb
CHANGED
|
@@ -233,11 +233,23 @@ module Parse
|
|
|
233
233
|
# ran ActiveModel before_save callbacks locally. A client-spoofed
|
|
234
234
|
# `_RB_` without master falls through and runs them here.
|
|
235
235
|
unless trusted_ruby_initiated
|
|
236
|
-
|
|
237
|
-
# If
|
|
238
|
-
if
|
|
236
|
+
before_save_result = result.run_before_save_callbacks
|
|
237
|
+
# If a before_save callback halted the chain (returned false), reject the save.
|
|
238
|
+
if before_save_result == false
|
|
239
239
|
raise Parse::Webhooks::ResponseError, "Save halted by before_save callback"
|
|
240
240
|
end
|
|
241
|
+
# Parse Server exposes no separate beforeCreate trigger, so the
|
|
242
|
+
# beforeSave hook is the single point at which before_create must
|
|
243
|
+
# run for a client-initiated create. Run it AFTER before_save, for
|
|
244
|
+
# new objects only -- matching ActiveModel order (before_save wraps
|
|
245
|
+
# before_create) and mirroring the afterSave hook, which runs
|
|
246
|
+
# after_create then after_save. `original.nil?` marks a create.
|
|
247
|
+
if payload && payload.original.nil?
|
|
248
|
+
create_result = result.run_before_create_callbacks
|
|
249
|
+
if create_result == false
|
|
250
|
+
raise Parse::Webhooks::ResponseError, "Save halted by before_create callback"
|
|
251
|
+
end
|
|
252
|
+
end
|
|
241
253
|
end
|
|
242
254
|
# For before_save, return the changes payload (what Parse Server expects)
|
|
243
255
|
result = result.changes_payload
|
data/parse-stack-next.gemspec
CHANGED
|
@@ -6,7 +6,7 @@ require "parse/stack/version"
|
|
|
6
6
|
Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "parse-stack-next"
|
|
8
8
|
spec.version = Parse::Stack::VERSION
|
|
9
|
-
spec.authors =
|
|
9
|
+
spec.authors = ["Adrian Curtin", "Anthony Persaud", "Henry Spindell"]
|
|
10
10
|
spec.email = ["adrian+parse-stack@neurosynq.net"]
|
|
11
11
|
|
|
12
12
|
spec.summary = %q{Parse Server SDK for Ruby — ORM, queries, auth, and MongoDB-direct access}
|