parse-stack-next 4.5.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/release.yml +32 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/.vscode/settings.json +3 -0
- data/CHANGELOG.md +305 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +88 -0
- data/lib/parse/cache/redis.rb +249 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/create_lock.rb +14 -2
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +77 -5
- data/parse-stack.png +0 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "digest"
|
|
5
|
+
require_relative "../../embeddings"
|
|
6
|
+
require_relative "../vector"
|
|
7
|
+
|
|
8
|
+
module Parse
|
|
9
|
+
module Core
|
|
10
|
+
# Class-level `embed` macro for `:vector` properties.
|
|
11
|
+
#
|
|
12
|
+
# Lets a model declare which scalar fields feed into a managed
|
|
13
|
+
# embedding, and arranges for that embedding to be computed
|
|
14
|
+
# automatically on save whenever the source fields change.
|
|
15
|
+
#
|
|
16
|
+
# @example
|
|
17
|
+
# class Document < Parse::Object
|
|
18
|
+
# property :title, :string
|
|
19
|
+
# property :body, :string
|
|
20
|
+
# property :body_embedding, :vector, dimensions: 1536, provider: :openai
|
|
21
|
+
# embed :title, :body, into: :body_embedding
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# doc = Document.new(title: "hello", body: "world")
|
|
25
|
+
# doc.save # provider :openai is called once; body_embedding populated
|
|
26
|
+
#
|
|
27
|
+
# == Mechanics
|
|
28
|
+
#
|
|
29
|
+
# The class macro:
|
|
30
|
+
# 1. Validates that `into:` names a declared `:vector` property with
|
|
31
|
+
# `provider:` metadata.
|
|
32
|
+
# 2. Auto-declares a `<into>_digest` `:string` sibling property
|
|
33
|
+
# (override with `digest_field:`).
|
|
34
|
+
# 3. Registers a `before_save` callback that re-computes the
|
|
35
|
+
# embedding whenever the SHA-256 of the concatenated source
|
|
36
|
+
# fields differs from the stored digest. On first save the digest
|
|
37
|
+
# is blank and the embedding is always populated. On a save where
|
|
38
|
+
# no source field changed the digest matches and the callback is
|
|
39
|
+
# a no-op (zero provider calls).
|
|
40
|
+
# 4. Prepends a guard module that raises {ProtectedFieldError} on
|
|
41
|
+
# direct `body_embedding=` assignment from user code. The guard
|
|
42
|
+
# lifts only inside the managed write path (the before_save
|
|
43
|
+
# callback itself).
|
|
44
|
+
#
|
|
45
|
+
# Provider calls flow through {Parse::Embeddings.provider} — the
|
|
46
|
+
# provider is resolved by name at save time, so registering a
|
|
47
|
+
# provider can happen any time before the first save. Declaration
|
|
48
|
+
# never makes a network call.
|
|
49
|
+
#
|
|
50
|
+
# == Single vector per record (v5.0)
|
|
51
|
+
#
|
|
52
|
+
# `embed` produces exactly one vector per record. All declared
|
|
53
|
+
# source fields are concatenated (joined with "\n\n", blank values
|
|
54
|
+
# skipped) and sent to the provider as a single string. There is
|
|
55
|
+
# no built-in chunker in v5.0: long source text whose concatenation
|
|
56
|
+
# exceeds the provider's per-call token budget will be truncated
|
|
57
|
+
# provider-side, and the resulting vector will represent only the
|
|
58
|
+
# leading portion of the document.
|
|
59
|
+
#
|
|
60
|
+
# If your source text is long-form (full articles, long
|
|
61
|
+
# transcripts, multi-page PDFs), you have two options in v5.0:
|
|
62
|
+
#
|
|
63
|
+
# 1. Pre-chunk client-side and write each chunk as its own
|
|
64
|
+
# Parse::Object record with its own `embed` declaration.
|
|
65
|
+
# 2. Maintain a dedicated `Chunk` subclass that belongs_to the
|
|
66
|
+
# parent record, with `embed :content, into: :embedding` on the
|
|
67
|
+
# chunk class itself.
|
|
68
|
+
#
|
|
69
|
+
# A built-in chunker + `semantic_search` agent tool are scheduled
|
|
70
|
+
# for v5.1.
|
|
71
|
+
module EmbedManaged
|
|
72
|
+
# Raised when user code tries to assign directly to a vector
|
|
73
|
+
# property that's managed by an {.embed} declaration. The intent
|
|
74
|
+
# is to make it impossible to silently desync the stored vector
|
|
75
|
+
# from the digest — every write goes through the digest-tracked
|
|
76
|
+
# recompute path.
|
|
77
|
+
class ProtectedFieldError < StandardError; end
|
|
78
|
+
|
|
79
|
+
# Raised at class-declaration time when `embed` is called with
|
|
80
|
+
# arguments that can't produce a valid managed vector — missing
|
|
81
|
+
# source fields, unknown target, target without `:vector` type, or
|
|
82
|
+
# `:vector` property without `provider:` metadata.
|
|
83
|
+
class InvalidEmbedDeclaration < ArgumentError; end
|
|
84
|
+
|
|
85
|
+
# Internal: name of the Thread-local key under which the managed
|
|
86
|
+
# writer marks the symbol of the field it is currently writing.
|
|
87
|
+
# The guard module's setter checks this key to permit a single
|
|
88
|
+
# field write; the guard is otherwise closed.
|
|
89
|
+
WRITER_KEY = :parse_embed_managed_writer
|
|
90
|
+
|
|
91
|
+
# Frozen value-object capturing one `embed` declaration. Stored on
|
|
92
|
+
# the owning class under `embed_directives[into]` and passed to
|
|
93
|
+
# {EmbedManaged.recompute_embedding!} from the per-class
|
|
94
|
+
# before_save callback.
|
|
95
|
+
EmbedDirective = Struct.new(
|
|
96
|
+
:sources, :into, :digest_field, :input_type, :provider_name,
|
|
97
|
+
keyword_init: true,
|
|
98
|
+
) do
|
|
99
|
+
def freeze
|
|
100
|
+
sources.freeze
|
|
101
|
+
super
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# @!visibility private
|
|
106
|
+
def self.included(base)
|
|
107
|
+
base.extend(ClassMethods)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
module ClassMethods
|
|
111
|
+
# Per-class registry of {EmbedDirective}s keyed by target vector
|
|
112
|
+
# property symbol. Read by tests and tooling; written only by
|
|
113
|
+
# {#embed}.
|
|
114
|
+
def embed_directives
|
|
115
|
+
@embed_directives ||= {}
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Declare a managed embedding. See {EmbedManaged} for the
|
|
119
|
+
# full description.
|
|
120
|
+
#
|
|
121
|
+
# @param source_fields [Array<Symbol>] one or more scalar
|
|
122
|
+
# property names whose values are concatenated (joined with
|
|
123
|
+
#   "\n\n", `nil` and empty values skipped) to form the embed input.
|
|
124
|
+
# @param into [Symbol] the `:vector` property to populate.
|
|
125
|
+
# Must already be declared with `provider:` metadata.
|
|
126
|
+
# @param input_type [Symbol] forwarded to
|
|
127
|
+
# {Parse::Embeddings::Provider#embed_text}. Defaults to
|
|
128
|
+
# `:search_document` (the write-side counterpart to
|
|
129
|
+
# `find_similar(text:)`'s `:search_query`).
|
|
130
|
+
# @param digest_field [Symbol, nil] override for the digest
|
|
131
|
+
# sibling property. Defaults to `:"#{into}_digest"`. Auto-
|
|
132
|
+
# declared as `:string` if not already declared.
|
|
133
|
+
# @return [Symbol] the target vector field name.
|
|
134
|
+
# @raise [InvalidEmbedDeclaration] on declaration-time misuse.
|
|
135
|
+
def embed(*source_fields, into:, input_type: :search_document, digest_field: nil)
|
|
136
|
+
if source_fields.empty?
|
|
137
|
+
raise InvalidEmbedDeclaration,
|
|
138
|
+
"#{self}.embed: at least one source field is required."
|
|
139
|
+
end
|
|
140
|
+
into = into.to_sym
|
|
141
|
+
unless vector_properties.key?(into)
|
|
142
|
+
raise InvalidEmbedDeclaration,
|
|
143
|
+
"#{self}.embed: `into: :#{into}` is not a declared :vector property " \
|
|
144
|
+
"(declared :vector fields: #{vector_properties.keys.inspect})."
|
|
145
|
+
end
|
|
146
|
+
provider_name = vector_properties.dig(into, :provider)
|
|
147
|
+
if provider_name.nil?
|
|
148
|
+
raise InvalidEmbedDeclaration,
|
|
149
|
+
"#{self}.embed: `into: :#{into}` has no `provider:` declared on its :vector " \
|
|
150
|
+
"property. Add `provider: :openai` (or another registered name) to the " \
|
|
151
|
+
"property declaration."
|
|
152
|
+
end
|
|
153
|
+
sources = source_fields.map(&:to_sym)
|
|
154
|
+
missing = sources.reject { |f| fields.key?(f) }
|
|
155
|
+
unless missing.empty?
|
|
156
|
+
raise InvalidEmbedDeclaration,
|
|
157
|
+
"#{self}.embed: source fields #{missing.inspect} are not declared on this class."
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
digest_field = (digest_field || :"#{into}_digest").to_sym
|
|
161
|
+
unless fields.key?(digest_field)
|
|
162
|
+
property digest_field, :string
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
directive = EmbedDirective.new(
|
|
166
|
+
sources: sources,
|
|
167
|
+
into: into,
|
|
168
|
+
digest_field: digest_field,
|
|
169
|
+
input_type: input_type,
|
|
170
|
+
provider_name: provider_name,
|
|
171
|
+
).freeze
|
|
172
|
+
embed_directives[into] = directive
|
|
173
|
+
|
|
174
|
+
callback_method = :"_auto_embed_#{into}!"
|
|
175
|
+
define_method(callback_method) do
|
|
176
|
+
Parse::Core::EmbedManaged.recompute_embedding!(self, directive)
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
already_registered = _save_callbacks.any? do |cb|
|
|
180
|
+
cb.kind == :before && (cb.filter.to_sym rescue cb.filter) == callback_method
|
|
181
|
+
end
|
|
182
|
+
before_save callback_method unless already_registered
|
|
183
|
+
|
|
184
|
+
install_embed_writer_guard!(into, sources)
|
|
185
|
+
|
|
186
|
+
into
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# @!visibility private
|
|
190
|
+
# Prepend a module that intercepts the public `<into>=` setter
|
|
191
|
+
# and raises {ProtectedFieldError} unless the current thread has
|
|
192
|
+
# marked itself as the managed writer for this field.
|
|
193
|
+
def install_embed_writer_guard!(into, sources)
|
|
194
|
+
setter = :"#{into}="
|
|
195
|
+
guard = Module.new
|
|
196
|
+
field_sym = into
|
|
197
|
+
source_list = sources
|
|
198
|
+
guard.module_eval do
|
|
199
|
+
define_method(setter) do |val|
|
|
200
|
+
if Thread.current[Parse::Core::EmbedManaged::WRITER_KEY] == field_sym
|
|
201
|
+
super(val)
|
|
202
|
+
else
|
|
203
|
+
raise Parse::Core::EmbedManaged::ProtectedFieldError,
|
|
204
|
+
"#{self.class}##{field_sym} is managed by `embed` and cannot be " \
|
|
205
|
+
"assigned directly. Update source fields #{source_list.inspect} " \
|
|
206
|
+
"and save; the embedding will be recomputed automatically."
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
prepend(guard)
|
|
211
|
+
end
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
# @!visibility private
|
|
215
|
+
# Run the managed-write path with the writer guard lifted for
|
|
216
|
+
# exactly one field. Restores the prior value of the Thread-local
|
|
217
|
+
# on exit so nested calls (and unrelated callers on the same
|
|
218
|
+
# thread) are unaffected.
|
|
219
|
+
def self.with_writer(field)
|
|
220
|
+
prev = Thread.current[WRITER_KEY]
|
|
221
|
+
Thread.current[WRITER_KEY] = field
|
|
222
|
+
yield
|
|
223
|
+
ensure
|
|
224
|
+
Thread.current[WRITER_KEY] = prev
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
# @!visibility private
|
|
228
|
+
# before_save body. Computes the SHA-256 digest of the
|
|
229
|
+
# concatenated source-field values. If the digest matches the
|
|
230
|
+
# stored sibling AND the target vector is already populated, the
|
|
231
|
+
# method returns without contacting the provider. Otherwise it
|
|
232
|
+
# calls the provider, validates the response shape, wraps the
|
|
233
|
+
# vector, and writes both the vector and digest under the writer
|
|
234
|
+
# guard (so the public setters' dirty-tracking fires).
|
|
235
|
+
def self.recompute_embedding!(record, directive)
|
|
236
|
+
text = build_source_text(record, directive.sources)
|
|
237
|
+
stored_digest = record.public_send(directive.digest_field)
|
|
238
|
+
target_present = !record.public_send(directive.into).nil?
|
|
239
|
+
|
|
240
|
+
if text.empty?
|
|
241
|
+
if target_present || !stored_digest.nil?
|
|
242
|
+
with_writer(directive.into) do
|
|
243
|
+
record.public_send(:"#{directive.into}=", nil)
|
|
244
|
+
end
|
|
245
|
+
record.public_send(:"#{directive.digest_field}=", nil)
|
|
246
|
+
end
|
|
247
|
+
return
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
digest = digest_for(text)
|
|
251
|
+
return if stored_digest == digest && target_present
|
|
252
|
+
|
|
253
|
+
provider = Parse::Embeddings.provider(directive.provider_name)
|
|
254
|
+
vectors = provider.embed_text([text], input_type: directive.input_type)
|
|
255
|
+
unless vectors.is_a?(Array) && vectors.length == 1 && vectors.first.is_a?(Array)
|
|
256
|
+
raise Parse::Embeddings::InvalidResponseError,
|
|
257
|
+
"Parse::Core::EmbedManaged (#{record.class}##{directive.into}): provider " \
|
|
258
|
+
"#{directive.provider_name.inspect} did not return a single vector " \
|
|
259
|
+
"(got #{vectors.inspect[0, 80]})."
|
|
260
|
+
end
|
|
261
|
+
vector = Parse::Vector.new(vectors.first)
|
|
262
|
+
expected_dims = record.class.vector_properties.dig(directive.into, :dimensions)
|
|
263
|
+
if expected_dims && vector.dimensions != expected_dims
|
|
264
|
+
raise Parse::Embeddings::InvalidResponseError,
|
|
265
|
+
"Parse::Core::EmbedManaged (#{record.class}##{directive.into}): provider " \
|
|
266
|
+
"#{directive.provider_name.inspect} returned #{vector.dimensions}-dim vector " \
|
|
267
|
+
"but property declares dimensions: #{expected_dims}."
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
with_writer(directive.into) do
|
|
271
|
+
record.public_send(:"#{directive.into}=", vector)
|
|
272
|
+
end
|
|
273
|
+
record.public_send(:"#{directive.digest_field}=", digest)
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
# @!visibility private
|
|
277
|
+
# Concatenate source-field string values. `nil` and blank entries
|
|
278
|
+
# are skipped; remaining values are joined with a double newline.
|
|
279
|
+
# If every source is blank the result is the empty string, which
|
|
280
|
+
# the caller treats as "clear the embedding".
|
|
281
|
+
def self.build_source_text(record, sources)
|
|
282
|
+
sources.map { |f| record.public_send(f).to_s }
|
|
283
|
+
.reject(&:empty?)
|
|
284
|
+
.join("\n\n")
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
# @!visibility private
|
|
288
|
+
# Truncated SHA-256 hex of the source text. 32 hex chars (128
|
|
289
|
+
# bits) is plenty for a non-cryptographic change detector and
|
|
290
|
+
# keeps the digest sibling field compact.
|
|
291
|
+
def self.digest_for(text)
|
|
292
|
+
Digest::SHA256.hexdigest(text)[0, 32]
|
|
293
|
+
end
|
|
294
|
+
end
|
|
295
|
+
end
|
|
296
|
+
end
|
|
@@ -139,14 +139,14 @@ module Parse
|
|
|
139
139
|
# If we successfully fetched data, ensure the object is not marked as deleted
|
|
140
140
|
@_deleted = false
|
|
141
141
|
|
|
142
|
-
#
|
|
142
|
+
# Capture dirty fields and their local values BEFORE applying server data
|
|
143
143
|
dirty_fields = {}
|
|
144
144
|
if respond_to?(:changed)
|
|
145
145
|
begin
|
|
146
146
|
changed_attrs = changed
|
|
147
147
|
if changed_attrs.respond_to?(:each)
|
|
148
148
|
changed_attrs.each do |attr|
|
|
149
|
-
# Only
|
|
149
|
+
# Only capture if object responds to the attribute getter
|
|
150
150
|
if respond_to?(attr)
|
|
151
151
|
begin
|
|
152
152
|
dirty_fields[attr.to_sym] = send(attr)
|
|
@@ -436,7 +436,7 @@ module Parse
|
|
|
436
436
|
!Parse::Properties::BASE_KEYS.include?(key) &&
|
|
437
437
|
respond_to?(:fetch)
|
|
438
438
|
|
|
439
|
-
#
|
|
439
|
+
# Capture caller stack BEFORE mutex for better error tracebacks
|
|
440
440
|
# Filter out internal parse-stack frames to show where user code accessed the field
|
|
441
441
|
caller_stack = caller.reject { |frame| frame.include?("/lib/parse/") }
|
|
442
442
|
|
|
@@ -487,7 +487,7 @@ module Parse
|
|
|
487
487
|
# Prepares object for dirty tracking by fetching if needed.
|
|
488
488
|
# Must be called BEFORE will_change! to prevent autofetch from wiping dirty state.
|
|
489
489
|
#
|
|
490
|
-
# When will_change!
|
|
490
|
+
# When will_change! captures the old value by calling the getter, it may trigger
|
|
491
491
|
# autofetch if the object is a pointer. That autofetch calls clear_changes! which
|
|
492
492
|
# wipes the dirty tracking state will_change! is trying to set up.
|
|
493
493
|
#
|
|
@@ -115,36 +115,50 @@ module Parse
|
|
|
115
115
|
# `Parse::Role.users`, the reverse direction is often the
|
|
116
116
|
# heavier-used index.
|
|
117
117
|
#
|
|
118
|
-
# Uniqueness
|
|
119
|
-
#
|
|
120
|
-
#
|
|
121
|
-
#
|
|
122
|
-
#
|
|
123
|
-
# index directly via `Parse::MongoDB.create_index` or a later
|
|
124
|
-
# extension to this DSL.
|
|
118
|
+
# Uniqueness on a *single-direction* relation index is NOT
|
|
119
|
+
# supported — `unique: true` on just `owningId` (or just
|
|
120
|
+
# `relatedId`) would assert each owner can hold at most one
|
|
121
|
+
# related, contradicting `has_many`. That mistake is rejected at
|
|
122
|
+
# declaration time.
|
|
125
123
|
#
|
|
126
|
-
#
|
|
124
|
+
# `dedup: true` is semantically different and IS supported: it
|
|
125
|
+
# registers a compound `{owningId: 1, relatedId: 1}` unique index
|
|
126
|
+
# on the join collection. The compound key prevents duplicate
|
|
127
|
+
# `(owner, related)` pair rows from accumulating (a real failure
|
|
128
|
+
# mode under concurrent `.add` calls on a Parse Relation), without
|
|
129
|
+
# constraining how many distinct relateds an owner may hold or
|
|
130
|
+
# vice versa. Default off — the index buys correctness at the
|
|
131
|
+
# cost of a write-time uniqueness check on every relation insert,
|
|
132
|
+
# and existing collections with duplicate pairs will fail the
|
|
133
|
+
# migrator's apply step until reconciled.
|
|
134
|
+
#
|
|
135
|
+
# @example Canonical case — role membership with dedup
|
|
127
136
|
# class Parse::Role < Parse::Object
|
|
128
137
|
# has_many :users, through: :relation
|
|
129
|
-
# mongo_relation_index :users, bidirectional: true
|
|
138
|
+
# mongo_relation_index :users, bidirectional: true, dedup: true
|
|
130
139
|
# # creates: _Join:users:_Role { owningId: 1 }
|
|
131
140
|
# # _Join:users:_Role { relatedId: 1 }
|
|
141
|
+
# # _Join:users:_Role { owningId: 1, relatedId: 1 } unique
|
|
132
142
|
# end
|
|
133
143
|
#
|
|
134
144
|
# @param field [Symbol] the relation field name (must be declared
|
|
135
145
|
# via `has_many :field, through: :relation`)
|
|
136
146
|
# @param bidirectional [Boolean] when true, register two
|
|
137
147
|
# declarations — one each for owningId and relatedId
|
|
148
|
+
# @param dedup [Boolean] when true, also register a compound
|
|
149
|
+
# `{owningId: 1, relatedId: 1}` unique index that prevents
|
|
150
|
+
# duplicate-pair membership rows
|
|
151
|
+
# @param unique [Boolean] rejected — see above
|
|
138
152
|
# @raise [ArgumentError] when `field` is not a declared relation
|
|
139
|
-
# or `unique:` is passed
|
|
153
|
+
# or `unique:` is passed
|
|
140
154
|
# @return [Array<Hash>] the registered declarations
|
|
141
|
-
def mongo_relation_index(field, bidirectional: false, unique: false)
|
|
155
|
+
def mongo_relation_index(field, bidirectional: false, dedup: false, unique: false)
|
|
142
156
|
if unique
|
|
143
157
|
raise ArgumentError,
|
|
144
158
|
"#{self}.mongo_relation_index does not support unique: — uniqueness on " \
|
|
145
|
-
"a single-direction relation column breaks has_many semantics.
|
|
146
|
-
"
|
|
147
|
-
"
|
|
159
|
+
"a single-direction relation column breaks has_many semantics. Use " \
|
|
160
|
+
"`dedup: true` for a compound `{owningId, relatedId}` unique index that " \
|
|
161
|
+
"prevents duplicate-pair membership without constraining cardinality."
|
|
148
162
|
end
|
|
149
163
|
field = field.to_sym
|
|
150
164
|
unless respond_to?(:relations) && relations.key?(field)
|
|
@@ -155,6 +169,9 @@ module Parse
|
|
|
155
169
|
join_collection = "_Join:#{field}:#{parse_class}"
|
|
156
170
|
decls = [register_relation_index(join_collection, "owningId", source: field)]
|
|
157
171
|
decls << register_relation_index(join_collection, "relatedId", source: field) if bidirectional
|
|
172
|
+
if dedup
|
|
173
|
+
decls << register_relation_dedup_index(join_collection, source: field)
|
|
174
|
+
end
|
|
158
175
|
decls
|
|
159
176
|
end
|
|
160
177
|
|
|
@@ -242,6 +259,28 @@ module Parse
|
|
|
242
259
|
decl
|
|
243
260
|
end
|
|
244
261
|
|
|
262
|
+
# Register the compound `{owningId: 1, relatedId: 1}` unique index
|
|
263
|
+
# on a relation join collection — the dedup form of
|
|
264
|
+
# `mongo_relation_index`. Compound uniqueness on both columns
|
|
265
|
+
# together is the *correctness* form: it forbids duplicate
|
|
266
|
+
# `(owner, related)` pair rows from accumulating without
|
|
267
|
+
# constraining how many distinct relateds an owner may hold.
|
|
268
|
+
# That is semantically different from `unique:` on a single
|
|
269
|
+
# column (which `mongo_relation_index` continues to reject).
|
|
270
|
+
def register_relation_dedup_index(collection, source:)
|
|
271
|
+
decl = {
|
|
272
|
+
keys: { "owningId" => 1, "relatedId" => 1 }.freeze,
|
|
273
|
+
options: { unique: true }.freeze,
|
|
274
|
+
declared_for: [source].freeze,
|
|
275
|
+
collection: collection,
|
|
276
|
+
}.freeze
|
|
277
|
+
if mongo_index_declarations.any? { |d| d[:keys] == decl[:keys] && d[:options] == decl[:options] && d[:collection] == collection }
|
|
278
|
+
return decl
|
|
279
|
+
end
|
|
280
|
+
mongo_index_declarations << decl
|
|
281
|
+
decl
|
|
282
|
+
end
|
|
283
|
+
|
|
245
284
|
# Translate a property symbol to the wire-format column name a
|
|
246
285
|
# MongoDB index must reference. Pointer fields (declared via
|
|
247
286
|
# `belongs_to`) live in Mongo at `_p_<field>` and the SDK already
|
|
@@ -10,8 +10,8 @@ module Parse
|
|
|
10
10
|
# subclasses. When `parse_reference` is declared on a class, every newly-
|
|
11
11
|
# created instance gets a string field auto-populated with the canonical
|
|
12
12
|
# `"ClassName$objectId"` form via an `after_create` callback. The value
|
|
13
|
-
# mirrors Parse Server's internal pointer-column format (`
|
|
14
|
-
# `"
|
|
13
|
+
# mirrors Parse Server's internal pointer-column format (`_p_workspace` ->
|
|
14
|
+
# `"Workspace$xyz"`), which makes direct MongoDB queries, `$lookup` joins, and
|
|
15
15
|
# cross-class analytics trivial: a single equality match on one column.
|
|
16
16
|
#
|
|
17
17
|
# Mechanics:
|
|
@@ -131,7 +131,7 @@ module Parse
|
|
|
131
131
|
extend ActiveSupport::Concern
|
|
132
132
|
|
|
133
133
|
# The separator between class name and object id. Matches Parse Server's
|
|
134
|
-
# own pointer-column format (e.g. `
|
|
134
|
+
# own pointer-column format (e.g. `_p_workspace = "Workspace$abcd1234"`).
|
|
135
135
|
SEPARATOR = "$".freeze
|
|
136
136
|
|
|
137
137
|
# Length of a Parse Server objectId. Matches the format the server itself
|
|
@@ -19,7 +19,7 @@ module Parse
|
|
|
19
19
|
# supported in Parse and mapping them between their remote names with their local ruby named attributes.
|
|
20
20
|
module Properties
|
|
21
21
|
# These are the base types supported by Parse.
|
|
22
|
-
TYPES = [:string, :relation, :integer, :float, :boolean, :date, :array, :file, :geopoint, :polygon, :bytes, :object, :acl, :timezone, :phone, :email].freeze
|
|
22
|
+
TYPES = [:string, :relation, :integer, :float, :boolean, :date, :array, :file, :geopoint, :polygon, :bytes, :object, :acl, :timezone, :phone, :email, :vector].freeze
|
|
23
23
|
# These are the base mappings of the remote field name types.
|
|
24
24
|
BASE = { objectId: :string, createdAt: :date, updatedAt: :date, ACL: :acl }.freeze
|
|
25
25
|
# The list of properties that are part of all objects
|
|
@@ -135,6 +135,18 @@ module Parse
|
|
|
135
135
|
@property_enum_descriptions ||= {}
|
|
136
136
|
end
|
|
137
137
|
|
|
138
|
+
# @return [Hash] per-property metadata for `:vector`-typed fields.
|
|
139
|
+
# Maps property names (symbols) to a frozen options hash:
|
|
140
|
+
# `{ dimensions: Integer, provider: Symbol, model: String, similarity: Symbol }`.
|
|
141
|
+
# `dimensions:` is required; the rest are optional and only carry
|
|
142
|
+
# meaning for the embedding provider plumbing layered above this
|
|
143
|
+
# type. Consumed by `Parse::Embeddings` and
|
|
144
|
+
# `Parse::AtlasSearch::IndexCatalog` to resolve which vector index
|
|
145
|
+
# to query for a given field.
|
|
146
|
+
def vector_properties
|
|
147
|
+
@vector_properties ||= {}
|
|
148
|
+
end
|
|
149
|
+
|
|
138
150
|
# Set the property fields for this class.
|
|
139
151
|
# @return [Hash]
|
|
140
152
|
def attributes=(hash)
|
|
@@ -318,6 +330,44 @@ module Parse
|
|
|
318
330
|
end # validates_each
|
|
319
331
|
end # data_type == :phone
|
|
320
332
|
|
|
333
|
+
# vector datatypes capture per-property embedding metadata
|
|
334
|
+
# (dimensions, provider, model, similarity) and validate that
|
|
335
|
+
# any assigned value matches the declared `dimensions:`.
|
|
336
|
+
# `dimensions:` is required at declaration time — the field
|
|
337
|
+
# cannot be safely indexed or compared against a query vector
|
|
338
|
+
# without it.
|
|
339
|
+
if data_type == :vector
|
|
340
|
+
dims = opts[:dimensions] || opts[:dims]
|
|
341
|
+
unless dims.is_a?(Integer) && dims > 0
|
|
342
|
+
raise ArgumentError,
|
|
343
|
+
"Property #{self}##{key} :vector requires `dimensions:` as a positive Integer."
|
|
344
|
+
end
|
|
345
|
+
if dims > Parse::Vector::MAX_DIMENSIONS
|
|
346
|
+
raise ArgumentError,
|
|
347
|
+
"Property #{self}##{key} :vector dimensions #{dims} exceeds max " \
|
|
348
|
+
"#{Parse::Vector::MAX_DIMENSIONS}."
|
|
349
|
+
end
|
|
350
|
+
vector_properties[key] = {
|
|
351
|
+
dimensions: dims,
|
|
352
|
+
provider: opts[:provider],
|
|
353
|
+
model: opts[:model],
|
|
354
|
+
similarity: opts[:similarity],
|
|
355
|
+
}.freeze
|
|
356
|
+
|
|
357
|
+
validates_each key do |record, attribute, value|
|
|
358
|
+
next if value.nil?
|
|
359
|
+
unless value.is_a?(Parse::Vector)
|
|
360
|
+
record.errors.add(attribute, "field :#{attribute} must be a Parse::Vector.")
|
|
361
|
+
else
|
|
362
|
+
expected = record.class.vector_properties.dig(attribute, :dimensions)
|
|
363
|
+
if expected && value.dimensions != expected
|
|
364
|
+
record.errors.add(attribute,
|
|
365
|
+
"field :#{attribute} expected #{expected} dimensions, got #{value.dimensions}.")
|
|
366
|
+
end
|
|
367
|
+
end
|
|
368
|
+
end # validates_each
|
|
369
|
+
end # data_type == :vector
|
|
370
|
+
|
|
321
371
|
# email datatypes validate email format.
|
|
322
372
|
if data_type == :email
|
|
323
373
|
validates_each key do |record, attribute, value|
|
|
@@ -794,6 +844,25 @@ module Parse
|
|
|
794
844
|
val = Parse::Phone.new(val) if val.present?
|
|
795
845
|
when :email
|
|
796
846
|
val = Parse::Email.new(val) if val.present?
|
|
847
|
+
when :vector
|
|
848
|
+
# nil/blank → unset; coerce Arrays (and pass-through Parse::Vector)
|
|
849
|
+
# to a Parse::Vector, which validates that every element is a
|
|
850
|
+
# finite Numeric. Dimension mismatch is reported via
|
|
851
|
+
# validates_each so callers can rescue ActiveModel::ValidationError
|
|
852
|
+
# at save time rather than at every assignment; raising here
|
|
853
|
+
# would break partial hydration where the dimension class-level
|
|
854
|
+
# declaration may not yet be loaded.
|
|
855
|
+
if val.nil?
|
|
856
|
+
val = nil
|
|
857
|
+
elsif val.is_a?(Parse::Vector)
|
|
858
|
+
val = val
|
|
859
|
+
elsif val.is_a?(Array)
|
|
860
|
+
val = Parse::Vector.new(val)
|
|
861
|
+
else
|
|
862
|
+
raise ArgumentError,
|
|
863
|
+
"Property #{self.class}##{key} :vector requires an Array or Parse::Vector " \
|
|
864
|
+
"(got #{val.class})."
|
|
865
|
+
end
|
|
797
866
|
else
|
|
798
867
|
# You can provide a specific class instead of a symbol format
|
|
799
868
|
if data_type.respond_to?(:typecast)
|
|
@@ -215,6 +215,21 @@ module Parse
|
|
|
215
215
|
prepared_query.results
|
|
216
216
|
end
|
|
217
217
|
|
|
218
|
+
# Runs {.all} with the query scoped to a caller-supplied session token.
# This has the same effect as placing `session_token:` in the constraints
# hash; the token is accepted as a positional argument so client-mode
# callers do not need to remember the constraint-key form.
# @param token [String, Parse::User, Parse::Session] a raw session token,
#   or an object responding to `session_token` (user or session instance).
# @param constraints (see .all)
# @return [Array<Parse::Object>, nil] the result of {.all}, or nil when the
#   resolved token is nil or an empty string (no query is issued).
def all_as(token, constraints = { limit: :max }, &block)
  # Accept either a bare token or anything that can hand one over.
  session = token
  session = token.session_token if token.respond_to?(:session_token)

  # Without a usable token there is nothing to scope the query to.
  return nil if session.nil?
  return nil if session.to_s.empty?

  # Merge into a copy so the caller's constraints hash is left untouched.
  scoped = constraints.merge(session_token: session)
  all(scoped, &block)
end
|
|
232
|
+
|
|
218
233
|
# Returns the first item matching the constraint.
|
|
219
234
|
# @overload first(count = 1)
|
|
220
235
|
# @param count [Integer] The number of items to return.
|
|
@@ -239,6 +254,23 @@ module Parse
|
|
|
239
254
|
return res.first fetch_count
|
|
240
255
|
end
|
|
241
256
|
|
|
257
|
+
# Runs {.first} with the query scoped to a caller-supplied session token.
# @param token [String, Parse::User, Parse::Session] a raw session token,
#   or an object responding to `session_token` (user or session instance).
# @param constraints (see .first) either a constraints hash or, mirroring
#   the `first(2)` call shape, a number of items to return.
# @return [Parse::Object, Array<Parse::Object>, nil] the result of {.first},
#   or nil when the resolved token is nil or an empty string.
def first_as(token, constraints = {})
  session = token.respond_to?(:session_token) ? token.session_token : token
  return nil if session.nil? || session.to_s.empty?

  scoped =
    case constraints
    when Numeric
      # Count form: express the count as a limit so the token can ride
      # along in the same constraints hash.
      { limit: constraints.to_i, session_token: session }
    else
      constraints.merge(session_token: session)
    end

  first(scoped)
end
|
|
273
|
+
|
|
242
274
|
# Returns the most recently created object (ordered by created_at descending).
|
|
243
275
|
# @overload latest(count = 1)
|
|
244
276
|
# @param count [Integer] The number of items to return.
|
|
@@ -407,6 +439,13 @@ module Parse
|
|
|
407
439
|
# @see Parse::LiveQuery::Subscription
|
|
408
440
|
# @see Parse::Query#subscribe
|
|
409
441
|
def subscribe(where: {}, fields: nil, session_token: nil, client: nil)
  token = session_token

  # When no token is passed explicitly, inherit the ambient one set by
  # `Parse.with_session` / `Parse.login`. This lets a region wrapped in
  # `with_session(user) { Klass.subscribe ... }` obtain an ACL-aware
  # subscription without threading the token through by hand.
  if token.nil?
    inherited = Parse.current_session_token
    usable = inherited.is_a?(String) && !inherited.empty?
    token = inherited if usable
  end

  query(where).subscribe(fields: fields, session_token: token, client: client)
end
|
|
412
451
|
|
|
@@ -430,7 +469,7 @@ module Parse
|
|
|
430
469
|
# - false - completely bypass cache (no read or write)
|
|
431
470
|
# @return [Parse::Object] if only one id was provided as a parameter.
|
|
432
471
|
# @return [Array<Parse::Object>] if more than one id was provided as a parameter.
|
|
433
|
-
def find(*parse_ids, type: :parallel, compact: true, cache: nil)
|
|
472
|
+
def find(*parse_ids, type: :parallel, compact: true, cache: nil, session_token: nil, use_master_key: nil)
|
|
434
473
|
# flatten the list of Object ids.
|
|
435
474
|
parse_ids.flatten!
|
|
436
475
|
parse_ids.compact!
|
|
@@ -446,11 +485,28 @@ module Parse
|
|
|
446
485
|
|
|
447
486
|
# Extract cache option for client requests
|
|
448
487
|
client_opts = { cache: cache }
|
|
488
|
+
# Forward session-token / use_master_key when supplied so client-mode
|
|
489
|
+
# callers can scope a `.find` to a logged-in user without dropping
|
|
490
|
+
# down to the raw `client.fetch_object` form.
|
|
491
|
+
client_opts[:session_token] = session_token unless session_token.nil?
|
|
492
|
+
client_opts[:use_master_key] = use_master_key unless use_master_key.nil?
|
|
493
|
+
# The parallel path spawns worker threads via `Parallel.map`. Worker
|
|
494
|
+
# threads don't inherit fiber-local storage from the calling thread,
|
|
495
|
+
# so `Parse.current_session_token` resolved inside the worker would
|
|
496
|
+
# be nil even when the caller is inside a `Parse.with_session(...)`
|
|
497
|
+
# block. Snapshot the ambient here (in the calling thread) and pass
|
|
498
|
+
# it explicitly so each worker sends the right auth.
|
|
499
|
+
if !client_opts.key?(:session_token)
|
|
500
|
+
ambient = Parse.current_session_token
|
|
501
|
+
client_opts[:session_token] = ambient if ambient.is_a?(String) && !ambient.empty?
|
|
502
|
+
end
|
|
449
503
|
|
|
450
504
|
if type == :batch
|
|
451
505
|
# use a .in query with the given id as a list
|
|
452
506
|
query = self.class.query(:id.in => parse_ids)
|
|
453
507
|
query.cache = cache
|
|
508
|
+
query.session_token = session_token unless session_token.nil?
|
|
509
|
+
query.use_master_key = use_master_key unless use_master_key.nil?
|
|
454
510
|
results = query.results
|
|
455
511
|
else
|
|
456
512
|
# use Parallel to make multiple threaded requests for finding these objects.
|