parse-stack-next 4.5.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/release.yml +32 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/.vscode/settings.json +3 -0
- data/CHANGELOG.md +305 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +88 -0
- data/lib/parse/cache/redis.rb +249 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/create_lock.rb +14 -2
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +77 -5
- data/parse-stack.png +0 -0
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require_relative "pipeline_security"
|
|
5
|
+
require_relative "acl_scope"
|
|
6
|
+
require_relative "clp_scope"
|
|
7
|
+
require_relative "mongodb"
|
|
8
|
+
|
|
9
|
+
module Parse
|
|
10
|
+
# Atlas Vector Search entry point. Routes through `Parse::MongoDB`
|
|
11
|
+
# rather than Parse Server's REST aggregate (REST aggregate is master-
|
|
12
|
+
# key-only and bypasses ACL/CLP — see CLAUDE.md).
|
|
13
|
+
#
|
|
14
|
+
# v5.0 ships the low-level surface only:
|
|
15
|
+
#
|
|
16
|
+
# Parse::VectorSearch.search(
|
|
17
|
+
# "WikiArticle",
|
|
18
|
+
# field: :embedding,
|
|
19
|
+
# query_vector: vec,
|
|
20
|
+
# k: 10,
|
|
21
|
+
# index: "WikiArticle_embedding_voyage_multimodal_3_1024_idx",
|
|
22
|
+
# session_token: token,
|
|
23
|
+
# )
|
|
24
|
+
#
|
|
25
|
+
# The high-level `Class.find_similar(text: …)` wrapper and the
|
|
26
|
+
# `:vector` property type land later in the v5.0 cycle. This module
|
|
27
|
+
# is callable today against any collection that has a queryable
|
|
28
|
+
# `vectorSearch` index — including the `vector_prototype.Movie`
|
|
29
|
+
# fixture in `scripts/vector_prototype/`.
|
|
30
|
+
#
|
|
31
|
+
# == Stage 0 invariant
|
|
32
|
+
#
|
|
33
|
+
# Atlas refuses any pipeline whose stage 0 is not `$vectorSearch`,
|
|
34
|
+
# `$search`, or `$searchMeta`. The module therefore bypasses
|
|
35
|
+
# `Parse::MongoDB.aggregate` (which prepends an ACL `$match` at
|
|
36
|
+
# stage 0) and reproduces the SDK-side enforcement chain inline —
|
|
37
|
+
# ACL `$match` is appended AFTER `$vectorSearch`, mirroring
|
|
38
|
+
# `Parse::AtlasSearch.search`.
|
|
39
|
+
#
|
|
40
|
+
# == ACL / CLP enforcement
|
|
41
|
+
#
|
|
42
|
+
# Identity is resolved through {Parse::ACLScope.resolve!}, so the
|
|
43
|
+
# same kwargs accepted by mongo-direct paths are honored here:
|
|
44
|
+
# `session_token:`, `master: true`, `acl_user:`, `acl_role:`. The
|
|
45
|
+
# resolution drives:
|
|
46
|
+
#
|
|
47
|
+
# * CLP `find` boundary check — refuses calls the equivalent REST
|
|
48
|
+
# find would refuse.
|
|
49
|
+
# * Optional `pointerFields` post-filter — drops rows that don't
|
|
50
|
+
# name the current user_id in the configured pointer fields.
|
|
51
|
+
# * Post-`$vectorSearch` ACL `$match` injection (Parse Server's
|
|
52
|
+
# `_rperm` predicate).
|
|
53
|
+
# * Post-fetch `protectedFields` redaction.
|
|
54
|
+
#
|
|
55
|
+
# `master: true` bypasses ACL/CLP injection (matches the standard
|
|
56
|
+
# mongo-direct semantics). The unconditional
|
|
57
|
+
# {Parse::PipelineSecurity.strip_internal_fields} pass runs on
|
|
58
|
+
# every result row regardless of mode, so `_hashed_password` and
|
|
59
|
+
# friends never appear in returned documents.
|
|
60
|
+
module VectorSearch
|
|
61
|
+
# Raised when the caller's query vector has the wrong shape.
|
|
62
|
+
# Inherits from `ArgumentError` so callers can rescue uniformly
|
|
63
|
+
# alongside the other bad-input `ArgumentError`s raised inline by
|
|
64
|
+
# {.search} (bad k, bad field, bad num_candidates).
|
|
65
|
+
class InvalidQueryVector < ArgumentError; end
|
|
66
|
+
|
|
67
|
+
# Raised when the module is called but `Parse::MongoDB` is not
|
|
68
|
+
# configured.
|
|
69
|
+
class NotAvailable < StandardError; end
|
|
70
|
+
|
|
71
|
+
# Raised when a `Parse::Query` constraint is built against a
|
|
72
|
+
# declared `:vector` property using an operator other than the
|
|
73
|
+
# narrow allow-list (`:exists`, `:null`). Vector fields are dense
|
|
74
|
+
# numeric arrays — equality, range, `$in`, and friends will either
|
|
75
|
+
# return nonsense or do something the caller did not intend. The
|
|
76
|
+
# right way to query a `:vector` is {Parse::Core::VectorSearchable#find_similar},
|
|
77
|
+
# which routes through Atlas `$vectorSearch`. Inherits from
|
|
78
|
+
# {ArgumentError} so it joins {InvalidQueryVector} and the inline
|
|
79
|
+
# bad-input raises in a single rescue boundary.
|
|
80
|
+
class ConstraintNotSupported < ArgumentError; end
|
|
81
|
+
|
|
82
|
+
# Hard cap on query-vector dimensions to bound validator work and
|
|
83
|
+
# to refuse obvious garbage (for scale: among production-grade models
|
|
84
|
+
# today, Voyage `voyage-multimodal-3`, is 1024-dim; OpenAI
|
|
85
|
+
# `text-embedding-3-large` is 3072-dim).
|
|
86
|
+
MAX_DIMENSIONS = 8192
|
|
87
|
+
|
|
88
|
+
# Hard cap on `limit` (k). Atlas itself caps `$vectorSearch.limit`
|
|
89
|
+
# at 10_000 but practical RAG workloads stay well below that;
|
|
90
|
+
# tighter cap here keeps a runaway caller from materializing a
|
|
91
|
+
# huge result set client-side.
|
|
92
|
+
MAX_K = 1000
|
|
93
|
+
|
|
94
|
+
# Default `numCandidates` multiplier when the caller doesn't pass
|
|
95
|
+
# one. Atlas's guidance: numCandidates ≥ 10 × limit, ≤ 10_000.
|
|
96
|
+
DEFAULT_NUM_CANDIDATES_MULTIPLIER = 20
|
|
97
|
+
|
|
98
|
+
class << self
|
|
99
|
+
# Low-level `$vectorSearch` entry point.
#
# Builds the pipeline `$vectorSearch` -> `$addFields(_vscore)` ->
# ACL `$match` (non-master scopes only) -> caller `filter` `$match`,
# runs it directly against the Mongo collection, then applies the
# post-fetch redaction layers. Because every `$match` and the
# pointerFields post-filter run AFTER `$vectorSearch` has applied
# `limit`, fewer than `k` rows may come back.
#
# @param collection_name [String] Parse class name / Mongo
#   collection name. Treated as a literal collection name; no
#   property-type lookup happens at this layer.
# @param field [String, Symbol] vector field path inside the
#   document. Must match `path:` on the Atlas index definition.
#   Empty, `$`-prefixed, dotted, and internal/sensitive names are
#   refused.
# @param query_vector [Array<Float>] the query embedding.
# @param k [Integer] number of hits to return. Capped at
#   {MAX_K}.
# @param num_candidates [Integer, nil] Atlas's HNSW search
#   width. Defaults to `k * DEFAULT_NUM_CANDIDATES_MULTIPLIER`,
#   clamped to Atlas's 10_000 ceiling. Must be >= `k`.
# @param filter [Hash, nil] additional post-`$vectorSearch`
#   match (validated by {Parse::PipelineSecurity.validate_filter!}).
#   For pre-search filtering use `vector_filter:`.
# @param vector_filter [Hash, nil] Atlas-native pre-search
#   filter, injected into `$vectorSearch.filter`. Atlas requires
#   the referenced fields be declared as `type: "filter"` in the
#   index definition. Validated by
#   {Parse::PipelineSecurity.validate_filter!}.
# @param index [String, nil] Atlas vectorSearch index name. If
#   nil, falls back to {.default_index}.
# @param max_time_ms [Integer, nil] server-side timeout.
# @param scope_opts [Hash] identity kwargs handed to
#   {Parse::ACLScope.resolve!}: `session_token:` (session token for
#   ACL/CLP resolution), `master:` (explicit master-key opt-in;
#   bypasses ACL/CLP enforcement), `acl_user:` (pre-resolved user
#   pointer for ACL scoping), `acl_role:` (role-only scope).
# @return [Array<Hash>] raw result documents. Each row includes
#   `_vscore` (the Atlas vectorSearchScore — projected under
#   `_vscore` rather than `_score` so hybrid pipelines with
#   Atlas Search don't collide on the same key).
# @raise [ArgumentError] on a missing index, bad field, bad k, or
#   bad num_candidates.
# @raise [InvalidQueryVector] when `query_vector` fails validation.
# @raise [NotAvailable] when `Parse::MongoDB` is not available.
# @raise [Parse::CLPScope::Denied] when CLP refuses `find` for the
#   resolved scope.
def search(collection_name, field:, query_vector:, k: 10,
           num_candidates: nil, filter: nil, vector_filter: nil,
           index: nil, max_time_ms: nil, **scope_opts)
  require_available!
  index_name = (index || @default_index)
  if index_name.nil? || index_name.to_s.empty?
    raise ArgumentError,
          "Parse::VectorSearch.search requires index: (or set Parse::VectorSearch.default_index)."
  end

  # `Parse::ACLScope.resolve!` mutates the options hash by deleting
  # auth kwargs. `**scope_opts` is already a fresh Hash owned by this
  # call, so nothing the caller holds is mutated, and `resolve!` sees
  # every auth kwarg at once and can refuse 2-of-N combinations.
  resolution = Parse::ACLScope.resolve!(scope_opts, method_name: :"VectorSearch.search")

  path = field.to_s
  if path.empty? || path.start_with?("$") || path.include?(".")
    raise ArgumentError,
          "field: must be a non-empty, non-$-prefixed, non-dotted field name."
  end
  if Parse::PipelineSecurity::INTERNAL_FIELDS_DENYLIST.include?(path) ||
     path.start_with?("_auth_data_")
    raise ArgumentError,
          "field: refuses internal/sensitive field path #{path.inspect}."
  end

  # Strict coercion. `Integer(nil)` raises TypeError and
  # `Integer(Float::NAN)` raises FloatDomainError; fold both into the
  # ArgumentError family so bad input has a single rescue boundary.
  k_int = begin
    Integer(k)
  rescue TypeError, ArgumentError, RangeError
    raise ArgumentError, "k must be an Integer in 1..#{MAX_K} (got #{k.inspect})."
  end
  if k_int <= 0 || k_int > MAX_K
    raise ArgumentError, "k must be in 1..#{MAX_K} (got #{k_int})."
  end

  # Atlas's documented ceiling for `numCandidates`.
  atlas_max_candidates = 10_000
  num_candidates_int =
    if num_candidates
      # Same strictness as `k`: a lenient `.to_i` would turn garbage
      # such as "abc" into 0 and report a misleading ">= k" error.
      begin
        Integer(num_candidates)
      rescue TypeError, ArgumentError, RangeError
        raise ArgumentError,
              "num_candidates must be an Integer (got #{num_candidates.inspect})."
      end
    else
      # Clamp the derived default: without this, any k above
      # 10_000 / DEFAULT_NUM_CANDIDATES_MULTIPLIER would fail the cap
      # check below for a value the caller never supplied. k <= MAX_K
      # keeps the clamped value >= k.
      [k_int * DEFAULT_NUM_CANDIDATES_MULTIPLIER, atlas_max_candidates].min
    end
  if num_candidates_int < k_int
    raise ArgumentError, "num_candidates (#{num_candidates_int}) must be >= k (#{k_int})."
  end
  if num_candidates_int > atlas_max_candidates
    raise ArgumentError, "num_candidates capped at 10000 by Atlas (got #{num_candidates_int})."
  end

  validated_vector = validate_query_vector!(query_vector)

  Parse::PipelineSecurity.validate_filter!(filter) if filter
  Parse::PipelineSecurity.validate_filter!(vector_filter) if vector_filter

  # CLP `find` boundary + pointerFields. Mirrors
  # `Parse::AtlasSearch.search` — without this, a scoped caller
  # could issue $vectorSearch against a collection whose CLP
  # would refuse them on the equivalent REST find.
  assert_clp_find!(collection_name, resolution)
  pointer_fields = resolve_pointer_fields!(collection_name, resolution)
  protected_fields = Parse::CLPScope.protected_fields_for(
    collection_name, resolution.permission_strings,
  )

  vs_stage = {
    "index" => index_name.to_s,
    "path" => path,
    "queryVector" => validated_vector,
    "numCandidates" => num_candidates_int,
    "limit" => k_int,
  }
  vs_stage["filter"] = vector_filter if vector_filter && !vector_filter.empty?
  pipeline = [{ "$vectorSearch" => vs_stage }]

  pipeline << {
    "$addFields" => { "_vscore" => { "$meta" => "vectorSearchScore" } },
  }

  # Inject ACL $match AFTER $vectorSearch + the score projection
  # but BEFORE the caller-supplied filter, so the user-controlled
  # filter cannot exfiltrate restricted documents that passed the
  # $vectorSearch operator. NOTE: Atlas's `$vectorSearch.filter`
  # (the pre-filter) cannot enforce ACL here because `_rperm`
  # would need to be declared as `type: "filter"` in the index
  # definition — out of scope at the SDK layer. The post-stage
  # `$match` is the enforcement boundary.
  unless resolution.master?
    acl_match = Parse::ACLScope.match_stage_for(resolution)
    pipeline << acl_match if acl_match
  end

  pipeline << { "$match" => filter } if filter

  raw_results = run_pipeline!(collection_name, pipeline, max_time_ms: max_time_ms)

  # Post-fetch enforcement: walk the rows the same way
  # Parse::MongoDB.aggregate would. Master mode skips every
  # redaction layer (matches the helper's behavior).
  unless resolution.master?
    Parse::ACLScope.redact_results!(raw_results, resolution)
    Parse::CLPScope.redact_protected_fields!(raw_results, protected_fields) if protected_fields.any?
    if pointer_fields
      raw_results = Parse::CLPScope.filter_by_pointer_fields(
        raw_results, pointer_fields, resolution.user_id,
      )
    end
  end

  # Internal-fields denylist is the process-level floor: runs in
  # every mode, master included, so `_hashed_password` /
  # `_session_token` can never surface through this entry point.
  raw_results.map! { |doc| Parse::PipelineSecurity.strip_internal_fields(doc) }
  raw_results
end
|
|
237
|
+
|
|
238
|
+
# Validate a query vector. Public so callers (and tests) can
# invoke it independently of {.search}.
#
# Checks, in order: the value is an Array, is non-empty, does not
# exceed {MAX_DIMENSIONS}, matches `dimensions` when one is given,
# and every element is a finite real number.
#
# @param vec [Array<Numeric>] candidate query vector.
# @param dimensions [Integer, nil] expected length; nil to skip
#   the length check.
# @return [Array<Float>] a new Array with every element coerced to
#   Float, frozen. The input Array is not modified.
# @raise [InvalidQueryVector] on bad shape, or on non-numeric,
#   non-real (Complex), infinite, or NaN values.
def validate_query_vector!(vec, dimensions: nil)
  unless vec.is_a?(Array)
    raise InvalidQueryVector, "query_vector must be an Array (got #{vec.class})."
  end
  if vec.empty?
    raise InvalidQueryVector, "query_vector cannot be empty."
  end
  if vec.length > MAX_DIMENSIONS
    raise InvalidQueryVector,
          "query_vector length #{vec.length} exceeds MAX_DIMENSIONS=#{MAX_DIMENSIONS}."
  end
  if dimensions && vec.length != dimensions
    raise InvalidQueryVector,
          "query_vector length #{vec.length} != declared dimensions #{dimensions}."
  end

  coerced = vec.each_with_index.map do |v, i|
    unless v.is_a?(Numeric)
      raise InvalidQueryVector, "query_vector[#{i}] is not numeric (#{v.class})."
    end
    f = begin
      v.to_f
    rescue RangeError
      # Complex is Numeric, but Complex#to_f raises RangeError when the
      # imaginary part is not an exact zero. Surface that as the
      # documented error type instead of leaking RangeError.
      raise InvalidQueryVector, "query_vector[#{i}] is not a real number (#{v.inspect})."
    end
    unless f.finite?
      raise InvalidQueryVector, "query_vector[#{i}] is not finite (#{v.inspect})."
    end
    f
  end
  coerced.freeze
end
|
|
276
|
+
|
|
277
|
+
# @!attribute [rw] default_index
|
|
278
|
+
# Optional fallback for {.search}'s `index:` keyword.
|
|
279
|
+
# @return [String, nil]
|
|
280
|
+
attr_accessor :default_index
|
|
281
|
+
|
|
282
|
+
private
|
|
283
|
+
|
|
284
|
+
# Precondition guard for the public entry points: first asks
# `Parse::MongoDB.require_gem!` to run, then refuses to continue
# unless `Parse::MongoDB.available?` reports true.
#
# @return [nil] when the MongoDB-direct layer is available.
# @raise [NotAvailable] when `Parse::MongoDB.available?` is false.
def require_available!
  Parse::MongoDB.require_gem!
  return if Parse::MongoDB.available?

  raise NotAvailable,
        "Parse::VectorSearch requires Parse::MongoDB.configure(enabled: true)."
end
|
|
291
|
+
|
|
292
|
+
# Enforce the CLP `find` boundary for a resolved scope. A nil
# resolution or a master-mode resolution is exempt; any other scope
# must be granted `find` on the collection by
# `Parse::CLPScope.permits?`. Mirrors `Parse::AtlasSearch.search`.
#
# @param collection_name [String] collection being searched.
# @param resolution [Object, nil] result of
#   `Parse::ACLScope.resolve!`; must respond to `master?` and
#   `permission_strings`.
# @return [nil] when the call is allowed.
# @raise [Parse::CLPScope::Denied] when CLP refuses `find`.
def assert_clp_find!(collection_name, resolution)
  exempt = resolution.nil? || resolution.master?
  return if exempt
  return if Parse::CLPScope.permits?(collection_name, :find, resolution.permission_strings)

  raise Parse::CLPScope::Denied.new(
    collection_name, :find,
    "CLP refuses find on '#{collection_name}' for the current VectorSearch scope.",
  )
end
|
|
304
|
+
|
|
305
|
+
# Look up the CLP pointerFields entry for `find` on the collection.
#
# @param collection_name [String] collection being searched.
# @param resolution [Object, nil] result of
#   `Parse::ACLScope.resolve!`; must respond to `master?` and
#   `user_id`.
# @return [Object, nil] whatever `Parse::CLPScope.pointer_fields_for`
#   returns for `:find`, or nil when the resolution is nil, the scope
#   is master-mode, or no pointerFields entry exists.
# @raise [Parse::CLPScope::Denied] when pointerFields is set but the
#   current scope has no user_id (acl_role-only / public agents).
def resolve_pointer_fields!(collection_name, resolution)
  return nil if resolution.nil? || resolution.master?

  fields = Parse::CLPScope.pointer_fields_for(collection_name, :find)
  return nil if fields.nil?
  # A user identity is present, so the caller can apply the post-filter.
  return fields unless resolution.user_id.nil?

  message = "CLP requires user identity (pointerFields=#{fields.inspect}) " \
            "but the current VectorSearch scope has no user_id."
  raise Parse::CLPScope::Denied.new(collection_name, :find, message)
end
|
|
322
|
+
|
|
323
|
+
# Run an aggregation pipeline straight on the MongoDB collection and
# materialize the cursor into an Array.
#
# Mirrors `Parse::AtlasSearch#run_atlas_pipeline!` — bypasses
# `Parse::MongoDB.aggregate` because that helper prepends an
# ACL `$match` at stage 0, which Atlas rejects for any pipeline
# whose stage 0 is `$vectorSearch`.
#
# @param collection_name [String] collection to aggregate on.
# @param pipeline [Array<Hash>] aggregation stages, passed through
#   unchanged.
# @param max_time_ms [Integer, nil] forwarded as the `:max_time_ms`
#   aggregate option when given; omitted otherwise.
# @return [Array] the aggregation results.
# @raise [StandardError] any failure from the driver. Before the
#   original error is re-raised, `Parse::MongoDB.raise_if_timeout!`
#   (reached via `send`) gets the chance to raise in its place —
#   presumably a timeout-specific error; confirm in Parse::MongoDB.
def run_pipeline!(collection_name, pipeline, max_time_ms: nil)
  options = max_time_ms ? { max_time_ms: max_time_ms } : {}
  Parse::MongoDB.collection(collection_name).aggregate(pipeline, options).to_a
rescue => error
  Parse::MongoDB.send(:raise_if_timeout!, error, collection_name, max_time_ms)
  raise
end
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
@default_index = nil
|
|
340
|
+
end
|
|
341
|
+
end
|
data/parse-stack-next.gemspec
CHANGED
|
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
|
10
10
|
spec.email = ["adrian+parse-stack@neurosynq.net"]
|
|
11
11
|
|
|
12
12
|
spec.summary = %q{Parse Server Ruby Client SDK (parse-stack-next fork)}
|
|
13
|
-
spec.description = %q{Parse Server Ruby Client. Perform Object-relational mapping between Parse Server and Ruby classes, with authentication, cloud code webhooks, push notifications and more built in.}
|
|
13
|
+
spec.description = %q{Parse Server Ruby Client. Perform Object-relational mapping between Parse Server and Ruby classes, with authentication, cloud code webhooks, push notifications and more built in. parse-stack-next is a fork of parse-stack with additional features: vector search, Atlas Search, agent ACL scopes, GraphQL, MongoDB-direct pipeline enforcement, and ongoing maintenance.}
|
|
14
14
|
spec.homepage = "https://github.com/neurosynq/parse-stack-next"
|
|
15
15
|
spec.license = "MIT"
|
|
16
16
|
|
|
@@ -22,13 +22,6 @@ Gem::Specification.new do |spec|
|
|
|
22
22
|
"documentation_uri" => "https://neurosynq.github.io/parse-stack-next/",
|
|
23
23
|
"rubygems_mfa_required" => "true",
|
|
24
24
|
}
|
|
25
|
-
# Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
|
|
26
|
-
# delete this section to allow pushing this gem to any host.
|
|
27
|
-
# if spec.respond_to?(:metadata)
|
|
28
|
-
# spec.metadata['allowed_push_host'] = "http://www.modernistik.com"
|
|
29
|
-
# else
|
|
30
|
-
# raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
|
|
31
|
-
# end
|
|
32
25
|
|
|
33
26
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
|
34
27
|
spec.bindir = "bin"
|
|
@@ -42,6 +35,7 @@ Gem::Specification.new do |spec|
|
|
|
42
35
|
spec.add_runtime_dependency "faraday", "~> 2.0"
|
|
43
36
|
spec.add_runtime_dependency "faraday-net_http_persistent", "~> 2.0"
|
|
44
37
|
spec.add_runtime_dependency "moneta", "< 2"
|
|
38
|
+
spec.add_runtime_dependency "connection_pool", ">= 2.2", "< 4"
|
|
45
39
|
spec.add_runtime_dependency "rack", ">= 2.0.6", "< 4"
|
|
46
40
|
spec.add_runtime_dependency "csv", "~> 3.3"
|
|
47
41
|
spec.add_runtime_dependency "ostruct", "~> 0.6"
|
|
@@ -61,13 +55,20 @@ Gem::Specification.new do |spec|
|
|
|
61
55
|
# gem 'mongo', '~> 2.18'
|
|
62
56
|
# Note: The gem is loaded at runtime only when MongoDB features are used
|
|
63
57
|
|
|
58
|
+
# Optional dependency for GraphQL schema type generation
|
|
59
|
+
# Required for: Parse::GraphQL::TypeGenerator
|
|
60
|
+
# Users can add this to their Gemfile for graphql-ruby type generation:
|
|
61
|
+
# gem 'graphql', '~> 2.0'
|
|
62
|
+
# Note: The gem is loaded at runtime only when `require "parse/graphql"` is called.
|
|
63
|
+
spec.add_development_dependency "graphql", "~> 2.0"
|
|
64
|
+
|
|
64
65
|
# spec.post_install_message = <<UPGRADE
|
|
65
66
|
#
|
|
66
67
|
# ** BREAKING CHANGES **
|
|
67
68
|
# The default `has_many` association form has changed from :array to :query.
|
|
68
69
|
# To use arrays, you must now pass `through: :array` option to `has_many`.
|
|
69
70
|
#
|
|
70
|
-
# Visit: https://github.com/
|
|
71
|
+
# Visit: https://github.com/neurosynq/parse-stack-next/wiki/Changes-to-has_many-in-1.5.0
|
|
71
72
|
#
|
|
72
73
|
# UPGRADE
|
|
73
74
|
end
|
|
@@ -61,6 +61,13 @@ services:
|
|
|
61
61
|
# environment — production should keep the allowlist tight to the
|
|
62
62
|
# subnets that actually host the Ruby app.
|
|
63
63
|
PARSE_SERVER_MASTER_KEY_IPS: "127.0.0.1/32,::1/128,172.16.0.0/12,192.168.0.0/16,10.0.0.0/8"
|
|
64
|
+
# Round-trip Parse.track_event through a real adapter so the v5.0
|
|
65
|
+
# integration test can drain the captured events back. The default
|
|
66
|
+
# adapter is a no-op, which would make the integration test
|
|
67
|
+
# indistinguishable from the wire-stub regression guard in
|
|
68
|
+
# test/lib/parse/track_event_wire_shape_test.rb. See
|
|
69
|
+
# test/cloud/analytics-adapter.js for the in-process recorder.
|
|
70
|
+
PARSE_SERVER_ANALYTICS_ADAPTER: "/parse-server/cloud/analytics-adapter.js"
|
|
64
71
|
# Remove health check for now since it's causing startup delays
|
|
65
72
|
# healthcheck:
|
|
66
73
|
# test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:1337/parse/health"]
|
|
@@ -69,6 +76,17 @@ services:
|
|
|
69
76
|
# retries: 5
|
|
70
77
|
# start_period: 30s
|
|
71
78
|
|
|
79
|
+
redis:
|
|
80
|
+
image: redis:7-alpine
|
|
81
|
+
container_name: parse-stack-test-redis
|
|
82
|
+
# Loopback-only by default. Used by the cache integration test
|
|
83
|
+
# (cache_redis_integration_test.rb) and the synchronize-create lock
|
|
84
|
+
# tests. Override with `REDIS_BIND=0.0.0.0` if you need to point a
|
|
85
|
+
# remote client at it during debugging.
|
|
86
|
+
ports:
|
|
87
|
+
- "${REDIS_BIND:-127.0.0.1}:6399:6379"
|
|
88
|
+
command: ["redis-server", "--save", "", "--appendonly", "no"]
|
|
89
|
+
|
|
72
90
|
parse-dashboard:
|
|
73
91
|
image: parseplatform/parse-dashboard:9
|
|
74
92
|
container_name: parse-stack-test-dashboard
|
data/scripts/start-parse.sh
CHANGED
|
@@ -64,6 +64,12 @@ export PARSE_SERVER_ALLOW_CUSTOM_OBJECT_ID="${PARSE_SERVER_ALLOW_CUSTOM_OBJECT_I
|
|
|
64
64
|
export PARSE_SERVER_LIVE_QUERY="${PARSE_SERVER_LIVE_QUERY:-{\"classNames\":[\"Song\",\"Album\",\"User\",\"_User\",\"TestLiveQuery\"]}}"
|
|
65
65
|
export PARSE_SERVER_START_LIVE_QUERY_SERVER="${PARSE_SERVER_START_LIVE_QUERY_SERVER:-true}"
|
|
66
66
|
|
|
67
|
+
# File upload — test-stack only. Authenticated session-token uploads are
|
|
68
|
+
# permitted; public/anonymous uploads are NOT (mirrors a typical hardened
|
|
69
|
+
# Parse Server config). The client_rest_files integration tests assert
|
|
70
|
+
# both pathways: authed upload succeeds, anon upload is rejected.
|
|
71
|
+
export PARSE_SERVER_FILE_UPLOAD="${PARSE_SERVER_FILE_UPLOAD:-{\"enableForPublic\":false,\"enableForAnonymousUser\":false,\"enableForAuthenticatedUser\":true}}"
|
|
72
|
+
|
|
67
73
|
echo "Environment configured:"
|
|
68
74
|
echo " PARSE_SERVER_APPLICATION_ID: $PARSE_SERVER_APPLICATION_ID"
|
|
69
75
|
echo " PARSE_SERVER_LIVE_QUERY: $PARSE_SERVER_LIVE_QUERY"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// Atlas $vectorSearch index for vector_prototype.WikiArticle.
//
// Idempotent: drops any existing search indexes on the collection before
// creating the canonical one. Naming follows vector_rag_plan.md §3:
//
//   <table>_<field>_<provider>_<dimensions>_idx
//
// Reused by both Parse::VectorSearch tests (v4.3) and Parse::Retrieval
// tests (v4.4). The collection's data is loaded separately by
// fetch_embeddings.py.
//
// Run:
//   mongosh "mongodb://localhost:27020/vector_prototype?directConnection=true" \
//     scripts/vector_prototype/create_vector_index.js
//
// The collection name, dimension count and index name are NOT set in
// this script: they are read from fixture_manifest.json, which sits next
// to this file and is written by fetch_embeddings.py. To switch
// dimensions (e.g. 1024 for voyage-multimodal-3 compat), change the
// manifest rather than editing this script.

// Read the manifest written by fetch_embeddings.py so dims/name
// can't drift from the loaded data. mongosh runs on Node, so use fs.
const fs = require("fs");
const path = require("path");
const scriptDir = path.dirname(typeof __filename !== "undefined" ? __filename : process.argv[1] || ".");
const manifestPath = path.join(scriptDir, "fixture_manifest.json");
const MANIFEST = JSON.parse(fs.readFileSync(manifestPath, "utf8"));

// Fail fast on an incomplete manifest: `db[undefined]` would otherwise
// silently target a collection literally named "undefined".
["collection", "dims", "index_name"].forEach(function (key) {
  if (MANIFEST[key] == null || MANIFEST[key] === "") {
    throw new Error(`fixture manifest ${manifestPath} is missing "${key}"`);
  }
});

const COLL = MANIFEST.collection;
const DIMS = MANIFEST.dims;
const INDEX_NAME = MANIFEST.index_name;

// Polling cadence for the two "wait for the search index state to
// settle" loops below.
const DROP_POLL_INTERVAL_MS = 1000;
const READY_POLL_INTERVAL_MS = 2000;
const MAX_POLL_ATTEMPTS = 60;

/**
 * Call `check` up to `maxAttempts` times, sleeping `intervalMs` after
 * each failed check.
 *
 * @param {function(): boolean} check - returns true once the awaited state is reached.
 * @param {number} intervalMs - pause between checks, in milliseconds.
 * @param {number} maxAttempts - maximum number of checks.
 * @returns {number} count of failed checks before success, or -1 on timeout.
 */
function pollUntil(check, intervalMs, maxAttempts) {
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    if (check()) {
      return attempt;
    }
    sleep(intervalMs);
  }
  return -1;
}

print(`[idx] target: ${db.getName()}.${COLL} → ${INDEX_NAME}`);

// Drop any stale search indexes so re-runs converge to a known state.
// Best-effort: a failure to list/drop is reported and the script goes on.
try {
  const existing = db[COLL].getSearchIndexes();
  existing.forEach(function (i) {
    print(`  drop existing index: ${i.name}`);
    db[COLL].dropSearchIndex(i.name);
  });
  if (existing.length > 0) {
    // Search-index deletion is asynchronous: the index can still be
    // listed after dropSearchIndex returns, and re-creating the same
    // name in that window fails. Wait until the listing is empty
    // instead of trusting a fixed pause.
    const gone = pollUntil(
      function () {
        return db[COLL].getSearchIndexes().length === 0;
      },
      DROP_POLL_INTERVAL_MS,
      MAX_POLL_ATTEMPTS,
    );
    if (gone < 0) {
      print("  WARNING: dropped indexes still listed; create may fail");
    }
  }
} catch (e) {
  print(`  (no existing indexes / error listing: ${e.message})`);
}

print(`[idx] creating ${INDEX_NAME} (vectorSearch, ${DIMS} dims, cosine)`);
db[COLL].createSearchIndex(INDEX_NAME, "vectorSearch", {
  fields: [
    {
      type: "vector",
      path: "embedding",
      numDimensions: DIMS,
      similarity: "cosine",
    },
    // Filter fields — declare here anything you want to use as a
    // $vectorSearch filter constraint. Atlas only accepts filter
    // predicates on fields declared as type:"filter" in the index.
    { type: "filter", path: "wiki_id" },
  ],
});

print("[idx] waiting for queryable...");
const failedChecks = pollUntil(
  function () {
    const found = db[COLL].getSearchIndexes().find(function (i) {
      return i.name === INDEX_NAME;
    });
    return Boolean(found) && found.queryable === true;
  },
  READY_POLL_INTERVAL_MS,
  MAX_POLL_ATTEMPTS,
);
if (failedChecks >= 0) {
  // Elapsed time is the number of sleeps taken, not wall-clock time.
  print(`[idx] ready after ${(failedChecks * READY_POLL_INTERVAL_MS) / 1000}s`);
} else {
  print("[idx] WARNING: index not queryable yet; later queries may fail");
}

// Smoke-test: pick an arbitrary doc and find its top-5 neighbours.
print("\n[smoke] $vectorSearch self-similarity check");
const seed = db[COLL].findOne({});
if (!seed) {
  print("[smoke] no docs loaded — run fetch_embeddings.py first");
} else {
  const out = db[COLL].aggregate([
    {
      $vectorSearch: {
        index: INDEX_NAME,
        path: "embedding",
        queryVector: seed.embedding,
        numCandidates: 100,
        limit: 5,
      },
    },
    { $project: { _id: 1, title: 1, _vscore: { $meta: "vectorSearchScore" } } },
  ]).toArray();
  print(`  seed: ${seed.title}`);
  out.forEach(function (r, i) {
    print(`    ${i + 1}. score=${r._vscore.toFixed(4)}  ${r.title}`);
  });
}

print("\n[done]");
|