parse-stack-next 4.5.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/release.yml +32 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/.vscode/settings.json +3 -0
- data/CHANGELOG.md +305 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +88 -0
- data/lib/parse/cache/redis.rb +249 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/create_lock.rb +14 -2
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +77 -5
- data/parse-stack.png +0 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Vector-search & RAG test fixture loader.
|
|
4
|
+
|
|
5
|
+
Pulls a subset of a pre-computed embeddings dataset from HuggingFace and
|
|
6
|
+
loads it into Atlas Local at localhost:27020 (the same container managed
|
|
7
|
+
by scripts/docker/docker-compose.atlas.yml). Designed to be reused by both:
|
|
8
|
+
|
|
9
|
+
1. Vector search integration tests (Parse::VectorSearch — v4.3 plan)
|
|
10
|
+
2. RAG retrieval tests (Parse::Retrieval — v4.4 plan)
|
|
11
|
+
|
|
12
|
+
The Wikipedia article shape (title + full text + url + embedding) covers
|
|
13
|
+
both surfaces: vector tests need (title, embedding), RAG/chunking tests
|
|
14
|
+
need the full text body.
|
|
15
|
+
|
|
16
|
+
The runtime target is voyage-multimodal-3 (1024-dim) — the production
|
|
17
|
+
preference. The fixture data is provider-agnostic: any pre-computed
|
|
18
|
+
embeddings exercise the same Atlas $vectorSearch surface, since the
|
|
19
|
+
index contract is (path, dims, similarity) — provider is metadata.
|
|
20
|
+
|
|
21
|
+
Two presets:
|
|
22
|
+
|
|
23
|
+
PRESET=fast (default) — MongoDB/embedded_movies (sample_mflix)
|
|
24
|
+
1536-dim OpenAI ada-002 plot embeddings, ~3500 movies, single ~42MB JSON file. Quick to download.
|
|
25
|
+
Use when iterating on pipeline mechanics, not Voyage-shape parity.
|
|
26
|
+
|
|
27
|
+
PRESET=voyage_compat — Cohere/wikipedia-2023-11-embed-multilingual-v3
|
|
28
|
+
1024-dim (matches voyage-multimodal-3), ~1.5GB shards.
|
|
29
|
+
Use when the index/query surface needs to be Voyage-dimension-shaped
|
|
30
|
+
even though vectors come from Cohere. Vectors are NOT interchangeable
|
|
31
|
+
with Voyage outputs — same dim, different latent space — but the
|
|
32
|
+
SDK pipeline mechanics are validated correctly.
|
|
33
|
+
|
|
34
|
+
For actual Voyage-vector parity at test time, compute query vectors via
|
|
35
|
+
the Voyage API into the same 1024-dim index (no local-inference path
|
|
36
|
+
exists — Voyage models are closed-weights, API-only).
|
|
37
|
+
|
|
38
|
+
Prereqs: pip install pyarrow pymongo requests
|
|
39
|
+
|
|
40
|
+
Run: python3 scripts/vector_prototype/fetch_embeddings.py
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
import os
|
|
44
|
+
import sys
|
|
45
|
+
import datetime
|
|
46
|
+
import requests
|
|
47
|
+
import pyarrow.parquet as pq
|
|
48
|
+
from pymongo import MongoClient
|
|
49
|
+
|
|
50
|
+
# Built-in dataset presets. Each entry describes where the fixture comes from
# and how to read it:
#   url             - download location
#   dims            - expected embedding dimensionality (only used for a warning)
#   provider        - label recorded in the manifest and in the index name
#   format          - on-disk format understood by _read_rows ("json" / "parquet")
#   embedding_field - source field holding the vector
#   id_field        - source field used to build _id (None => synthesize from row index)
PRESETS = {
    "fast": {
        # MongoDB's reference dataset for Atlas Vector Search demos.
        # ~3500 movies × 1536-dim OpenAI ada-002 plot embeddings.
        # Public, ~42MB, single JSON file. Format: array of objects with
        # plot_embedding field.
        "url": "https://huggingface.co/datasets/MongoDB/embedded_movies/resolve/main/sample_mflix.embedded_movies.json",
        "dims": 1536,
        "provider": "openai-text-embedding-ada-002",
        "format": "json",
        "embedding_field": "plot_embedding",
        "id_field": None,  # MongoDB will auto-assign or we synthesize
    },
    "voyage_compat": {
        # 1024-dim matches voyage-multimodal-3 — same index shape, different
        # latent space. Vectors NOT mixable with Voyage outputs at query time.
        # NOTE: requires an authenticated HuggingFace download
        # (HF_TOKEN env var or `huggingface-cli login`). The Cohere wikipedia
        # v3 dataset is gated since late 2024.
        "url": "https://huggingface.co/datasets/Cohere/wikipedia-2023-11-embed-multilingual-v3/resolve/main/en/0000.parquet",
        "dims": 1024,
        "provider": "cohere-embed-multilingual-v3",
        "format": "parquet",
        "embedding_field": "emb",
        "id_field": "id",
    },
}

PRESET = os.environ.get("PRESET", "fast")
if PRESET not in PRESETS:
    print(f"[err] unknown PRESET={PRESET}; choose one of: {list(PRESETS)}", file=sys.stderr)
    sys.exit(2)

_p = PRESETS[PRESET]
DATASET_URL = os.environ.get("DATASET_URL", _p["url"])
DIMS_EXPECTED = int(os.environ.get("DIMS_EXPECTED", _p["dims"]))
PROVIDER_LABEL = os.environ.get("PROVIDER_LABEL", _p["provider"])

# DATASET_URL can be overridden, so the settings that describe the shape of
# that dataset must be overridable too; otherwise a custom URL can only work
# when it happens to match the preset's format and field names. Every
# override defaults to the preset value, so existing invocations are unchanged.
DATA_FORMAT = os.environ.get("DATA_FORMAT", _p["format"])
if DATA_FORMAT not in ("parquet", "json"):
    # Fail before downloading anything rather than after, in _read_rows.
    print(f"[err] unknown DATA_FORMAT={DATA_FORMAT}; choose one of: ['parquet', 'json']", file=sys.stderr)
    sys.exit(2)
EMBEDDING_FIELD = os.environ.get("EMBEDDING_FIELD", _p["embedding_field"])
# An empty ID_FIELD (or a preset value of None) means "synthesize ids from the row index".
ID_FIELD = os.environ.get("ID_FIELD", _p["id_field"] or "") or None

_ext = "parquet" if DATA_FORMAT == "parquet" else "json"
LOCAL_FILE = os.environ.get("LOCAL_FILE", f"/tmp/parse-stack-fixture-{PRESET}.{_ext}")
MONGO_URI = os.environ.get("ATLAS_URI", "mongodb://localhost:27020/?directConnection=true")
DB_NAME = os.environ.get("DB_NAME", "vector_prototype")
# Collection name mirrors the dataset shape so RAG tests can pivot
# without coupling test assertions to a hard-coded class name.
DEFAULT_COLL = "Movie" if PRESET == "fast" else "WikiArticle"
COLL_NAME = os.environ.get("COLL_NAME", DEFAULT_COLL)
# Maximum number of rows to load; values <= 0 disable the cap (see load()).
LIMIT = int(os.environ.get("LIMIT", "10000"))

print(f"[preset] {PRESET} provider={PROVIDER_LABEL} dims={DIMS_EXPECTED}")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def download():
    """Fetch DATASET_URL into LOCAL_FILE unless a non-empty copy already exists.

    The response is streamed into ``LOCAL_FILE + ".part"`` and only renamed to
    LOCAL_FILE once the whole body has been written. Writing directly to
    LOCAL_FILE would leave a truncated file behind after an interrupted or
    failed transfer, and the "already present" check below would then accept
    that truncated file on every later run.

    Sends ``Authorization: Bearer $HF_TOKEN`` when HF_TOKEN is set.

    Exits the process with status 2 on HTTP 401; any other HTTP error status
    is raised by ``raise_for_status()``.
    """
    if os.path.exists(LOCAL_FILE) and os.path.getsize(LOCAL_FILE) > 0:
        print(f"[skip] {LOCAL_FILE} already present ({os.path.getsize(LOCAL_FILE)} bytes)")
        return

    headers = {}
    token = os.environ.get("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    print(f"[download] {DATASET_URL}")
    partial_path = f"{LOCAL_FILE}.part"
    try:
        with requests.get(DATASET_URL, stream=True, timeout=120, headers=headers) as r:
            if r.status_code == 401:
                print(
                    "[err] HTTP 401 — dataset requires authentication. "
                    "Set HF_TOKEN env var (huggingface.co token) or pick a different PRESET.",
                    file=sys.stderr,
                )
                sys.exit(2)
            r.raise_for_status()
            # content-length may be absent (chunked transfer); 0 disables the progress line.
            total = int(r.headers.get("content-length", 0))
            written = 0
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if not chunk:
                        continue
                    f.write(chunk)
                    written += len(chunk)
                    if total:
                        pct = 100 * written / total
                        print(f" {written // (1024*1024)}MB / {total // (1024*1024)}MB ({pct:.1f}%)", end="\r")
            print()
        # Promote the finished download in one step so LOCAL_FILE is never partial.
        os.replace(partial_path, LOCAL_FILE)
    finally:
        # Only still present when the transfer failed or was interrupted.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"[download] wrote {LOCAL_FILE}")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _read_rows():
    """Read LOCAL_FILE according to DATA_FORMAT and return its rows as a list of dicts.

    * ``parquet`` - the whole table is read with pyarrow and converted with
      ``to_pylist()``.
    * ``json``    - the file must contain a top-level JSON array.

    Exits the process with status 1 when the JSON root is not a list or when
    DATA_FORMAT is not one of the two supported values.
    """
    if DATA_FORMAT == "parquet":
        print(f"[read] parquet {LOCAL_FILE}")
        table = pq.read_table(LOCAL_FILE)
        print(f"[read] rows={table.num_rows} columns={table.column_names}")
        return table.to_pylist()

    if DATA_FORMAT == "json":
        print(f"[read] json {LOCAL_FILE}")
        import json as _json

        # Decode explicitly as UTF-8: the platform default encoding is not
        # UTF-8 everywhere, and the file contains non-ASCII text.
        with open(LOCAL_FILE, "r", encoding="utf-8") as f:
            data = _json.load(f)
        # embedded_movies is a top-level array
        if not isinstance(data, list):
            print(f"[err] expected top-level JSON array, got {type(data).__name__}", file=sys.stderr)
            sys.exit(1)
        print(f"[read] rows={len(data)}")
        if data:
            print(f"[read] sample keys: {list(data[0].keys())[:10]}")
        return data

    print(f"[err] unknown DATA_FORMAT={DATA_FORMAT}", file=sys.stderr)
    sys.exit(1)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def load():
    """Load the downloaded fixture into MongoDB and write fixture_manifest.json.

    Steps:
      1. Read rows via _read_rows() and cap them at LIMIT (when LIMIT > 0).
      2. Take the embedding length of the first row that has one as the
         reference dimensionality; warn if it differs from DIMS_EXPECTED.
      3. Build one document per row whose embedding has that length. The
         vector is stored under "embedding"; rows without a usable vector
         are counted and skipped.
      4. Drop DB_NAME.COLL_NAME and bulk-insert the documents in batches.
      5. Write fixture_manifest.json next to this script.

    Exits the process with status 1 when there are no rows, or when no row
    has EMBEDDING_FIELD set.
    """
    import json

    rows = _read_rows()
    if LIMIT > 0:
        rows = rows[:LIMIT]
    if not rows:
        print("[err] no rows", file=sys.stderr)
        sys.exit(1)

    # Find the first row that actually has an embedding (some datasets,
    # including embedded_movies, have null embeddings for entries with
    # missing plot text).
    sample = next((r for r in rows if r.get(EMBEDDING_FIELD)), None)
    if sample is None:
        print(f"[err] no rows have field '{EMBEDDING_FIELD}'; sample keys: {list(rows[0].keys())}", file=sys.stderr)
        sys.exit(1)
    dims = len(sample[EMBEDDING_FIELD])
    print(f"[verify] embedding field='{EMBEDDING_FIELD}' dims={dims}")
    if dims != DIMS_EXPECTED:
        print(
            f"[warn] dims={dims} differs from preset DIMS_EXPECTED={DIMS_EXPECTED}; "
            "manifest will record the actual dims",
            file=sys.stderr,
        )

    # datetime.utcnow() is deprecated; an aware UTC timestamp is stored by
    # pymongo as the same instant.
    now = datetime.datetime.now(datetime.timezone.utc)
    docs = []
    skipped = 0
    for idx, r in enumerate(rows):
        emb = r.get(EMBEDDING_FIELD)
        if not emb or len(emb) != dims:
            skipped += 1
            continue
        # Carry through dataset fields (text/title/etc.) so RAG/chunker
        # tests have real content. Source fields go first; our canonical
        # fields win.
        doc = {k: v for k, v in r.items() if k != EMBEDDING_FIELD}
        doc["embedding"] = list(emb)
        doc["_created_at"] = now
        doc["_updated_at"] = now
        if ID_FIELD and r.get(ID_FIELD) is not None:
            doc["_id"] = f"{COLL_NAME.lower()}_{r[ID_FIELD]}"
        else:
            doc["_id"] = f"{COLL_NAME.lower()}_{idx:06d}"
        docs.append(doc)

    if skipped:
        print(f"[load] skipped {skipped} rows missing/short embeddings")

    client = MongoClient(MONGO_URI)
    try:
        coll = client[DB_NAME][COLL_NAME]
        print(f"[mongo] dropping {DB_NAME}.{COLL_NAME}")
        coll.drop()

        # Bulk insert in chunks — pymongo's default is fine but explicit is clearer
        BATCH = 1000
        for i in range(0, len(docs), BATCH):
            coll.insert_many(docs[i:i + BATCH], ordered=False)
            print(f" inserted {min(i + BATCH, len(docs))}/{len(docs)}")

        count = coll.count_documents({})
        print(f"[mongo] {DB_NAME}.{COLL_NAME} now has {count} docs (embedding dims={dims})")
    finally:
        # Release the connection pool even when the drop or an insert fails.
        client.close()

    # Manifest — single source of truth shared with create_vector_index.js
    # so the index name + dimensions can never drift from the loaded data.
    manifest = {
        "preset": PRESET,
        "provider": PROVIDER_LABEL,
        "dims": dims,
        "db": DB_NAME,
        "collection": COLL_NAME,
        "count": count,
        "index_name": f"{COLL_NAME}_embedding_{PROVIDER_LABEL.replace('-', '_')}_{dims}_idx",
    }
    manifest_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixture_manifest.json")
    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)
    print(f"[manifest] wrote {manifest_path}: {manifest['index_name']}")
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
if __name__ == "__main__":
    # Run the two stages in order: fetch the dataset file, then load it.
    for _stage in (download, load):
        _stage()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# Exercise Atlas $vectorSearch end-to-end via the Ruby mongo driver — the
# same driver Parse::MongoDB.aggregate uses. This is the literal pipeline
# shape Parse::VectorSearch will produce (vector_rag_plan.md §3).
#
# Run:
#   bundle exec ruby scripts/vector_prototype/query_prototype.rb
#
# Prereq: fetch_embeddings.py + create_vector_index.js have been run, so
# the database/collection recorded in fixture_manifest.json (Movie for
# PRESET=fast, WikiArticle otherwise, unless COLL_NAME was overridden) is
# populated and the search index is queryable.
#
# This script intentionally avoids any Parse::* SDK code — it's the raw
# Atlas surface, used as the ground-truth comparison once Parse::VectorSearch
# lands. The same pipeline shape will be emitted by Parse::VectorSearch::SearchBuilder.

require "mongo"
require "json"

MANIFEST_PATH = File.expand_path("fixture_manifest.json", __dir__)
unless File.exist?(MANIFEST_PATH)
  abort "no fixture_manifest.json — run fetch_embeddings.py first"
end
MANIFEST = JSON.parse(File.read(MANIFEST_PATH))

MONGO_URI = ENV.fetch("ATLAS_URI", "mongodb://localhost:27020/#{MANIFEST['db']}?directConnection=true")
INDEX_NAME = ENV.fetch("VECTOR_INDEX", MANIFEST["index_name"])
COLL_NAME = MANIFEST["collection"].to_sym

puts "[manifest] preset=#{MANIFEST['preset']} provider=#{MANIFEST['provider']} dims=#{MANIFEST['dims']} index=#{INDEX_NAME}"

# Always select the database named in the manifest. fetch_embeddings.py reads
# the same ATLAS_URI variable with a URI that has no database path, so relying
# on the URI alone would query the driver's default database and report
# "no docs loaded" even though the fixture is present.
client = Mongo::Client.new(MONGO_URI).use(MANIFEST["db"])
coll = client[COLL_NAME]

count = coll.count_documents({})
abort "no docs loaded — run fetch_embeddings.py first" if count.zero?
puts "[setup] #{count} docs in #{client.database.name}.#{COLL_NAME}"

# Use an existing doc's vector as the query — exercises the index without
# requiring an embedding API. When Voyage lands, swap this for a freshly
# computed query vector against the same index.
seed = coll.find.limit(1).first
puts "[seed] #{seed["title"]} (#{seed["embedding"].size}-dim)"

pipeline = [
  {
    "$vectorSearch" => {
      "index" => INDEX_NAME,
      "path" => "embedding",
      "queryVector" => seed["embedding"],
      "numCandidates" => 200,
      "limit" => 10,
    },
  },
  {
    "$project" => {
      "_id" => 1,
      "title" => 1,
      # Project the score under _vscore (not _score) so hybrid search with
      # Atlas Search lexical scores doesn't collide. Matches the convention
      # the SDK will adopt — vector_rag_plan.md §3.
      "_vscore" => { "$meta" => "vectorSearchScore" },
    },
  },
]

puts "[query] $vectorSearch limit=10 numCandidates=200"
t0 = Time.now
results = coll.aggregate(pipeline).to_a
elapsed_ms = ((Time.now - t0) * 1000).round(1)

puts "[result] #{results.size} hits in #{elapsed_ms}ms"
results.each_with_index do |r, i|
  printf(" %2d. score=%.4f %s\n", i + 1, r["_vscore"], r["title"])
end

# Sanity: top hit should be the seed itself (cosine = 1.0)
top = results.first
if top && top["_id"] == seed["_id"]
  puts "[ok] top hit == seed (self-similarity verified)"
else
  puts "[warn] top hit was not the seed — index may still be building"
end

# Shut down the driver's connections and monitoring threads explicitly.
client.close
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Orchestrate the vector-search / RAG test fixture setup.
# Assumes the Atlas Local container from docker-compose.atlas.yml is up
# (i.e. localhost:27020 is reachable).
#
# Usage: ./scripts/vector_prototype/run.sh
#
# Environment:
#   DB_NAME  database to load into and index (default: vector_prototype).
#            fetch_embeddings.py reads the same variable, so the index step
#            below must target the same database the loader wrote to.
#
# Once this stabilises, the fetch + index-create steps will be folded
# into scripts/docker/docker-compose.atlas.yml as additional init
# containers, alongside the existing atlas-init service.

set -euo pipefail

HERE="$(cd "$(dirname "$0")" && pwd)"
# Exported so the Python loader and the mongosh step agree on the database.
export DB_NAME="${DB_NAME:-vector_prototype}"

echo "[1/3] verifying Atlas Local on localhost:27020"
if ! mongosh --quiet --eval "db.runCommand({ ping: 1 })" \
    "mongodb://localhost:27020/?directConnection=true" >/dev/null; then
  # Diagnostics go to stderr so they are not lost when stdout is redirected.
  echo " ERROR: Atlas Local not reachable. Start it with:" >&2
  echo " docker-compose -f scripts/docker/docker-compose.atlas.yml up -d" >&2
  exit 1
fi
echo " ok"

echo "[2/3] downloading + loading embeddings"
python3 "$HERE/fetch_embeddings.py"

echo "[3/3] creating vectorSearch index"
mongosh --quiet "mongodb://localhost:27020/${DB_NAME}?directConnection=true" \
  "$HERE/create_vector_index.js"

echo
echo "Done. Run the Ruby query exercise with:"
echo " bundle exec ruby scripts/vector_prototype/query_prototype.rb"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: parse-stack-next
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.5.0
|
|
4
|
+
version: 5.0.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Anthony Persaud
|
|
@@ -113,6 +113,26 @@ dependencies:
|
|
|
113
113
|
- - "<"
|
|
114
114
|
- !ruby/object:Gem::Version
|
|
115
115
|
version: '2'
|
|
116
|
+
- !ruby/object:Gem::Dependency
|
|
117
|
+
name: connection_pool
|
|
118
|
+
requirement: !ruby/object:Gem::Requirement
|
|
119
|
+
requirements:
|
|
120
|
+
- - ">="
|
|
121
|
+
- !ruby/object:Gem::Version
|
|
122
|
+
version: '2.2'
|
|
123
|
+
- - "<"
|
|
124
|
+
- !ruby/object:Gem::Version
|
|
125
|
+
version: '4'
|
|
126
|
+
type: :runtime
|
|
127
|
+
prerelease: false
|
|
128
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
129
|
+
requirements:
|
|
130
|
+
- - ">="
|
|
131
|
+
- !ruby/object:Gem::Version
|
|
132
|
+
version: '2.2'
|
|
133
|
+
- - "<"
|
|
134
|
+
- !ruby/object:Gem::Version
|
|
135
|
+
version: '4'
|
|
116
136
|
- !ruby/object:Gem::Dependency
|
|
117
137
|
name: rack
|
|
118
138
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -161,9 +181,25 @@ dependencies:
|
|
|
161
181
|
- - "~>"
|
|
162
182
|
- !ruby/object:Gem::Version
|
|
163
183
|
version: '0.6'
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
184
|
+
- !ruby/object:Gem::Dependency
|
|
185
|
+
name: graphql
|
|
186
|
+
requirement: !ruby/object:Gem::Requirement
|
|
187
|
+
requirements:
|
|
188
|
+
- - "~>"
|
|
189
|
+
- !ruby/object:Gem::Version
|
|
190
|
+
version: '2.0'
|
|
191
|
+
type: :development
|
|
192
|
+
prerelease: false
|
|
193
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
194
|
+
requirements:
|
|
195
|
+
- - "~>"
|
|
196
|
+
- !ruby/object:Gem::Version
|
|
197
|
+
version: '2.0'
|
|
198
|
+
description: 'Parse Server Ruby Client. Perform Object-relational mapping between
|
|
199
|
+
Parse Server and Ruby classes, with authentication, cloud code webhooks, push notifications
|
|
200
|
+
and more built in. parse-stack-next is a fork of parse-stack with additional features:
|
|
201
|
+
vector search, Atlas Search, agent ACL scopes, GraphQL, MongoDB-direct pipeline
|
|
202
|
+
enforcement, and ongoing maintenance.'
|
|
167
203
|
email:
|
|
168
204
|
- adrian+parse-stack@neurosynq.net
|
|
169
205
|
executables:
|
|
@@ -174,10 +210,14 @@ files:
|
|
|
174
210
|
- ".bundle/config"
|
|
175
211
|
- ".env.sample"
|
|
176
212
|
- ".env.test"
|
|
213
|
+
- ".github/workflows/codeql.yml"
|
|
214
|
+
- ".github/workflows/docs.yml"
|
|
215
|
+
- ".github/workflows/release.yml"
|
|
177
216
|
- ".github/workflows/ruby.yml"
|
|
178
217
|
- ".gitignore"
|
|
179
218
|
- ".ruby-version"
|
|
180
219
|
- ".solargraph.yml"
|
|
220
|
+
- ".vscode/settings.json"
|
|
181
221
|
- CHANGELOG.md
|
|
182
222
|
- Gemfile
|
|
183
223
|
- Gemfile.lock
|
|
@@ -185,6 +225,12 @@ files:
|
|
|
185
225
|
- Makefile
|
|
186
226
|
- README.md
|
|
187
227
|
- Rakefile
|
|
228
|
+
- SECURITY.md
|
|
229
|
+
- assets/parse-stack-next-avatar.png
|
|
230
|
+
- assets/parse-stack-next-avatar.svg
|
|
231
|
+
- assets/parse-stack-next-banner.png
|
|
232
|
+
- assets/parse-stack-next-banner.svg
|
|
233
|
+
- assets/parse-stack-next-social-preview.png
|
|
188
234
|
- bin/console
|
|
189
235
|
- bin/parse-console
|
|
190
236
|
- bin/server
|
|
@@ -192,9 +238,12 @@ files:
|
|
|
192
238
|
- config/parse-config.json
|
|
193
239
|
- docs/TEST_SERVER.md
|
|
194
240
|
- docs/_config.yml
|
|
241
|
+
- docs/atlas_vector_search_guide.md
|
|
242
|
+
- docs/client_sdk_guide.md
|
|
195
243
|
- docs/mcp_guide.md
|
|
196
244
|
- docs/mongodb_direct_guide.md
|
|
197
245
|
- docs/mongodb_index_optimization_guide.md
|
|
246
|
+
- docs/usage_guide.md
|
|
198
247
|
- examples/transaction_example.rb
|
|
199
248
|
- lib/parse-stack-next.rb
|
|
200
249
|
- lib/parse-stack.rb
|
|
@@ -237,6 +286,8 @@ files:
|
|
|
237
286
|
- lib/parse/atlas_search/result.rb
|
|
238
287
|
- lib/parse/atlas_search/search_builder.rb
|
|
239
288
|
- lib/parse/atlas_search/session.rb
|
|
289
|
+
- lib/parse/cache/pool.rb
|
|
290
|
+
- lib/parse/cache/redis.rb
|
|
240
291
|
- lib/parse/client.rb
|
|
241
292
|
- lib/parse/client/authentication.rb
|
|
242
293
|
- lib/parse/client/batch.rb
|
|
@@ -248,6 +299,19 @@ files:
|
|
|
248
299
|
- lib/parse/client/request.rb
|
|
249
300
|
- lib/parse/client/response.rb
|
|
250
301
|
- lib/parse/clp_scope.rb
|
|
302
|
+
- lib/parse/console.rb
|
|
303
|
+
- lib/parse/embeddings.rb
|
|
304
|
+
- lib/parse/embeddings/cohere.rb
|
|
305
|
+
- lib/parse/embeddings/fixture.rb
|
|
306
|
+
- lib/parse/embeddings/jina.rb
|
|
307
|
+
- lib/parse/embeddings/local_http.rb
|
|
308
|
+
- lib/parse/embeddings/openai.rb
|
|
309
|
+
- lib/parse/embeddings/provider.rb
|
|
310
|
+
- lib/parse/embeddings/qwen.rb
|
|
311
|
+
- lib/parse/embeddings/voyage.rb
|
|
312
|
+
- lib/parse/graphql.rb
|
|
313
|
+
- lib/parse/graphql/scalars.rb
|
|
314
|
+
- lib/parse/graphql/type_generator.rb
|
|
251
315
|
- lib/parse/live_query.rb
|
|
252
316
|
- lib/parse/live_query/circuit_breaker.rb
|
|
253
317
|
- lib/parse/live_query/client.rb
|
|
@@ -280,6 +344,7 @@ files:
|
|
|
280
344
|
- lib/parse/model/core/builder.rb
|
|
281
345
|
- lib/parse/model/core/create_lock.rb
|
|
282
346
|
- lib/parse/model/core/describe.rb
|
|
347
|
+
- lib/parse/model/core/embed_managed.rb
|
|
283
348
|
- lib/parse/model/core/enhanced_change_tracking.rb
|
|
284
349
|
- lib/parse/model/core/errors.rb
|
|
285
350
|
- lib/parse/model/core/fetching.rb
|
|
@@ -290,6 +355,7 @@ files:
|
|
|
290
355
|
- lib/parse/model/core/querying.rb
|
|
291
356
|
- lib/parse/model/core/schema.rb
|
|
292
357
|
- lib/parse/model/core/search_indexing.rb
|
|
358
|
+
- lib/parse/model/core/vector_searchable.rb
|
|
293
359
|
- lib/parse/model/date.rb
|
|
294
360
|
- lib/parse/model/email.rb
|
|
295
361
|
- lib/parse/model/file.rb
|
|
@@ -305,6 +371,7 @@ files:
|
|
|
305
371
|
- lib/parse/model/time_zone.rb
|
|
306
372
|
- lib/parse/model/validations.rb
|
|
307
373
|
- lib/parse/model/validations/uniqueness_validator.rb
|
|
374
|
+
- lib/parse/model/vector.rb
|
|
308
375
|
- lib/parse/mongodb.rb
|
|
309
376
|
- lib/parse/pipeline_security.rb
|
|
310
377
|
- lib/parse/query.rb
|
|
@@ -331,12 +398,12 @@ files:
|
|
|
331
398
|
- lib/parse/stack/version.rb
|
|
332
399
|
- lib/parse/two_factor_auth.rb
|
|
333
400
|
- lib/parse/two_factor_auth/user_extension.rb
|
|
401
|
+
- lib/parse/vector_search.rb
|
|
334
402
|
- lib/parse/webhooks.rb
|
|
335
403
|
- lib/parse/webhooks/payload.rb
|
|
336
404
|
- lib/parse/webhooks/registration.rb
|
|
337
405
|
- lib/parse/webhooks/replay_protection.rb
|
|
338
406
|
- parse-stack-next.gemspec
|
|
339
|
-
- parse-stack.png
|
|
340
407
|
- scripts/debug-ips.js
|
|
341
408
|
- scripts/docker/Dockerfile.parse
|
|
342
409
|
- scripts/docker/atlas-init.js
|
|
@@ -347,6 +414,11 @@ files:
|
|
|
347
414
|
- scripts/start-parse.sh
|
|
348
415
|
- scripts/start_mcp_server.rb
|
|
349
416
|
- scripts/test_server_connection.rb
|
|
417
|
+
- scripts/vector_prototype/create_vector_index.js
|
|
418
|
+
- scripts/vector_prototype/fetch_embeddings.py
|
|
419
|
+
- scripts/vector_prototype/fixture_manifest.json
|
|
420
|
+
- scripts/vector_prototype/query_prototype.rb
|
|
421
|
+
- scripts/vector_prototype/run.sh
|
|
350
422
|
homepage: https://github.com/neurosynq/parse-stack-next
|
|
351
423
|
licenses:
|
|
352
424
|
- MIT
|
data/parse-stack.png
DELETED
|
Binary file
|