parse-stack-next 4.5.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.sample +17 -3
- data/.github/workflows/codeql.yml +44 -0
- data/.github/workflows/docs.yml +39 -0
- data/.github/workflows/ruby.yml +8 -6
- data/.gitignore +4 -0
- data/CHANGELOG.md +295 -72
- data/Gemfile.lock +10 -3
- data/LICENSE.txt +1 -1
- data/README.md +190 -219
- data/Rakefile +1 -1
- data/SECURITY.md +30 -0
- data/assets/parse-stack-next-avatar.png +0 -0
- data/assets/parse-stack-next-avatar.svg +37 -0
- data/assets/parse-stack-next-banner.png +0 -0
- data/assets/parse-stack-next-banner.svg +45 -0
- data/assets/parse-stack-next-social-preview.png +0 -0
- data/docs/atlas_vector_search_guide.md +511 -0
- data/docs/client_sdk_guide.md +1320 -0
- data/docs/mcp_guide.md +225 -104
- data/docs/mongodb_direct_guide.md +21 -4
- data/docs/usage_guide.md +585 -0
- data/examples/transaction_example.rb +28 -28
- data/lib/parse/acl_scope.rb +2 -2
- data/lib/parse/agent/mcp_rack_app.rb +184 -16
- data/lib/parse/agent/metadata_dsl.rb +16 -16
- data/lib/parse/agent/pipeline_validator.rb +28 -1
- data/lib/parse/agent/prompts.rb +5 -5
- data/lib/parse/agent/tools.rb +287 -14
- data/lib/parse/agent.rb +209 -12
- data/lib/parse/api/analytics.rb +27 -5
- data/lib/parse/api/files.rb +6 -2
- data/lib/parse/api/push.rb +21 -4
- data/lib/parse/api/server.rb +59 -0
- data/lib/parse/api/users.rb +26 -2
- data/lib/parse/atlas_search/index_manager.rb +84 -0
- data/lib/parse/atlas_search.rb +37 -9
- data/lib/parse/cache/pool.rb +73 -0
- data/lib/parse/cache/redis.rb +190 -0
- data/lib/parse/client/body_builder.rb +94 -0
- data/lib/parse/client/caching.rb +109 -9
- data/lib/parse/client/response.rb +27 -0
- data/lib/parse/client.rb +74 -3
- data/lib/parse/console.rb +203 -0
- data/lib/parse/embeddings/cohere.rb +484 -0
- data/lib/parse/embeddings/fixture.rb +130 -0
- data/lib/parse/embeddings/jina.rb +454 -0
- data/lib/parse/embeddings/local_http.rb +492 -0
- data/lib/parse/embeddings/openai.rb +520 -0
- data/lib/parse/embeddings/provider.rb +264 -0
- data/lib/parse/embeddings/qwen.rb +431 -0
- data/lib/parse/embeddings/voyage.rb +550 -0
- data/lib/parse/embeddings.rb +225 -0
- data/lib/parse/graphql/scalars.rb +53 -0
- data/lib/parse/graphql/type_generator.rb +264 -0
- data/lib/parse/graphql.rb +48 -0
- data/lib/parse/live_query/client.rb +24 -5
- data/lib/parse/live_query/subscription.rb +17 -6
- data/lib/parse/live_query.rb +9 -4
- data/lib/parse/model/associations/collection_proxy.rb +2 -2
- data/lib/parse/model/associations/has_many.rb +32 -1
- data/lib/parse/model/associations/has_one.rb +17 -0
- data/lib/parse/model/associations/pointer_collection_proxy.rb +3 -3
- data/lib/parse/model/classes/user.rb +307 -11
- data/lib/parse/model/clp.rb +1 -1
- data/lib/parse/model/core/embed_managed.rb +296 -0
- data/lib/parse/model/core/fetching.rb +4 -4
- data/lib/parse/model/core/indexing.rb +53 -14
- data/lib/parse/model/core/parse_reference.rb +3 -3
- data/lib/parse/model/core/properties.rb +70 -1
- data/lib/parse/model/core/querying.rb +57 -1
- data/lib/parse/model/core/vector_searchable.rb +285 -0
- data/lib/parse/model/file.rb +16 -4
- data/lib/parse/model/model.rb +26 -10
- data/lib/parse/model/object.rb +63 -6
- data/lib/parse/model/pointer.rb +16 -2
- data/lib/parse/model/shortnames.rb +2 -0
- data/lib/parse/model/validations/uniqueness_validator.rb +3 -3
- data/lib/parse/model/vector.rb +102 -0
- data/lib/parse/mongodb.rb +90 -8
- data/lib/parse/pipeline_security.rb +59 -2
- data/lib/parse/query/constraints.rb +16 -14
- data/lib/parse/query/ordering.rb +1 -1
- data/lib/parse/query.rb +137 -64
- data/lib/parse/stack/generators/templates/model.erb +2 -2
- data/lib/parse/stack/generators/templates/model_installation.rb +1 -1
- data/lib/parse/stack/generators/templates/model_role.rb +1 -1
- data/lib/parse/stack/generators/templates/model_session.rb +1 -1
- data/lib/parse/stack/generators/templates/parse.rb +1 -1
- data/lib/parse/stack/generators/templates/webhooks.rb +1 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/stack.rb +375 -73
- data/lib/parse/two_factor_auth/user_extension.rb +5 -2
- data/lib/parse/vector_search.rb +341 -0
- data/parse-stack-next.gemspec +10 -9
- data/scripts/docker/docker-compose.test.yml +18 -0
- data/scripts/start-parse.sh +6 -0
- data/scripts/vector_prototype/create_vector_index.js +105 -0
- data/scripts/vector_prototype/fetch_embeddings.py +241 -0
- data/scripts/vector_prototype/fixture_manifest.json +9 -0
- data/scripts/vector_prototype/query_prototype.rb +84 -0
- data/scripts/vector_prototype/run.sh +34 -0
- metadata +75 -5
- data/parse-stack.png +0 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Vector-search & RAG test fixture loader.
|
|
4
|
+
|
|
5
|
+
Pulls a subset of a pre-computed embeddings dataset from HuggingFace and
|
|
6
|
+
loads it into Atlas Local at localhost:27020 (the same container managed
|
|
7
|
+
by scripts/docker/docker-compose.atlas.yml). Designed to be reused by both:
|
|
8
|
+
|
|
9
|
+
1. Vector search integration tests (Parse::VectorSearch — v4.3 plan)
|
|
10
|
+
2. RAG retrieval tests (Parse::Retrieval — v4.4 plan)
|
|
11
|
+
|
|
12
|
+
The Wikipedia article shape (title + full text + url + embedding) covers
|
|
13
|
+
both surfaces: vector tests need (title, embedding), RAG/chunking tests
|
|
14
|
+
need the full text body.
|
|
15
|
+
|
|
16
|
+
The runtime target is voyage-multimodal-3 (1024-dim) — the production
|
|
17
|
+
preference. The fixture data is provider-agnostic: any pre-computed
|
|
18
|
+
embeddings exercise the same Atlas $vectorSearch surface, since the
|
|
19
|
+
index contract is (path, dims, similarity) — provider is metadata.
|
|
20
|
+
|
|
21
|
+
Two presets:
|
|
22
|
+
|
|
23
|
+
PRESET=fast (default) — MongoDB/embedded_movies (sample_mflix.embedded_movies.json)
|
|
24
|
+
1536-dim OpenAI ada-002 plot embeddings, ~3500 movies, single ~42MB JSON file. Quick to download.
|
|
25
|
+
Use when iterating on pipeline mechanics, not Voyage-shape parity.
|
|
26
|
+
|
|
27
|
+
PRESET=voyage_compat — Cohere/wikipedia-2023-11-embed-multilingual-v3
|
|
28
|
+
1024-dim (matches voyage-multimodal-3), ~1.5GB shards.
|
|
29
|
+
Use when the index/query surface needs to be Voyage-dimension-shaped
|
|
30
|
+
even though vectors come from Cohere. Vectors are NOT interchangeable
|
|
31
|
+
with Voyage outputs — same dim, different latent space — but the
|
|
32
|
+
SDK pipeline mechanics are validated correctly.
|
|
33
|
+
|
|
34
|
+
For actual Voyage-vector parity at test time, compute query vectors via
|
|
35
|
+
the Voyage API into the same 1024-dim index (no local-inference path
|
|
36
|
+
exists — Voyage models are closed-weights, API-only).
|
|
37
|
+
|
|
38
|
+
Prereqs: pip install pyarrow pymongo requests
|
|
39
|
+
|
|
40
|
+
Run: python3 scripts/vector_prototype/fetch_embeddings.py
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
import os
|
|
44
|
+
import sys
|
|
45
|
+
import datetime
|
|
46
|
+
import requests
|
|
47
|
+
import pyarrow.parquet as pq
|
|
48
|
+
from pymongo import MongoClient
|
|
49
|
+
|
|
50
|
+
# Per-preset download/parse settings. The active entry is chosen with the
# PRESET environment variable; several derived values below can be overridden
# individually through their own environment variables.
PRESETS = {
    "fast": {
        # MongoDB's reference dataset for Atlas Vector Search demos:
        # ~3500 movies × 1536-dim OpenAI ada-002 plot embeddings.
        # Public, ~42MB, one JSON file holding an array of objects, each
        # with a plot_embedding field.
        "url": "https://huggingface.co/datasets/MongoDB/embedded_movies/resolve/main/sample_mflix.embedded_movies.json",
        "dims": 1536,
        "provider": "openai-text-embedding-ada-002",
        "format": "json",
        "embedding_field": "plot_embedding",
        "id_field": None,  # no source id column; load() synthesizes an _id
    },
    "voyage_compat": {
        # 1024-dim, the same index shape as voyage-multimodal-3 but a
        # different latent space: these vectors must NOT be mixed with
        # Voyage outputs at query time.
        # NOTE: needs an authenticated HuggingFace download (HF_TOKEN env
        # var or `huggingface-cli login`); the Cohere wikipedia v3 dataset
        # is gated since late 2024.
        "url": "https://huggingface.co/datasets/Cohere/wikipedia-2023-11-embed-multilingual-v3/resolve/main/en/0000.parquet",
        "dims": 1024,
        "provider": "cohere-embed-multilingual-v3",
        "format": "parquet",
        "embedding_field": "emb",
        "id_field": "id",
    },
}

PRESET = os.environ.get("PRESET", "fast")
if PRESET not in PRESETS:
    print(f"[err] unknown PRESET={PRESET}; choose one of: {list(PRESETS)}", file=sys.stderr)
    sys.exit(2)

_preset_cfg = PRESETS[PRESET]

# Overridable through the environment.
DATASET_URL = os.environ.get("DATASET_URL", _preset_cfg["url"])
DIMS_EXPECTED = int(os.environ.get("DIMS_EXPECTED", _preset_cfg["dims"]))
PROVIDER_LABEL = os.environ.get("PROVIDER_LABEL", _preset_cfg["provider"])

# Fixed by the preset.
DATA_FORMAT = _preset_cfg["format"]
EMBEDDING_FIELD = _preset_cfg["embedding_field"]
ID_FIELD = _preset_cfg["id_field"]

# Anything that is not parquet is cached with a .json extension.
_file_ext = DATA_FORMAT if DATA_FORMAT == "parquet" else "json"
LOCAL_FILE = os.environ.get("LOCAL_FILE", f"/tmp/parse-stack-fixture-{PRESET}.{_file_ext}")
MONGO_URI = os.environ.get("ATLAS_URI", "mongodb://localhost:27020/?directConnection=true")
DB_NAME = os.environ.get("DB_NAME", "vector_prototype")
# The collection name follows the dataset shape so RAG tests can pivot
# without tying assertions to a hard-coded class name.
DEFAULT_COLL = {"fast": "Movie"}.get(PRESET, "WikiArticle")
COLL_NAME = os.environ.get("COLL_NAME", DEFAULT_COLL)
LIMIT = int(os.environ.get("LIMIT", "10000"))

print(f"[preset] {PRESET}  provider={PROVIDER_LABEL}  dims={DIMS_EXPECTED}")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def download():
    """Download DATASET_URL to LOCAL_FILE unless a non-empty copy already exists.

    Sends an ``Authorization: Bearer`` header when the HF_TOKEN environment
    variable is set. Exits with status 2 on HTTP 401; any other HTTP error
    status is raised by ``raise_for_status()``.

    The body is streamed into ``LOCAL_FILE + ".part"`` and only renamed to
    LOCAL_FILE once the transfer finished. Writing straight to LOCAL_FILE
    would leave a truncated, non-empty file behind after an interrupted
    download, and the "already present" check above would then accept it as
    a valid cache on every later run.
    """
    if os.path.exists(LOCAL_FILE) and os.path.getsize(LOCAL_FILE) > 0:
        print(f"[skip] {LOCAL_FILE} already present ({os.path.getsize(LOCAL_FILE)} bytes)")
        return
    headers = {}
    token = os.environ.get("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    print(f"[download] {DATASET_URL}")
    partial_file = f"{LOCAL_FILE}.part"
    try:
        with requests.get(DATASET_URL, stream=True, timeout=120, headers=headers) as r:
            if r.status_code == 401:
                print(
                    f"[err] HTTP 401 — dataset requires authentication. "
                    f"Set HF_TOKEN env var (huggingface.co token) or pick a different PRESET.",
                    file=sys.stderr,
                )
                sys.exit(2)
            r.raise_for_status()
            # content-length may be absent; progress is only shown when known.
            total = int(r.headers.get("content-length", 0))
            written = 0
            with open(partial_file, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if not chunk:
                        # Nothing to write or count for an empty chunk.
                        continue
                    f.write(chunk)
                    written += len(chunk)
                    if total:
                        pct = 100 * written / total
                        print(f"  {written // (1024*1024)}MB / {total // (1024*1024)}MB ({pct:.1f}%)", end="\r")
            print()
        # Publish the file only after the whole body was written.
        os.replace(partial_file, LOCAL_FILE)
    finally:
        # On any failure (including sys.exit / Ctrl-C) drop the partial file
        # so it can never be mistaken for a finished download.
        if os.path.exists(partial_file):
            os.remove(partial_file)
    print(f"[download] wrote {LOCAL_FILE}")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _read_rows():
    """Read LOCAL_FILE according to DATA_FORMAT and return its rows as a list of dicts.

    * ``parquet``: the whole table is read and converted with ``to_pylist()``.
    * ``json``: the file must contain a top-level array; it is returned as-is.

    Exits with status 1 when the JSON document is not an array or when
    DATA_FORMAT is neither ``parquet`` nor ``json``.
    """
    if DATA_FORMAT == "parquet":
        print(f"[read] parquet {LOCAL_FILE}")
        table = pq.read_table(LOCAL_FILE)
        print(f"[read] rows={table.num_rows} columns={table.column_names}")
        return table.to_pylist()
    elif DATA_FORMAT == "json":
        print(f"[read] json {LOCAL_FILE}")
        import json as _json
        # Decode explicitly as UTF-8: open() otherwise uses the locale's
        # preferred encoding, which can mis-decode or reject non-ASCII text
        # on platforms whose default is not UTF-8.
        with open(LOCAL_FILE, "r", encoding="utf-8") as f:
            data = _json.load(f)
        # embedded_movies is a top-level array
        if not isinstance(data, list):
            print(f"[err] expected top-level JSON array, got {type(data).__name__}", file=sys.stderr)
            sys.exit(1)
        print(f"[read] rows={len(data)}")
        if data:
            print(f"[read] sample keys: {list(data[0].keys())[:10]}")
        return data
    else:
        print(f"[err] unknown DATA_FORMAT={DATA_FORMAT}", file=sys.stderr)
        sys.exit(1)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def load():
    """Load the downloaded rows into MongoDB and write fixture_manifest.json.

    Steps:
      1. Read rows via ``_read_rows()`` and keep at most LIMIT of them
         (LIMIT <= 0 keeps everything). Exit 1 when nothing is left.
      2. Take the embedding length of the first row that has a non-empty
         EMBEDDING_FIELD as the dimension count; warn when it differs from
         DIMS_EXPECTED. Exit 1 when no row carries an embedding.
      3. Build one document per row whose embedding has exactly that length;
         other rows are counted as skipped.
      4. Drop DB_NAME.COLL_NAME and bulk-insert the documents in batches.
      5. Write the manifest next to this script.
    """
    import json

    rows = _read_rows()
    if LIMIT > 0:
        rows = rows[:LIMIT]
    if not rows:
        print("[err] no rows", file=sys.stderr)
        sys.exit(1)

    # Find the first row that actually has an embedding (some datasets,
    # including embedded_movies, have null embeddings for entries with
    # missing plot text).
    sample = next((r for r in rows if r.get(EMBEDDING_FIELD)), None)
    if sample is None:
        print(f"[err] no rows have field '{EMBEDDING_FIELD}'; sample keys: {list(rows[0].keys())}", file=sys.stderr)
        sys.exit(1)
    dims = len(sample[EMBEDDING_FIELD])
    print(f"[verify] embedding field='{EMBEDDING_FIELD}' dims={dims}")
    if dims != DIMS_EXPECTED:
        print(
            f"[warn] dims={dims} differs from preset DIMS_EXPECTED={DIMS_EXPECTED}; "
            f"manifest will record the actual dims",
            file=sys.stderr,
        )

    # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated and
    # returns a naive value. The stored instant is unchanged.
    now = datetime.datetime.now(datetime.timezone.utc)
    docs = []
    skipped = 0
    for idx, r in enumerate(rows):
        emb = r.get(EMBEDDING_FIELD)
        if not emb or len(emb) != dims:
            skipped += 1
            continue
        # Carry through dataset fields (text/title/etc.) so RAG/chunker
        # tests have real content. Source fields go first; our canonical
        # fields win.
        doc = {k: v for k, v in r.items() if k != EMBEDDING_FIELD}
        doc["embedding"] = list(emb)
        doc["_created_at"] = now
        doc["_updated_at"] = now
        if ID_FIELD and r.get(ID_FIELD) is not None:
            doc["_id"] = f"{COLL_NAME.lower()}_{r[ID_FIELD]}"
        else:
            # No usable source id: fall back to the row's position.
            doc["_id"] = f"{COLL_NAME.lower()}_{idx:06d}"
        docs.append(doc)

    if skipped:
        print(f"[load] skipped {skipped} rows missing/short embeddings")

    client = MongoClient(MONGO_URI)
    try:
        coll = client[DB_NAME][COLL_NAME]
        print(f"[mongo] dropping {DB_NAME}.{COLL_NAME}")
        coll.drop()

        # Bulk insert in chunks — pymongo's default is fine but explicit is clearer
        BATCH = 1000
        for i in range(0, len(docs), BATCH):
            coll.insert_many(docs[i:i + BATCH], ordered=False)
            print(f"  inserted {min(i + BATCH, len(docs))}/{len(docs)}")

        count = coll.count_documents({})
        print(f"[mongo] {DB_NAME}.{COLL_NAME} now has {count} docs (embedding dims={dims})")
    finally:
        # Release the connection even when an insert fails.
        client.close()

    # Manifest — single source of truth shared with create_vector_index.js
    # so the index name + dimensions can never drift from the loaded data.
    manifest = {
        "preset": PRESET,
        "provider": PROVIDER_LABEL,
        "dims": dims,
        "db": DB_NAME,
        "collection": COLL_NAME,
        "count": count,
        "index_name": f"{COLL_NAME}_embedding_{PROVIDER_LABEL.replace('-', '_')}_{dims}_idx",
    }
    manifest_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixture_manifest.json")
    with open(manifest_path, "w", encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)
    print(f"[manifest] wrote {manifest_path}: {manifest['index_name']}")
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
if __name__ == "__main__":
|
|
240
|
+
download()
|
|
241
|
+
load()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# Exercise Atlas $vectorSearch end-to-end via the Ruby mongo driver — the
|
|
3
|
+
# same driver Parse::MongoDB.aggregate uses. This is the literal pipeline
|
|
4
|
+
# shape Parse::VectorSearch will produce (vector_rag_plan.md §3).
|
|
5
|
+
#
|
|
6
|
+
# Run:
|
|
7
|
+
# bundle exec ruby scripts/vector_prototype/query_prototype.rb
|
|
8
|
+
#
|
|
9
|
+
# Prereq: fetch_embeddings.py + create_vector_index.js have been run, so the
|
|
10
|
+
# collection recorded in fixture_manifest.json (Movie for PRESET=fast,
|
|
11
|
+
# WikiArticle for voyage_compat) is populated and its search index is queryable.
|
|
12
|
+
#
|
|
13
|
+
# This script intentionally avoids any Parse::* SDK code — it's the raw
|
|
14
|
+
# Atlas surface, used as the ground-truth comparison once Parse::VectorSearch
|
|
15
|
+
# lands. The same pipeline shape will be emitted by Parse::VectorSearch::SearchBuilder.
|
|
16
|
+
|
|
17
|
+
require "mongo"
require "json"

# The manifest written by fetch_embeddings.py records where the fixture was
# loaded (db, collection) and which index name to query.
MANIFEST_PATH = File.expand_path("fixture_manifest.json", __dir__)
unless File.exist?(MANIFEST_PATH)
  abort "no fixture_manifest.json — run fetch_embeddings.py first"
end
MANIFEST = JSON.parse(File.read(MANIFEST_PATH))

MONGO_URI = ENV.fetch("ATLAS_URI", "mongodb://localhost:27020/#{MANIFEST['db']}?directConnection=true")
INDEX_NAME = ENV.fetch("VECTOR_INDEX", MANIFEST["index_name"])
COLL_NAME = MANIFEST["collection"].to_sym

# Search parameters, shared by the pipeline and the log line below.
NUM_CANDIDATES = 200
RESULT_LIMIT = 10

puts "[manifest] preset=#{MANIFEST['preset']} provider=#{MANIFEST['provider']} dims=#{MANIFEST['dims']} index=#{INDEX_NAME}"

# ATLAS_URI is also read by fetch_embeddings.py, whose default URI carries no
# database path (the database is selected separately there). Select the
# manifest's database explicitly so an ATLAS_URI override without a database
# path still queries the database the fixture was loaded into.
client = Mongo::Client.new(MONGO_URI).use(MANIFEST["db"])

begin
  coll = client[COLL_NAME]

  count = coll.count_documents({})
  abort "no docs loaded — run fetch_embeddings.py first" if count.zero?
  puts "[setup] #{count} docs in #{client.database.name}.#{COLL_NAME}"

  # Use an existing doc's vector as the query — exercises the index without
  # requiring an embedding API. When Voyage lands, swap this for a freshly
  # computed query vector against the same index.
  seed = coll.find.limit(1).first
  puts "[seed] #{seed["title"]} (#{seed["embedding"].size}-dim)"

  pipeline = [
    {
      "$vectorSearch" => {
        "index" => INDEX_NAME,
        "path" => "embedding",
        "queryVector" => seed["embedding"],
        "numCandidates" => NUM_CANDIDATES,
        "limit" => RESULT_LIMIT,
      },
    },
    {
      "$project" => {
        "_id" => 1,
        "title" => 1,
        # Project the score under _vscore (not _score) so hybrid search with
        # Atlas Search lexical scores doesn't collide. Matches the convention
        # the SDK will adopt — vector_rag_plan.md §3.
        "_vscore" => { "$meta" => "vectorSearchScore" },
      },
    },
  ]

  puts "[query] $vectorSearch limit=#{RESULT_LIMIT} numCandidates=#{NUM_CANDIDATES}"
  # Monotonic clock: immune to wall-clock adjustments while timing the query.
  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  results = coll.aggregate(pipeline).to_a
  elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000).round(1)

  puts "[result] #{results.size} hits in #{elapsed_ms}ms"
  results.each_with_index do |r, i|
    printf("  %2d. score=%.4f  %s\n", i + 1, r["_vscore"], r["title"])
  end

  # Sanity: top hit should be the seed itself (cosine = 1.0)
  top = results.first
  if top && top["_id"] == seed["_id"]
    puts "[ok] top hit == seed (self-similarity verified)"
  else
    puts "[warn] top hit was not the seed — index may still be building"
  end
ensure
  # Release driver connections on every exit path, including abort.
  client.close
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Orchestrate the vector-search / RAG test fixture setup.
|
|
3
|
+
# Assumes the Atlas Local container from docker-compose.atlas.yml is up
|
|
4
|
+
# (i.e. localhost:27020 is reachable).
|
|
5
|
+
#
|
|
6
|
+
# Usage: ./scripts/vector_prototype/run.sh
|
|
7
|
+
#
|
|
8
|
+
# Once this stabilises, the fetch + index-create steps will be folded
|
|
9
|
+
# into scripts/docker/docker-compose.atlas.yml as additional init
|
|
10
|
+
# containers, alongside the existing atlas-init service.
|
|
11
|
+
|
|
12
|
+
set -euo pipefail

# Directory containing this script, so the helpers resolve from any cwd.
HERE="$(cd "$(dirname "$0")" && pwd)"

# Fail early with an accurate message: without this check a missing mongosh
# would make the ping below fail and be misreported as an unreachable server.
if ! command -v mongosh >/dev/null 2>&1; then
  echo "  ERROR: mongosh not found on PATH. Install the MongoDB Shell and retry." >&2
  exit 1
fi

echo "[1/3] verifying Atlas Local on localhost:27020"
if ! mongosh --quiet --eval "db.runCommand({ ping: 1 })" \
    "mongodb://localhost:27020/?directConnection=true" >/dev/null; then
  # Diagnostics go to stderr so they are not lost when stdout is redirected.
  echo "  ERROR: Atlas Local not reachable. Start it with:" >&2
  echo "    docker-compose -f scripts/docker/docker-compose.atlas.yml up -d" >&2
  exit 1
fi
echo "  ok"

echo "[2/3] downloading + loading embeddings"
python3 "$HERE/fetch_embeddings.py"

echo "[3/3] creating vectorSearch index"
mongosh --quiet "mongodb://localhost:27020/vector_prototype?directConnection=true" \
  "$HERE/create_vector_index.js"

echo
echo "Done. Run the Ruby query exercise with:"
echo "  bundle exec ruby scripts/vector_prototype/query_prototype.rb"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: parse-stack-next
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 5.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Anthony Persaud
|
|
@@ -113,6 +113,26 @@ dependencies:
|
|
|
113
113
|
- - "<"
|
|
114
114
|
- !ruby/object:Gem::Version
|
|
115
115
|
version: '2'
|
|
116
|
+
- !ruby/object:Gem::Dependency
|
|
117
|
+
name: connection_pool
|
|
118
|
+
requirement: !ruby/object:Gem::Requirement
|
|
119
|
+
requirements:
|
|
120
|
+
- - ">="
|
|
121
|
+
- !ruby/object:Gem::Version
|
|
122
|
+
version: '2.2'
|
|
123
|
+
- - "<"
|
|
124
|
+
- !ruby/object:Gem::Version
|
|
125
|
+
version: '4'
|
|
126
|
+
type: :runtime
|
|
127
|
+
prerelease: false
|
|
128
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
129
|
+
requirements:
|
|
130
|
+
- - ">="
|
|
131
|
+
- !ruby/object:Gem::Version
|
|
132
|
+
version: '2.2'
|
|
133
|
+
- - "<"
|
|
134
|
+
- !ruby/object:Gem::Version
|
|
135
|
+
version: '4'
|
|
116
136
|
- !ruby/object:Gem::Dependency
|
|
117
137
|
name: rack
|
|
118
138
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -161,9 +181,25 @@ dependencies:
|
|
|
161
181
|
- - "~>"
|
|
162
182
|
- !ruby/object:Gem::Version
|
|
163
183
|
version: '0.6'
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
184
|
+
- !ruby/object:Gem::Dependency
|
|
185
|
+
name: graphql
|
|
186
|
+
requirement: !ruby/object:Gem::Requirement
|
|
187
|
+
requirements:
|
|
188
|
+
- - "~>"
|
|
189
|
+
- !ruby/object:Gem::Version
|
|
190
|
+
version: '2.0'
|
|
191
|
+
type: :development
|
|
192
|
+
prerelease: false
|
|
193
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
194
|
+
requirements:
|
|
195
|
+
- - "~>"
|
|
196
|
+
- !ruby/object:Gem::Version
|
|
197
|
+
version: '2.0'
|
|
198
|
+
description: 'Parse Server Ruby Client. Perform Object-relational mapping between
|
|
199
|
+
Parse Server and Ruby classes, with authentication, cloud code webhooks, push notifications
|
|
200
|
+
and more built in. parse-stack-next is a fork of parse-stack with additional features:
|
|
201
|
+
vector search, Atlas Search, agent ACL scopes, GraphQL, MongoDB-direct pipeline
|
|
202
|
+
enforcement, and ongoing maintenance.'
|
|
167
203
|
email:
|
|
168
204
|
- adrian+parse-stack@neurosynq.net
|
|
169
205
|
executables:
|
|
@@ -174,6 +210,8 @@ files:
|
|
|
174
210
|
- ".bundle/config"
|
|
175
211
|
- ".env.sample"
|
|
176
212
|
- ".env.test"
|
|
213
|
+
- ".github/workflows/codeql.yml"
|
|
214
|
+
- ".github/workflows/docs.yml"
|
|
177
215
|
- ".github/workflows/ruby.yml"
|
|
178
216
|
- ".gitignore"
|
|
179
217
|
- ".ruby-version"
|
|
@@ -185,6 +223,12 @@ files:
|
|
|
185
223
|
- Makefile
|
|
186
224
|
- README.md
|
|
187
225
|
- Rakefile
|
|
226
|
+
- SECURITY.md
|
|
227
|
+
- assets/parse-stack-next-avatar.png
|
|
228
|
+
- assets/parse-stack-next-avatar.svg
|
|
229
|
+
- assets/parse-stack-next-banner.png
|
|
230
|
+
- assets/parse-stack-next-banner.svg
|
|
231
|
+
- assets/parse-stack-next-social-preview.png
|
|
188
232
|
- bin/console
|
|
189
233
|
- bin/parse-console
|
|
190
234
|
- bin/server
|
|
@@ -192,9 +236,12 @@ files:
|
|
|
192
236
|
- config/parse-config.json
|
|
193
237
|
- docs/TEST_SERVER.md
|
|
194
238
|
- docs/_config.yml
|
|
239
|
+
- docs/atlas_vector_search_guide.md
|
|
240
|
+
- docs/client_sdk_guide.md
|
|
195
241
|
- docs/mcp_guide.md
|
|
196
242
|
- docs/mongodb_direct_guide.md
|
|
197
243
|
- docs/mongodb_index_optimization_guide.md
|
|
244
|
+
- docs/usage_guide.md
|
|
198
245
|
- examples/transaction_example.rb
|
|
199
246
|
- lib/parse-stack-next.rb
|
|
200
247
|
- lib/parse-stack.rb
|
|
@@ -237,6 +284,8 @@ files:
|
|
|
237
284
|
- lib/parse/atlas_search/result.rb
|
|
238
285
|
- lib/parse/atlas_search/search_builder.rb
|
|
239
286
|
- lib/parse/atlas_search/session.rb
|
|
287
|
+
- lib/parse/cache/pool.rb
|
|
288
|
+
- lib/parse/cache/redis.rb
|
|
240
289
|
- lib/parse/client.rb
|
|
241
290
|
- lib/parse/client/authentication.rb
|
|
242
291
|
- lib/parse/client/batch.rb
|
|
@@ -248,6 +297,19 @@ files:
|
|
|
248
297
|
- lib/parse/client/request.rb
|
|
249
298
|
- lib/parse/client/response.rb
|
|
250
299
|
- lib/parse/clp_scope.rb
|
|
300
|
+
- lib/parse/console.rb
|
|
301
|
+
- lib/parse/embeddings.rb
|
|
302
|
+
- lib/parse/embeddings/cohere.rb
|
|
303
|
+
- lib/parse/embeddings/fixture.rb
|
|
304
|
+
- lib/parse/embeddings/jina.rb
|
|
305
|
+
- lib/parse/embeddings/local_http.rb
|
|
306
|
+
- lib/parse/embeddings/openai.rb
|
|
307
|
+
- lib/parse/embeddings/provider.rb
|
|
308
|
+
- lib/parse/embeddings/qwen.rb
|
|
309
|
+
- lib/parse/embeddings/voyage.rb
|
|
310
|
+
- lib/parse/graphql.rb
|
|
311
|
+
- lib/parse/graphql/scalars.rb
|
|
312
|
+
- lib/parse/graphql/type_generator.rb
|
|
251
313
|
- lib/parse/live_query.rb
|
|
252
314
|
- lib/parse/live_query/circuit_breaker.rb
|
|
253
315
|
- lib/parse/live_query/client.rb
|
|
@@ -280,6 +342,7 @@ files:
|
|
|
280
342
|
- lib/parse/model/core/builder.rb
|
|
281
343
|
- lib/parse/model/core/create_lock.rb
|
|
282
344
|
- lib/parse/model/core/describe.rb
|
|
345
|
+
- lib/parse/model/core/embed_managed.rb
|
|
283
346
|
- lib/parse/model/core/enhanced_change_tracking.rb
|
|
284
347
|
- lib/parse/model/core/errors.rb
|
|
285
348
|
- lib/parse/model/core/fetching.rb
|
|
@@ -290,6 +353,7 @@ files:
|
|
|
290
353
|
- lib/parse/model/core/querying.rb
|
|
291
354
|
- lib/parse/model/core/schema.rb
|
|
292
355
|
- lib/parse/model/core/search_indexing.rb
|
|
356
|
+
- lib/parse/model/core/vector_searchable.rb
|
|
293
357
|
- lib/parse/model/date.rb
|
|
294
358
|
- lib/parse/model/email.rb
|
|
295
359
|
- lib/parse/model/file.rb
|
|
@@ -305,6 +369,7 @@ files:
|
|
|
305
369
|
- lib/parse/model/time_zone.rb
|
|
306
370
|
- lib/parse/model/validations.rb
|
|
307
371
|
- lib/parse/model/validations/uniqueness_validator.rb
|
|
372
|
+
- lib/parse/model/vector.rb
|
|
308
373
|
- lib/parse/mongodb.rb
|
|
309
374
|
- lib/parse/pipeline_security.rb
|
|
310
375
|
- lib/parse/query.rb
|
|
@@ -331,12 +396,12 @@ files:
|
|
|
331
396
|
- lib/parse/stack/version.rb
|
|
332
397
|
- lib/parse/two_factor_auth.rb
|
|
333
398
|
- lib/parse/two_factor_auth/user_extension.rb
|
|
399
|
+
- lib/parse/vector_search.rb
|
|
334
400
|
- lib/parse/webhooks.rb
|
|
335
401
|
- lib/parse/webhooks/payload.rb
|
|
336
402
|
- lib/parse/webhooks/registration.rb
|
|
337
403
|
- lib/parse/webhooks/replay_protection.rb
|
|
338
404
|
- parse-stack-next.gemspec
|
|
339
|
-
- parse-stack.png
|
|
340
405
|
- scripts/debug-ips.js
|
|
341
406
|
- scripts/docker/Dockerfile.parse
|
|
342
407
|
- scripts/docker/atlas-init.js
|
|
@@ -347,6 +412,11 @@ files:
|
|
|
347
412
|
- scripts/start-parse.sh
|
|
348
413
|
- scripts/start_mcp_server.rb
|
|
349
414
|
- scripts/test_server_connection.rb
|
|
415
|
+
- scripts/vector_prototype/create_vector_index.js
|
|
416
|
+
- scripts/vector_prototype/fetch_embeddings.py
|
|
417
|
+
- scripts/vector_prototype/fixture_manifest.json
|
|
418
|
+
- scripts/vector_prototype/query_prototype.rb
|
|
419
|
+
- scripts/vector_prototype/run.sh
|
|
350
420
|
homepage: https://github.com/neurosynq/parse-stack-next
|
|
351
421
|
licenses:
|
|
352
422
|
- MIT
|
data/parse-stack.png
DELETED
|
Binary file
|