parse-stack-next 5.1.1 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.sample +12 -0
- data/.env.test +4 -4
- data/CHANGELOG.md +545 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +6 -1
- data/README.md +167 -38
- data/Rakefile +56 -10
- data/docs/atlas_vector_search_guide.md +110 -9
- data/docs/mcp_guide.md +433 -0
- data/docs/mongodb_direct_guide.md +66 -1
- data/docs/mongodb_index_optimization_guide.md +22 -1
- data/docs/usage_guide.md +15 -0
- data/lib/parse/agent/approval_gate.rb +0 -0
- data/lib/parse/agent/constraint_translator.rb +90 -19
- data/lib/parse/agent/describe.rb +1 -0
- data/lib/parse/agent/errors.rb +16 -0
- data/lib/parse/agent/mcp_client.rb +9 -0
- data/lib/parse/agent/mcp_dispatcher.rb +139 -7
- data/lib/parse/agent/mcp_rack_app.rb +621 -17
- data/lib/parse/agent/mcp_subscriptions.rb +607 -0
- data/lib/parse/agent/metadata_dsl.rb +58 -0
- data/lib/parse/agent/metadata_registry.rb +141 -1
- data/lib/parse/agent/prompt_hardening.rb +213 -0
- data/lib/parse/agent/result_formatter.rb +18 -3
- data/lib/parse/agent/tools.rb +167 -24
- data/lib/parse/agent.rb +692 -21
- data/lib/parse/client/request.rb +55 -4
- data/lib/parse/client/response.rb +4 -0
- data/lib/parse/client.rb +205 -7
- data/lib/parse/model/classes/installation.rb +27 -10
- data/lib/parse/model/classes/user.rb +8 -0
- data/lib/parse/model/core/actions.rb +58 -4
- data/lib/parse/model/core/embed_managed.rb +19 -14
- data/lib/parse/model/core/indexing.rb +108 -16
- data/lib/parse/model/core/querying.rb +29 -0
- data/lib/parse/model/model.rb +34 -3
- data/lib/parse/model/object.rb +1 -0
- data/lib/parse/query.rb +90 -24
- data/lib/parse/retrieval/agent_tool.rb +369 -0
- data/lib/parse/retrieval/chunk.rb +74 -0
- data/lib/parse/retrieval/chunker.rb +208 -0
- data/lib/parse/retrieval/retriever.rb +274 -0
- data/lib/parse/retrieval.rb +10 -0
- data/lib/parse/schema.rb +69 -20
- data/lib/parse/stack/version.rb +2 -2
- data/parse-stack-next.gemspec +1 -1
- data/scripts/docker/docker-compose.atlas.yml +14 -10
- data/scripts/docker/docker-compose.test.yml +24 -20
- data/scripts/docker/mongo-init.js +3 -3
- data/scripts/start-parse.sh +10 -0
- data/scripts/start_mcp_server.rb +1 -1
- data/scripts/test_server_connection.rb +1 -1
- data/scripts/vector_prototype/create_vector_index.js +1 -1
- data/scripts/vector_prototype/fetch_embeddings.py +2 -2
- data/scripts/vector_prototype/query_prototype.rb +1 -1
- data/scripts/vector_prototype/run.sh +4 -4
- metadata +10 -2
data/lib/parse/agent/tools.rb
CHANGED
|
@@ -1096,9 +1096,18 @@ module Parse
|
|
|
1096
1096
|
# Treat the handler list as part of your application's trust boundary:
|
|
1097
1097
|
# register at boot from code you control; never accept registrations
|
|
1098
1098
|
# from configuration files at runtime.
|
|
1099
|
-
def register(name:, description:, parameters:,
|
|
1099
|
+
def register(name:, description:, parameters:, handler:,
|
|
1100
|
+
permission: nil, permissions: nil,
|
|
1100
1101
|
timeout: DEFAULT_TIMEOUT, output_schema: nil, category: "custom",
|
|
1101
1102
|
client_safe: false)
|
|
1103
|
+
# Accept `permissions:` as an alias for the canonical `permission:`
|
|
1104
|
+
# (Agent.new uses the plural, so callers mix them up). `permission:`
|
|
1105
|
+
# remains effectively required — just no longer a hard keyword so the
|
|
1106
|
+
# alias can satisfy it.
|
|
1107
|
+
permission ||= permissions
|
|
1108
|
+
if permission.nil?
|
|
1109
|
+
raise ArgumentError, "permission: is required (:readonly, :write, or :admin)"
|
|
1110
|
+
end
|
|
1102
1111
|
unless %i[readonly write admin].include?(permission)
|
|
1103
1112
|
raise ArgumentError, "permission must be :readonly, :write, or :admin (got #{permission.inspect})"
|
|
1104
1113
|
end
|
|
@@ -1442,6 +1451,7 @@ module Parse
|
|
|
1442
1451
|
|
|
1443
1452
|
# Enrich with local model metadata (descriptions, agent methods)
|
|
1444
1453
|
enriched = MetadataRegistry.enriched_schemas(schemas, agent_permission: agent.permissions)
|
|
1454
|
+
enriched = enriched.map { |s| Parse::Agent::PromptHardening.sanitize_schema_for_llm(s) } if enriched.is_a?(Array)
|
|
1445
1455
|
|
|
1446
1456
|
ResultFormatter.format_schemas(enriched)
|
|
1447
1457
|
end
|
|
@@ -1823,11 +1833,26 @@ module Parse
|
|
|
1823
1833
|
field = scope[:field].to_s
|
|
1824
1834
|
value = scope[:value]
|
|
1825
1835
|
# Parse Server returns camelCase field names on the wire (e.g. orgId for
|
|
1826
|
-
# the Ruby field org_id).
|
|
1827
|
-
#
|
|
1836
|
+
# the Ruby field org_id). A mongo-direct hit (semantic_search's raw
|
|
1837
|
+
# $vectorSearch path) instead carries the field under its STORAGE column
|
|
1838
|
+
# — which is the class's explicit `field_map` alias when one is declared,
|
|
1839
|
+
# NOT the camelized form. Check all three forms (snake, naive-camel, and
|
|
1840
|
+
# the field_map alias) so this gate resolves the scope column the SAME way
|
|
1841
|
+
# the pre-search filter (Parse::Retrieval.wire_name) did. Otherwise a
|
|
1842
|
+
# field_map'd scope field reads as nil here and fails closed on records
|
|
1843
|
+
# that legitimately belong to the tenant. field_map values may be symbols,
|
|
1844
|
+
# so stringify; an unregistered/system class (find_class -> nil) falls
|
|
1845
|
+
# back to the snake/camel pair.
|
|
1828
1846
|
camel_field = field.gsub(/_([a-z])/) { Regexp.last_match(1).upcase }
|
|
1847
|
+
# Assign unconditionally (the modifier-if is the RHS, yielding nil when
|
|
1848
|
+
# false) so neither local is ever read before initialization.
|
|
1849
|
+
klass = (Parse::Model.find_class(class_name) if defined?(Parse::Model))
|
|
1850
|
+
mapped = (klass.field_map[field.to_sym].to_s if klass.respond_to?(:field_map))
|
|
1829
1851
|
rec_value = if record.is_a?(Hash)
|
|
1830
|
-
|
|
1852
|
+
keys = [field, camel_field]
|
|
1853
|
+
keys << mapped if mapped && !mapped.empty?
|
|
1854
|
+
found = keys.find { |k| record.key?(k) }
|
|
1855
|
+
record[found] if found
|
|
1831
1856
|
end
|
|
1832
1857
|
unless rec_value == value
|
|
1833
1858
|
raise Parse::Agent::AccessDenied.new(
|
|
@@ -2885,15 +2910,77 @@ module Parse
|
|
|
2885
2910
|
response = agent.client.schema(class_name)
|
|
2886
2911
|
|
|
2887
2912
|
unless response.success?
|
|
2888
|
-
|
|
2913
|
+
# Raise a ValidationError (not a bare RuntimeError) so the message
|
|
2914
|
+
# — including the did-you-mean hint — reaches the LLM via
|
|
2915
|
+
# error_response instead of being collapsed to a generic
|
|
2916
|
+
# "internal error" by the sanitizing StandardError rescue. A
|
|
2917
|
+
# mistyped class name is the common cause; suggesting near matches
|
|
2918
|
+
# lets the model self-correct in one retry instead of falling back
|
|
2919
|
+
# to a full get_all_schemas sweep.
|
|
2920
|
+
suggestions = suggest_class_names(class_name, agent: agent)
|
|
2921
|
+
hint = suggestions.empty? ? "" : " Did you mean: #{suggestions.join(", ")}?"
|
|
2922
|
+
raise Parse::Agent::ValidationError,
|
|
2923
|
+
"Could not fetch schema for '#{class_name}'.#{hint}"
|
|
2889
2924
|
end
|
|
2890
2925
|
|
|
2891
2926
|
# Enrich with local model metadata (descriptions, agent methods)
|
|
2892
2927
|
enriched = MetadataRegistry.enriched_schema(class_name, response.result, agent_permission: agent.permissions)
|
|
2928
|
+
enriched = Parse::Agent::PromptHardening.sanitize_schema_for_llm(enriched)
|
|
2893
2929
|
|
|
2894
2930
|
ResultFormatter.format_schema(enriched)
|
|
2895
2931
|
end
|
|
2896
2932
|
|
|
2933
|
+
# Collect the locally-known Parse class names usable as did-you-mean
# candidates: whatever MetadataRegistry.visible_class_names reports (when
# the registry exposes it), plus the parse_class of every loaded
# Parse::Object descendant, minus MetadataRegistry.hidden_class_names.
# Nil, false, and blank names are dropped so an unnamed class can never
# surface as an empty suggestion. Only called on the get_schema error path.
#
# NOTE(review): `agent` is accepted but not consulted here, so the list is
# not narrowed to what this particular agent may see — confirm that is
# intended before relying on it for per-agent scoping.
#
# @param agent [Object, nil] currently unused; kept for call-site
#   compatibility.
# @return [Array<String>] unique class names in discovery order.
def known_class_names_for_suggestions(agent = nil)
  registry = Parse::Agent::MetadataRegistry
  candidates = []
  candidates.concat(Array(registry.visible_class_names)) if registry.respond_to?(:visible_class_names)

  if defined?(Parse::Object) && Parse::Object.respond_to?(:descendants)
    model_classes = Parse::Object.descendants.select { |klass| klass.respond_to?(:parse_class) }
    candidates.concat(model_classes.map(&:parse_class))
  end

  hidden = registry.respond_to?(:hidden_class_names) ? Array(registry.hidden_class_names).map(&:to_s) : []

  # `select` on truthiness drops both nil and false before stringifying;
  # blank strings are rejected afterwards.
  candidates.select { |name| name }.map(&:to_s).reject(&:empty?).uniq - hidden
end
|
|
2949
|
+
module_function :known_class_names_for_suggestions
|
|
2950
|
+
|
|
2951
|
+
# Suggest up to `limit` locally-known class names that lie within a
# bounded, case-insensitive edit distance of a (likely mistyped)
# `class_name`, closest first with ties broken alphabetically.
#
# The edit budget is max(3, half the target length), capped below the
# target's own length: a budget as large as the whole target would let
# every typed character be replaced, so one- to three-letter inputs would
# match unrelated short class names. A candidate spelled exactly like
# `class_name` is skipped, because echoing back the name that just failed
# is not a useful correction.
#
# @param class_name [#to_s] the name the caller asked for.
# @param agent [Object, nil] forwarded to known_class_names_for_suggestions.
# @param limit [Integer] maximum number of suggestions; values below 1
#   yield no suggestions instead of raising.
# @return [Array<String>] suggested class names, best match first.
def suggest_class_names(class_name, agent: nil, limit: 3)
  requested = class_name.to_s
  target = requested.downcase
  return [] if target.empty? || limit.to_i < 1

  threshold = [[3, (target.length / 2.0).ceil].max, target.length - 1].min
  known_class_names_for_suggestions(agent)
    .reject { |name| name == requested }
    .map { |name| [name, name_edit_distance(target, name.downcase)] }
    .select { |(_, dist)| dist <= threshold }
    .sort_by { |(name, dist)| [dist, name] }
    .first(limit.to_i)
    .map(&:first)
end
|
|
2965
|
+
module_function :suggest_class_names
|
|
2966
|
+
|
|
2967
|
+
# Levenshtein edit distance between two strings, computed iteratively
# with two rolling rows so only the previous and current row are held.
# Arguments are coerced with #to_s, so a Symbol or nil is measured by its
# string form (nil counts as the empty string) instead of raising.
#
# @param a [#to_s] first string.
# @param b [#to_s] second string.
# @return [Integer] minimum number of single-character insertions,
#   deletions, and substitutions that turn `a` into `b`.
def name_edit_distance(a, b)
  a = a.to_s
  b = b.to_s
  return 0 if a == b
  return b.length if a.empty?
  return a.length if b.empty?

  # Split `b` once instead of re-enumerating it for every row.
  b_chars = b.chars
  prev = (0..b_chars.length).to_a
  a.each_char.with_index(1) do |ca, row|
    # cur[0] is the cost of deleting the first `row` characters of `a`.
    cur = [row]
    b_chars.each_with_index do |cb, j|
      substitution = prev[j] + (ca == cb ? 0 : 1)
      cur << [cur[j] + 1, prev[j + 1] + 1, substitution].min
    end
    prev = cur
  end
  prev.last
end
|
|
2982
|
+
module_function :name_edit_distance
|
|
2983
|
+
|
|
2897
2984
|
# ============================================================
|
|
2898
2985
|
# QUERY TOOLS
|
|
2899
2986
|
# ============================================================
|
|
@@ -3034,11 +3121,15 @@ module Parse
|
|
|
3034
3121
|
if normalized_format && normalized_format != "json"
|
|
3035
3122
|
format_query_results_as(normalized_format, class_name, results)
|
|
3036
3123
|
else
|
|
3037
|
-
ResultFormatter.format_query_results(class_name, results,
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
|
|
3041
|
-
|
|
3124
|
+
formatted = ResultFormatter.format_query_results(class_name, results,
|
|
3125
|
+
limit: limit, skip: skip || 0,
|
|
3126
|
+
where: where, keys: keys,
|
|
3127
|
+
order: order, include: include,
|
|
3128
|
+
truncated_include_fields: truncated_includes)
|
|
3129
|
+
if formatted.is_a?(Hash) && formatted[:results].is_a?(Array)
|
|
3130
|
+
stamp_source!(formatted[:results], class_name: class_name, tool: :query_class)
|
|
3131
|
+
end
|
|
3132
|
+
formatted
|
|
3042
3133
|
end
|
|
3043
3134
|
end
|
|
3044
3135
|
end
|
|
@@ -3368,12 +3459,19 @@ module Parse
|
|
|
3368
3459
|
oid = obj.is_a?(Hash) ? (obj["objectId"] || obj[:objectId]) : obj.id
|
|
3369
3460
|
h[oid] = obj
|
|
3370
3461
|
end
|
|
3462
|
+
stamp_source!(objects_by_id.values, class_name: class_name, tool: :get_objects)
|
|
3371
3463
|
|
|
3372
3464
|
missing = unique_ids.reject { |id| objects_by_id.key?(id) }
|
|
3373
3465
|
|
|
3466
|
+
# Normalize each row to the same LLM-friendly form query_class
|
|
3467
|
+
# emits (Pointers -> {_type,class,id}, Dates -> ISO, ACL stripped)
|
|
3468
|
+
# instead of shipping raw wire-form. Done after stamp_source! so
|
|
3469
|
+
# the `_source` citation survives.
|
|
3470
|
+
simplified = objects_by_id.transform_values { |obj| ResultFormatter.simplify_object(obj) }
|
|
3471
|
+
|
|
3374
3472
|
envelope = {
|
|
3375
3473
|
class_name: class_name,
|
|
3376
|
-
objects:
|
|
3474
|
+
objects: simplified,
|
|
3377
3475
|
missing: missing,
|
|
3378
3476
|
requested: unique_ids.size,
|
|
3379
3477
|
found: objects_by_id.size,
|
|
@@ -3553,13 +3651,14 @@ module Parse
|
|
|
3553
3651
|
|
|
3554
3652
|
# Parse Server's REST aggregate endpoint does NOT enforce per-row
|
|
3555
3653
|
# ACL — only the SDK's mongo-direct path applies the `_rperm`
|
|
3556
|
-
# `$match` injection via Parse::ACLScope. For
|
|
3557
|
-
# (session_token / acl_user / acl_role
|
|
3654
|
+
# `$match` injection via Parse::ACLScope. For any non-master
|
|
3655
|
+
# identity (session_token / acl_user / acl_role, including a
|
|
3656
|
+
# runtime #impersonate that cleared @acl_scope), the caller's
|
|
3558
3657
|
# `mongo_direct: false` would silently bypass the agent's
|
|
3559
3658
|
# declared scope; auto-promote to mongo-direct so the ACLScope
|
|
3560
3659
|
# enforcement runs. Master-key agents keep their REST path
|
|
3561
3660
|
# (no ACL enforcement was claimed in the first place).
|
|
3562
|
-
if !use_mongo_direct && agent.respond_to?(:
|
|
3661
|
+
if !use_mongo_direct && agent.respond_to?(:requires_mongo_direct?) && agent.requires_mongo_direct? &&
|
|
3563
3662
|
defined?(Parse::MongoDB) && Parse::MongoDB.enabled?
|
|
3564
3663
|
use_mongo_direct = true
|
|
3565
3664
|
end
|
|
@@ -3614,6 +3713,9 @@ module Parse
|
|
|
3614
3713
|
# losing information. Opt-out via `compact_pointers: false` when
|
|
3615
3714
|
# the caller specifically needs the raw Parse-on-Mongo shape.
|
|
3616
3715
|
pointer_map = compact_pointers ? compact_pointers!(results) : {}
|
|
3716
|
+
# Stamp provenance AFTER compaction/redaction. Grouped rows have
|
|
3717
|
+
# no objectId — `_source.object_id` is nil for those (documented).
|
|
3718
|
+
stamp_source!(results, class_name: class_name, tool: :aggregate)
|
|
3617
3719
|
|
|
3618
3720
|
result = {
|
|
3619
3721
|
class_name: class_name,
|
|
@@ -4209,10 +4311,11 @@ module Parse
|
|
|
4209
4311
|
# Parse Server's REST aggregate endpoint does NOT enforce per-row
|
|
4210
4312
|
# ACL — only the SDK's mongo-direct path applies the _rperm match
|
|
4211
4313
|
# injection via Parse::ACLScope. So we must route through
|
|
4212
|
-
# mongo-direct for ANY
|
|
4213
|
-
# acl_role
|
|
4214
|
-
#
|
|
4215
|
-
|
|
4314
|
+
# mongo-direct for ANY non-master identity (session_token,
|
|
4315
|
+
# acl_user, acl_role, including a runtime-impersonated agent whose
|
|
4316
|
+
# @acl_scope was cleared), not just acl_user/acl_role. Master-key
|
|
4317
|
+
# agents keep the REST path because they've already opted out of ACL.
|
|
4318
|
+
use_direct = agent.respond_to?(:requires_mongo_direct?) && agent.requires_mongo_direct? &&
|
|
4216
4319
|
defined?(Parse::MongoDB) && Parse::MongoDB.enabled?
|
|
4217
4320
|
|
|
4218
4321
|
with_timeout(tool) do
|
|
@@ -4561,10 +4664,12 @@ module Parse
|
|
|
4561
4664
|
# clips the underlying query too.
|
|
4562
4665
|
effective_pipeline, _auto_limited = ensure_aggregate_terminal_limit(scoped_pipeline)
|
|
4563
4666
|
|
|
4564
|
-
# Route to mongo-direct
|
|
4565
|
-
#
|
|
4566
|
-
|
|
4567
|
-
|
|
4667
|
+
# Route to mongo-direct for ANY non-master identity. The REST
|
|
4668
|
+
# aggregate endpoint enforces no ACL, so a session-token agent's
|
|
4669
|
+
# REST aggregate would run unscoped — only master-key agents
|
|
4670
|
+
# (which opted out of ACL) keep the REST path.
|
|
4671
|
+
use_direct = agent.respond_to?(:requires_mongo_direct?) &&
|
|
4672
|
+
agent.requires_mongo_direct? &&
|
|
4568
4673
|
defined?(Parse::MongoDB) && Parse::MongoDB.enabled?
|
|
4569
4674
|
|
|
4570
4675
|
rows = nil
|
|
@@ -5361,6 +5466,37 @@ module Parse
|
|
|
5361
5466
|
end
|
|
5362
5467
|
module_function :project_object_to_allowlist
|
|
5363
5468
|
|
|
5469
|
+
# Stamp each row hash with an SDK-added `_source` provenance citation
# `{ "class", "tool", "object_id" }`. No-op unless
# `Parse::Agent.include_source_provenance?` is truthy, and no-op when
# `rows` is not an Array. MUST be called AFTER field-allowlist projection
# and hidden-class redaction: `_source` is SDK metadata, not a Parse
# field, so stamping last keeps it out of (and safe from) those gates.
# Idempotent — a row already carrying `_source` (String or Symbol key) is
# left untouched. Non-Hash entries are skipped. `object_id` is nil-safe
# (aggregation/group rows have no objectId).
#
# The id is looked up under both the String and the Symbol form of
# `id_key`, whichever form the caller passed, so string-keyed and
# symbol-keyed rows resolve the same way.
#
# @param rows [Array<Hash>] row hashes (mutated in place).
# @param class_name [String, Symbol]
# @param tool [Symbol, String]
# @param id_key [String, Symbol] the row key holding the objectId.
# @return [Array<Hash>] the same `rows` object that was passed in.
def stamp_source!(rows, class_name:, tool:, id_key: "objectId")
  return rows unless Parse::Agent.include_source_provenance?
  return rows unless rows.is_a?(Array)

  string_key = id_key.to_s
  symbol_key = string_key.to_sym
  rows.each do |row|
    next unless row.is_a?(Hash)
    next if row.key?("_source") || row.key?(:_source)

    row["_source"] = {
      "class" => class_name.to_s,
      "tool" => tool.to_s,
      "object_id" => row[string_key] || row[symbol_key],
    }
  end
  rows
end
|
|
5498
|
+
module_function :stamp_source!
|
|
5499
|
+
|
|
5364
5500
|
# ============================================================
|
|
5365
5501
|
# ATLAS SEARCH TOOLS
|
|
5366
5502
|
# ============================================================
|
|
@@ -5817,6 +5953,10 @@ module Parse
|
|
|
5817
5953
|
rows = result.results.map do |obj|
|
|
5818
5954
|
row = serialize_atlas_object(obj)
|
|
5819
5955
|
row = row.select { |k, _| permitted.include?(k.to_s) } if permitted
|
|
5956
|
+
# Normalize to query_class's LLM-friendly form (compact pointers,
|
|
5957
|
+
# ISO dates, ACL stripped) instead of raw wire-form. Done before
|
|
5958
|
+
# the SDK-added score/highlights so those stay verbatim.
|
|
5959
|
+
row = ResultFormatter.simplify_object(row)
|
|
5820
5960
|
row["score"] = obj.search_score if obj.respond_to?(:search_score) && obj.search_score
|
|
5821
5961
|
if highlight_field && obj.respond_to?(:search_highlights) && obj.search_highlights
|
|
5822
5962
|
highlights = filter_atlas_highlights(obj.search_highlights, permitted)
|
|
@@ -5824,6 +5964,7 @@ module Parse
|
|
|
5824
5964
|
end
|
|
5825
5965
|
row
|
|
5826
5966
|
end
|
|
5967
|
+
stamp_source!(rows, class_name: class_name, tool: :atlas_text_search)
|
|
5827
5968
|
|
|
5828
5969
|
{
|
|
5829
5970
|
class_name: class_name,
|
|
@@ -5839,7 +5980,8 @@ module Parse
|
|
|
5839
5980
|
|
|
5840
5981
|
rows = (result.results || []).map do |obj|
|
|
5841
5982
|
row = serialize_atlas_object(obj)
|
|
5842
|
-
|
|
5983
|
+
row = row.select { |k, _| permitted.include?(k.to_s) } if permitted
|
|
5984
|
+
ResultFormatter.simplify_object(row)
|
|
5843
5985
|
end
|
|
5844
5986
|
|
|
5845
5987
|
{
|
|
@@ -5858,7 +6000,8 @@ module Parse
|
|
|
5858
6000
|
|
|
5859
6001
|
rows = (result.results || []).map do |obj|
|
|
5860
6002
|
row = serialize_atlas_object(obj)
|
|
5861
|
-
|
|
6003
|
+
row = row.select { |k, _| permitted.include?(k.to_s) } if permitted
|
|
6004
|
+
ResultFormatter.simplify_object(row)
|
|
5862
6005
|
end
|
|
5863
6006
|
|
|
5864
6007
|
{
|