parse-stack-next 5.1.1 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.env.sample +12 -0
  3. data/.env.test +4 -4
  4. data/CHANGELOG.md +545 -0
  5. data/Gemfile +3 -0
  6. data/Gemfile.lock +6 -1
  7. data/README.md +167 -38
  8. data/Rakefile +56 -10
  9. data/docs/atlas_vector_search_guide.md +110 -9
  10. data/docs/mcp_guide.md +433 -0
  11. data/docs/mongodb_direct_guide.md +66 -1
  12. data/docs/mongodb_index_optimization_guide.md +22 -1
  13. data/docs/usage_guide.md +15 -0
  14. data/lib/parse/agent/approval_gate.rb +0 -0
  15. data/lib/parse/agent/constraint_translator.rb +90 -19
  16. data/lib/parse/agent/describe.rb +1 -0
  17. data/lib/parse/agent/errors.rb +16 -0
  18. data/lib/parse/agent/mcp_client.rb +9 -0
  19. data/lib/parse/agent/mcp_dispatcher.rb +139 -7
  20. data/lib/parse/agent/mcp_rack_app.rb +621 -17
  21. data/lib/parse/agent/mcp_subscriptions.rb +607 -0
  22. data/lib/parse/agent/metadata_dsl.rb +58 -0
  23. data/lib/parse/agent/metadata_registry.rb +141 -1
  24. data/lib/parse/agent/prompt_hardening.rb +213 -0
  25. data/lib/parse/agent/result_formatter.rb +18 -3
  26. data/lib/parse/agent/tools.rb +167 -24
  27. data/lib/parse/agent.rb +692 -21
  28. data/lib/parse/client/request.rb +55 -4
  29. data/lib/parse/client/response.rb +4 -0
  30. data/lib/parse/client.rb +205 -7
  31. data/lib/parse/model/classes/installation.rb +27 -10
  32. data/lib/parse/model/classes/user.rb +8 -0
  33. data/lib/parse/model/core/actions.rb +58 -4
  34. data/lib/parse/model/core/embed_managed.rb +19 -14
  35. data/lib/parse/model/core/indexing.rb +108 -16
  36. data/lib/parse/model/core/querying.rb +29 -0
  37. data/lib/parse/model/model.rb +34 -3
  38. data/lib/parse/model/object.rb +1 -0
  39. data/lib/parse/query.rb +90 -24
  40. data/lib/parse/retrieval/agent_tool.rb +369 -0
  41. data/lib/parse/retrieval/chunk.rb +74 -0
  42. data/lib/parse/retrieval/chunker.rb +208 -0
  43. data/lib/parse/retrieval/retriever.rb +274 -0
  44. data/lib/parse/retrieval.rb +10 -0
  45. data/lib/parse/schema.rb +69 -20
  46. data/lib/parse/stack/version.rb +2 -2
  47. data/parse-stack-next.gemspec +1 -1
  48. data/scripts/docker/docker-compose.atlas.yml +14 -10
  49. data/scripts/docker/docker-compose.test.yml +24 -20
  50. data/scripts/docker/mongo-init.js +3 -3
  51. data/scripts/start-parse.sh +10 -0
  52. data/scripts/start_mcp_server.rb +1 -1
  53. data/scripts/test_server_connection.rb +1 -1
  54. data/scripts/vector_prototype/create_vector_index.js +1 -1
  55. data/scripts/vector_prototype/fetch_embeddings.py +2 -2
  56. data/scripts/vector_prototype/query_prototype.rb +1 -1
  57. data/scripts/vector_prototype/run.sh +4 -4
  58. metadata +10 -2
@@ -1096,9 +1096,18 @@ module Parse
1096
1096
  # Treat the handler list as part of your application's trust boundary:
1097
1097
  # register at boot from code you control; never accept registrations
1098
1098
  # from configuration files at runtime.
1099
- def register(name:, description:, parameters:, permission:, handler:,
1099
+ def register(name:, description:, parameters:, handler:,
1100
+ permission: nil, permissions: nil,
1100
1101
  timeout: DEFAULT_TIMEOUT, output_schema: nil, category: "custom",
1101
1102
  client_safe: false)
1103
+ # Accept `permissions:` as an alias for the canonical `permission:`
1104
+ # (Agent.new uses the plural, so callers mix them up). `permission:`
1105
+ # remains effectively required — just no longer a hard keyword so the
1106
+ # alias can satisfy it.
1107
+ permission ||= permissions
1108
+ if permission.nil?
1109
+ raise ArgumentError, "permission: is required (:readonly, :write, or :admin)"
1110
+ end
1102
1111
  unless %i[readonly write admin].include?(permission)
1103
1112
  raise ArgumentError, "permission must be :readonly, :write, or :admin (got #{permission.inspect})"
1104
1113
  end
@@ -1442,6 +1451,7 @@ module Parse
1442
1451
 
1443
1452
  # Enrich with local model metadata (descriptions, agent methods)
1444
1453
  enriched = MetadataRegistry.enriched_schemas(schemas, agent_permission: agent.permissions)
1454
+ enriched = enriched.map { |s| Parse::Agent::PromptHardening.sanitize_schema_for_llm(s) } if enriched.is_a?(Array)
1445
1455
 
1446
1456
  ResultFormatter.format_schemas(enriched)
1447
1457
  end
@@ -1823,11 +1833,26 @@ module Parse
1823
1833
  field = scope[:field].to_s
1824
1834
  value = scope[:value]
1825
1835
  # Parse Server returns camelCase field names on the wire (e.g. orgId for
1826
- # the Ruby field org_id). Check both forms so the gate works regardless
1827
- # of whether the record came from a real server or a test fake.
1836
+ # the Ruby field org_id). A mongo-direct hit (semantic_search's raw
1837
+ # $vectorSearch path) instead carries the field under its STORAGE column
1838
+ # — which is the class's explicit `field_map` alias when one is declared,
1839
+ # NOT the camelized form. Check all three forms (snake, naive-camel, and
1840
+ # the field_map alias) so this gate resolves the scope column the SAME way
1841
+ # the pre-search filter (Parse::Retrieval.wire_name) did. Otherwise a
1842
+ # field_map'd scope field reads as nil here and fails closed on records
1843
+ # that legitimately belong to the tenant. field_map values may be symbols,
1844
+ # so stringify; an unregistered/system class (find_class -> nil) falls
1845
+ # back to the snake/camel pair.
1828
1846
  camel_field = field.gsub(/_([a-z])/) { Regexp.last_match(1).upcase }
1847
+ # Assign unconditionally (the modifier-if is the RHS, yielding nil when
1848
+ # false) so neither local is ever read before initialization.
1849
+ klass = (Parse::Model.find_class(class_name) if defined?(Parse::Model))
1850
+ mapped = (klass.field_map[field.to_sym].to_s if klass.respond_to?(:field_map))
1829
1851
  rec_value = if record.is_a?(Hash)
1830
- record.key?(field) ? record[field] : record[camel_field]
1852
+ keys = [field, camel_field]
1853
+ keys << mapped if mapped && !mapped.empty?
1854
+ found = keys.find { |k| record.key?(k) }
1855
+ record[found] if found
1831
1856
  end
1832
1857
  unless rec_value == value
1833
1858
  raise Parse::Agent::AccessDenied.new(
@@ -2885,15 +2910,77 @@ module Parse
2885
2910
  response = agent.client.schema(class_name)
2886
2911
 
2887
2912
  unless response.success?
2888
- raise "Failed to fetch schema for '#{class_name}': #{response.error}"
2913
+ # Raise a ValidationError (not a bare RuntimeError) so the message
2914
+ # — including the did-you-mean hint — reaches the LLM via
2915
+ # error_response instead of being collapsed to a generic
2916
+ # "internal error" by the sanitizing StandardError rescue. A
2917
+ # mistyped class name is the common cause; suggesting near matches
2918
+ # lets the model self-correct in one retry instead of falling back
2919
+ # to a full get_all_schemas sweep.
2920
+ suggestions = suggest_class_names(class_name, agent: agent)
2921
+ hint = suggestions.empty? ? "" : " Did you mean: #{suggestions.join(", ")}?"
2922
+ raise Parse::Agent::ValidationError,
2923
+ "Could not fetch schema for '#{class_name}'.#{hint}"
2889
2924
  end
2890
2925
 
2891
2926
  # Enrich with local model metadata (descriptions, agent methods)
2892
2927
  enriched = MetadataRegistry.enriched_schema(class_name, response.result, agent_permission: agent.permissions)
2928
+ enriched = Parse::Agent::PromptHardening.sanitize_schema_for_llm(enriched)
2893
2929
 
2894
2930
  ResultFormatter.format_schema(enriched)
2895
2931
  end
2896
2932
 
2933
# Collects locally-known Parse class names to use as did-you-mean
# candidates. Sources, in order:
#   1. `MetadataRegistry.visible_class_names` (when the registry responds to it)
#   2. `parse_class` of every `Parse::Object.descendants` entry that
#      responds to it (when `Parse::Object` is loaded)
# Names reported by `MetadataRegistry.hidden_class_names` are removed so a
# hidden class is never suggested. nil and blank names are dropped: a
# blank candidate would otherwise be offered as an empty-string suggestion.
#
# @param agent [Object, nil] currently unused; accepted so callers can
#   pass the active agent without a signature change later.
# @return [Array<String>] unique, non-blank class names in first-seen order.
def known_class_names_for_suggestions(agent = nil)
  registry = Parse::Agent::MetadataRegistry
  candidates = []
  candidates.concat(Array(registry.visible_class_names)) if registry.respond_to?(:visible_class_names)

  if defined?(Parse::Object) && Parse::Object.respond_to?(:descendants)
    Parse::Object.descendants.each do |klass|
      candidates << klass.parse_class if klass.respond_to?(:parse_class)
    end
  end

  hidden = registry.respond_to?(:hidden_class_names) ? Array(registry.hidden_class_names).map(&:to_s) : []
  # Stringify before filtering so Symbol and String entries dedupe together.
  candidates.compact.map(&:to_s).reject(&:empty?).uniq - hidden
end
2949
+ module_function :known_class_names_for_suggestions
2950
+
2951
# Returns up to `limit` known class names within a small edit distance of
# the (likely mistyped) `class_name`. Comparison is case-insensitive;
# results are ordered by distance, then alphabetically.
#
# The distance threshold is max(3, ceil(len / 2)), additionally clamped to
# len - 1. Without the clamp a 1-3 character target has a threshold >= its
# own length, so every known name of similar length qualifies no matter
# what it contains. For targets of 4+ characters the clamp never applies.
#
# @param class_name [#to_s] the class name that failed to resolve.
# @param agent [Object, nil] forwarded to known_class_names_for_suggestions.
# @param limit [Integer] maximum number of suggestions to return.
# @return [Array<String>] candidate names, closest first; empty when the
#   target is blank or nothing is close enough.
def suggest_class_names(class_name, agent: nil, limit: 3)
  target = class_name.to_s.downcase
  return [] if target.empty?

  threshold = [[3, (target.length / 2.0).ceil].max, target.length - 1].min
  known_class_names_for_suggestions(agent)
    .map { |name| [name, name_edit_distance(target, name.downcase)] }
    .select { |(_, dist)| dist <= threshold }
    .sort_by { |(name, dist)| [dist, name] }
    .first(limit)
    .map(&:first)
end
2965
+ module_function :suggest_class_names
2966
+
2967
# Levenshtein edit distance between two names, computed iteratively with
# two rolling rows (insert, delete and substitute each cost 1).
#
# Both arguments are coerced with `to_s`, so Symbol class names work as
# well as Strings. Comparison is case-sensitive; callers downcase first
# when they want a case-insensitive distance.
#
# @param a [#to_s] first name.
# @param b [#to_s] second name.
# @return [Integer] minimum number of single-character edits turning a into b.
def name_edit_distance(a, b)
  a = a.to_s
  b = b.to_s
  return 0 if a == b
  return b.length if a.empty?
  return a.length if b.empty?

  b_chars = b.chars
  # prev[j] is the distance between the processed prefix of `a` and b[0, j].
  prev = (0..b_chars.length).to_a
  a.each_char.with_index(1) do |ca, row|
    cur = [row]
    b_chars.each_with_index do |cb, j|
      cost = ca == cb ? 0 : 1
      cur << [cur[j] + 1, prev[j + 1] + 1, prev[j] + cost].min
    end
    prev = cur
  end
  prev.last
end
2982
+ module_function :name_edit_distance
2983
+
2897
2984
  # ============================================================
2898
2985
  # QUERY TOOLS
2899
2986
  # ============================================================
@@ -3034,11 +3121,15 @@ module Parse
3034
3121
  if normalized_format && normalized_format != "json"
3035
3122
  format_query_results_as(normalized_format, class_name, results)
3036
3123
  else
3037
- ResultFormatter.format_query_results(class_name, results,
3038
- limit: limit, skip: skip || 0,
3039
- where: where, keys: keys,
3040
- order: order, include: include,
3041
- truncated_include_fields: truncated_includes)
3124
+ formatted = ResultFormatter.format_query_results(class_name, results,
3125
+ limit: limit, skip: skip || 0,
3126
+ where: where, keys: keys,
3127
+ order: order, include: include,
3128
+ truncated_include_fields: truncated_includes)
3129
+ if formatted.is_a?(Hash) && formatted[:results].is_a?(Array)
3130
+ stamp_source!(formatted[:results], class_name: class_name, tool: :query_class)
3131
+ end
3132
+ formatted
3042
3133
  end
3043
3134
  end
3044
3135
  end
@@ -3368,12 +3459,19 @@ module Parse
3368
3459
  oid = obj.is_a?(Hash) ? (obj["objectId"] || obj[:objectId]) : obj.id
3369
3460
  h[oid] = obj
3370
3461
  end
3462
+ stamp_source!(objects_by_id.values, class_name: class_name, tool: :get_objects)
3371
3463
 
3372
3464
  missing = unique_ids.reject { |id| objects_by_id.key?(id) }
3373
3465
 
3466
+ # Normalize each row to the same LLM-friendly form query_class
3467
+ # emits (Pointers -> {_type,class,id}, Dates -> ISO, ACL stripped)
3468
+ # instead of shipping raw wire-form. Done after stamp_source! so
3469
+ # the `_source` citation survives.
3470
+ simplified = objects_by_id.transform_values { |obj| ResultFormatter.simplify_object(obj) }
3471
+
3374
3472
  envelope = {
3375
3473
  class_name: class_name,
3376
- objects: objects_by_id,
3474
+ objects: simplified,
3377
3475
  missing: missing,
3378
3476
  requested: unique_ids.size,
3379
3477
  found: objects_by_id.size,
@@ -3553,13 +3651,14 @@ module Parse
3553
3651
 
3554
3652
  # Parse Server's REST aggregate endpoint does NOT enforce per-row
3555
3653
  # ACL — only the SDK's mongo-direct path applies the `_rperm`
3556
- # `$match` injection via Parse::ACLScope. For a scoped agent
3557
- # (session_token / acl_user / acl_role), the caller's
3654
+ # `$match` injection via Parse::ACLScope. For any non-master
3655
+ # identity (session_token / acl_user / acl_role, including a
3656
+ # runtime #impersonate that cleared @acl_scope), the caller's
3558
3657
  # `mongo_direct: false` would silently bypass the agent's
3559
3658
  # declared scope; auto-promote to mongo-direct so the ACLScope
3560
3659
  # enforcement runs. Master-key agents keep their REST path
3561
3660
  # (no ACL enforcement was claimed in the first place).
3562
- if !use_mongo_direct && agent.respond_to?(:acl_scope?) && agent.acl_scope? &&
3661
+ if !use_mongo_direct && agent.respond_to?(:requires_mongo_direct?) && agent.requires_mongo_direct? &&
3563
3662
  defined?(Parse::MongoDB) && Parse::MongoDB.enabled?
3564
3663
  use_mongo_direct = true
3565
3664
  end
@@ -3614,6 +3713,9 @@ module Parse
3614
3713
  # losing information. Opt-out via `compact_pointers: false` when
3615
3714
  # the caller specifically needs the raw Parse-on-Mongo shape.
3616
3715
  pointer_map = compact_pointers ? compact_pointers!(results) : {}
3716
+ # Stamp provenance AFTER compaction/redaction. Grouped rows have
3717
+ # no objectId — `_source.object_id` is nil for those (documented).
3718
+ stamp_source!(results, class_name: class_name, tool: :aggregate)
3617
3719
 
3618
3720
  result = {
3619
3721
  class_name: class_name,
@@ -4209,10 +4311,11 @@ module Parse
4209
4311
  # Parse Server's REST aggregate endpoint does NOT enforce per-row
4210
4312
  # ACL — only the SDK's mongo-direct path applies the _rperm match
4211
4313
  # injection via Parse::ACLScope. So we must route through
4212
- # mongo-direct for ANY scoped agent (session_token, acl_user,
4213
- # acl_role), not just acl_user/acl_role. Master-key agents keep
4214
- # the REST path because they've already opted out of ACL.
4215
- use_direct = agent.respond_to?(:acl_scope?) && agent.acl_scope? &&
4314
+ # mongo-direct for ANY non-master identity (session_token,
4315
+ # acl_user, acl_role, including a runtime-impersonated agent whose
4316
+ # @acl_scope was cleared), not just acl_user/acl_role. Master-key
4317
+ # agents keep the REST path because they've already opted out of ACL.
4318
+ use_direct = agent.respond_to?(:requires_mongo_direct?) && agent.requires_mongo_direct? &&
4216
4319
  defined?(Parse::MongoDB) && Parse::MongoDB.enabled?
4217
4320
 
4218
4321
  with_timeout(tool) do
@@ -4561,10 +4664,12 @@ module Parse
4561
4664
  # clips the underlying query too.
4562
4665
  effective_pipeline, _auto_limited = ensure_aggregate_terminal_limit(scoped_pipeline)
4563
4666
 
4564
- # Route to mongo-direct under acl_user/acl_role scope; otherwise
4565
- # the existing REST path handles session_token / master-key.
4566
- use_direct = agent.respond_to?(:acl_scope_requires_direct?) &&
4567
- agent.acl_scope_requires_direct? &&
4667
+ # Route to mongo-direct for ANY non-master identity. The REST
4668
+ # aggregate endpoint enforces no ACL, so a session-token agent's
4669
+ # REST aggregate would run unscoped — only master-key agents
4670
+ # (which opted out of ACL) keep the REST path.
4671
+ use_direct = agent.respond_to?(:requires_mongo_direct?) &&
4672
+ agent.requires_mongo_direct? &&
4568
4673
  defined?(Parse::MongoDB) && Parse::MongoDB.enabled?
4569
4674
 
4570
4675
  rows = nil
@@ -5361,6 +5466,37 @@ module Parse
5361
5466
  end
5362
5467
  module_function :project_object_to_allowlist
5363
5468
 
5469
# Stamps each row hash with an SDK-added `_source` provenance citation of
# the form `{ "class", "tool", "object_id" }`.
#
# No-op unless `Parse::Agent.include_source_provenance?` is truthy and
# `rows` is an Array. Non-Hash entries are skipped. Idempotent: a row that
# already carries `_source` (String or Symbol key) is left untouched.
# `object_id` is nil when the row has no id (e.g. aggregation/group rows).
#
# Call this AFTER field-allowlist projection and hidden-class redaction:
# `_source` is SDK metadata rather than a Parse field, so stamping last
# keeps it out of those gates.
#
# @param rows [Array<Hash>] row hashes (mutated in place).
# @param class_name [String, Symbol] recorded as `_source["class"]`.
# @param tool [Symbol, String] recorded as `_source["tool"]`.
# @param id_key [String, Symbol] the row key holding the objectId; both
#   the String and Symbol form of the key are checked on every row.
# @return [Array<Hash>] the same `rows` object that was passed in.
def stamp_source!(rows, class_name:, tool:, id_key: "objectId")
  return rows unless Parse::Agent.include_source_provenance?
  return rows unless rows.is_a?(Array)

  # Normalise once so a Symbol id_key still finds string-keyed rows.
  string_key = id_key.to_s
  symbol_key = string_key.to_sym

  rows.each do |row|
    next unless row.is_a?(Hash)
    next if row.key?("_source") || row.key?(:_source)

    row["_source"] = {
      "class" => class_name.to_s,
      "tool" => tool.to_s,
      "object_id" => row[string_key] || row[symbol_key],
    }
  end
  rows
end
5498
+ module_function :stamp_source!
5499
+
5364
5500
  # ============================================================
5365
5501
  # ATLAS SEARCH TOOLS
5366
5502
  # ============================================================
@@ -5817,6 +5953,10 @@ module Parse
5817
5953
  rows = result.results.map do |obj|
5818
5954
  row = serialize_atlas_object(obj)
5819
5955
  row = row.select { |k, _| permitted.include?(k.to_s) } if permitted
5956
+ # Normalize to query_class's LLM-friendly form (compact pointers,
5957
+ # ISO dates, ACL stripped) instead of raw wire-form. Done before
5958
+ # the SDK-added score/highlights so those stay verbatim.
5959
+ row = ResultFormatter.simplify_object(row)
5820
5960
  row["score"] = obj.search_score if obj.respond_to?(:search_score) && obj.search_score
5821
5961
  if highlight_field && obj.respond_to?(:search_highlights) && obj.search_highlights
5822
5962
  highlights = filter_atlas_highlights(obj.search_highlights, permitted)
@@ -5824,6 +5964,7 @@ module Parse
5824
5964
  end
5825
5965
  row
5826
5966
  end
5967
+ stamp_source!(rows, class_name: class_name, tool: :atlas_text_search)
5827
5968
 
5828
5969
  {
5829
5970
  class_name: class_name,
@@ -5839,7 +5980,8 @@ module Parse
5839
5980
 
5840
5981
  rows = (result.results || []).map do |obj|
5841
5982
  row = serialize_atlas_object(obj)
5842
- permitted ? row.select { |k, _| permitted.include?(k.to_s) } : row
5983
+ row = row.select { |k, _| permitted.include?(k.to_s) } if permitted
5984
+ ResultFormatter.simplify_object(row)
5843
5985
  end
5844
5986
 
5845
5987
  {
@@ -5858,7 +6000,8 @@ module Parse
5858
6000
 
5859
6001
  rows = (result.results || []).map do |obj|
5860
6002
  row = serialize_atlas_object(obj)
5861
- permitted ? row.select { |k, _| permitted.include?(k.to_s) } : row
6003
+ row = row.select { |k, _| permitted.include?(k.to_s) } if permitted
6004
+ ResultFormatter.simplify_object(row)
5862
6005
  end
5863
6006
 
5864
6007
  {