parse-stack-next 5.1.1 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.env.sample +12 -0
  3. data/.env.test +4 -4
  4. data/CHANGELOG.md +545 -0
  5. data/Gemfile +3 -0
  6. data/Gemfile.lock +6 -1
  7. data/README.md +167 -38
  8. data/Rakefile +56 -10
  9. data/docs/atlas_vector_search_guide.md +110 -9
  10. data/docs/mcp_guide.md +433 -0
  11. data/docs/mongodb_direct_guide.md +66 -1
  12. data/docs/mongodb_index_optimization_guide.md +22 -1
  13. data/docs/usage_guide.md +15 -0
  14. data/lib/parse/agent/approval_gate.rb +0 -0
  15. data/lib/parse/agent/constraint_translator.rb +90 -19
  16. data/lib/parse/agent/describe.rb +1 -0
  17. data/lib/parse/agent/errors.rb +16 -0
  18. data/lib/parse/agent/mcp_client.rb +9 -0
  19. data/lib/parse/agent/mcp_dispatcher.rb +139 -7
  20. data/lib/parse/agent/mcp_rack_app.rb +621 -17
  21. data/lib/parse/agent/mcp_subscriptions.rb +607 -0
  22. data/lib/parse/agent/metadata_dsl.rb +58 -0
  23. data/lib/parse/agent/metadata_registry.rb +141 -1
  24. data/lib/parse/agent/prompt_hardening.rb +213 -0
  25. data/lib/parse/agent/result_formatter.rb +18 -3
  26. data/lib/parse/agent/tools.rb +167 -24
  27. data/lib/parse/agent.rb +692 -21
  28. data/lib/parse/client/request.rb +55 -4
  29. data/lib/parse/client/response.rb +4 -0
  30. data/lib/parse/client.rb +205 -7
  31. data/lib/parse/model/classes/installation.rb +27 -10
  32. data/lib/parse/model/classes/user.rb +8 -0
  33. data/lib/parse/model/core/actions.rb +58 -4
  34. data/lib/parse/model/core/embed_managed.rb +19 -14
  35. data/lib/parse/model/core/indexing.rb +108 -16
  36. data/lib/parse/model/core/querying.rb +29 -0
  37. data/lib/parse/model/model.rb +34 -3
  38. data/lib/parse/model/object.rb +1 -0
  39. data/lib/parse/query.rb +90 -24
  40. data/lib/parse/retrieval/agent_tool.rb +369 -0
  41. data/lib/parse/retrieval/chunk.rb +74 -0
  42. data/lib/parse/retrieval/chunker.rb +208 -0
  43. data/lib/parse/retrieval/retriever.rb +274 -0
  44. data/lib/parse/retrieval.rb +10 -0
  45. data/lib/parse/schema.rb +69 -20
  46. data/lib/parse/stack/version.rb +2 -2
  47. data/parse-stack-next.gemspec +1 -1
  48. data/scripts/docker/docker-compose.atlas.yml +14 -10
  49. data/scripts/docker/docker-compose.test.yml +24 -20
  50. data/scripts/docker/mongo-init.js +3 -3
  51. data/scripts/start-parse.sh +10 -0
  52. data/scripts/start_mcp_server.rb +1 -1
  53. data/scripts/test_server_connection.rb +1 -1
  54. data/scripts/vector_prototype/create_vector_index.js +1 -1
  55. data/scripts/vector_prototype/fetch_embeddings.py +2 -2
  56. data/scripts/vector_prototype/query_prototype.rb +1 -1
  57. data/scripts/vector_prototype/run.sh +4 -4
  58. metadata +10 -2
@@ -44,6 +44,15 @@ module Parse
44
44
  @tenant_scope_bypasses = {}
45
45
  @tenant_scope_bypass_mutex = Mutex.new
46
46
 
47
+ # Thread-safe storage for `agent_searchable` opt-ins.
48
+ # Maps parse_class_name => { field: Symbol, filter_fields: Array<Symbol> }
49
+ @searchable_classes = {}
50
+ @searchable_mutex = Mutex.new
51
+
52
+ # Once-per-class memo for the agent-visible-but-unscoped lint warning
53
+ # (guarded by @tenant_scope_mutex). Maps parse_class_name => true.
54
+ @tenant_scope_lint_warned = {}
55
+
47
56
  # Register a class as visible to agents.
48
57
  # @param klass [Class] the model class
49
58
  def register_visible_class(klass)
@@ -581,6 +590,89 @@ module Parse
581
590
  end
582
591
  end
583
592
 
593
+ # ============================================================
594
+ # Searchable Registry (semantic_search opt-in)
595
+ # ============================================================
596
+
597
+ # Register a class as searchable via the `semantic_search` tool.
598
+ #
599
+ # @param class_name [String] the Parse class name
600
+ # @param field [Symbol] the :vector property to search
601
+ # @param filter_fields [Array<Symbol>] fields the agent may filter on
602
def register_searchable(class_name, field:, filter_fields: [])
  # Registers (or replaces) the semantic-search opt-in for one class.
  #
  # The rule is built and deep-frozen BEFORE it is published:
  # searchable_rule returns the stored Hash itself (not a copy) after the
  # mutex has been released, so a mutable rule would let any caller alter
  # shared registry state without synchronization. Freezing turns such an
  # accidental write into an immediate FrozenError instead of a silent
  # cross-thread change to the filter allowlist.
  rule = {
    field: field.to_sym,
    filter_fields: Array(filter_fields).map(&:to_sym).freeze,
  }.freeze

  # Returns the stored rule, as the assignment inside the block always did.
  @searchable_mutex.synchronize do
    @searchable_classes[class_name.to_s] = rule
  end
end
610
+
611
+ # @param class_name [String]
612
+ # @return [Hash, nil] { field:, filter_fields: } or nil if not opted in.
613
def searchable_rule(class_name)
  # Look the rule up under the registry lock; the key is always the
  # stringified class name. Yields nil when the class never opted in.
  key = class_name.to_s
  @searchable_mutex.synchronize do
    @searchable_classes[key]
  end
end
616
+
617
+ # @param class_name [String]
618
+ # @return [Symbol, nil] the searchable vector field, or nil.
619
def searchable_field(class_name)
  # No rule means the class never opted in, so there is no vector field.
  rule = searchable_rule(class_name)
  return nil if rule.nil?

  rule.fetch(:field, nil)
end
622
+
623
+ # @param class_name [String]
624
+ # @return [Array<Symbol>] the declared filter-field allowlist.
625
def searchable_filter_fields(class_name)
  # Always answers with an Array: an absent rule, or a rule whose
  # :filter_fields entry is nil/false, yields an empty allowlist.
  rule = searchable_rule(class_name)
  allowlist = rule.nil? ? nil : rule.fetch(:filter_fields, [])
  allowlist || []
end
628
+
629
+ # @return [Boolean] true if any class declares agent_tenant_scope.
630
def any_tenant_scope?
  # Read the rule table under its lock; the deployment counts as
  # tenant-aware as soon as the table holds at least one entry.
  @tenant_scope_mutex.synchronize do
    has_rules = !@tenant_scope_rules.empty?
    has_rules
  end
end
633
+
634
+ # Resolve a class name to its model class for `semantic_search`,
635
+ # enforcing the three opt-in / safety gates. Called at dispatch
636
+ # time (all classes loaded), which is why the tenant-scope cross-
637
+ # check is order-independent.
638
+ #
639
+ # @param class_name [String]
640
+ # @return [Class] the resolved Parse::Object subclass.
641
+ # @raise [Parse::Agent::ValidationError] when the class did not opt
642
+ # in via `agent_searchable` (a caller/LLM mistake).
643
+ # @raise [Parse::Agent::AccessDenied] when the class is
644
+ # `agent_hidden` (kind: :hidden_class).
645
+ # @raise [Parse::Agent::MissingTenantScope] when a tenant-aware
646
+ # deployment has a searchable class without its own tenant scope.
647
def resolve_searchable!(class_name)
  target = class_name.to_s

  # Gate 1 - opt-in: the class must have a registered searchable rule.
  if searchable_rule(target).nil?
    raise Parse::Agent::ValidationError,
          "Class '#{target}' is not registered for semantic search. " \
          "Declare `agent_searchable field: :<vector_field>` on the model."
  end

  # Gate 2 - hidden classes are refused even when they opted in.
  if hidden?(target)
    raise Parse::Agent::AccessDenied.new(
      target, "Class '#{target}' is not accessible to this agent.",
      kind: :hidden_class,
    )
  end

  # Gate 3 - when any class declares a tenant scope, a searchable class
  # without one of its own is refused.
  unscoped = any_tenant_scope? && tenant_scope_rule(target).nil?
  if unscoped
    raise Parse::Agent::MissingTenantScope,
          "Class '#{target}' is searchable but declares no agent_tenant_scope " \
          "while other classes do. Refusing to expose an un-scoped searchable " \
          "surface in a tenant-aware deployment; add agent_tenant_scope to '#{target}'."
  end

  # Finally the name must resolve to a class that answers find_similar.
  model = find_model_class(target)
  return model if model.is_a?(Class) && model.respond_to?(:find_similar)

  raise Parse::Agent::ValidationError,
        "Class '#{target}' is registered searchable but no Parse::Object model " \
        "with a :vector property could be resolved."
end
675
+
584
676
  # Return the tenant scope rule for a class name, or nil if none declared.
585
677
  #
586
678
  # @param class_name [String] the Parse class name
@@ -625,7 +717,23 @@ module Parse
625
717
  # @raise [Parse::Agent::AccessDenied]
626
718
  def resolve_tenant_scope(class_name, agent)
627
719
  rule = tenant_scope_rule(class_name)
628
- return nil unless rule
720
+ unless rule
721
+ # Lint: in a tenant-aware deployment, an agent-visible class with no
722
+ # agent_tenant_scope is the silent cross-tenant case (resolve_searchable!
723
+ # raises for the search path, but the general query path passes through
724
+ # for back-compat). Warn once per class so it isn't discovered only by
725
+ # leaked rows; do not raise — a genuinely global class is legitimate.
726
+ #
727
+ # Gated to classes EXPLICITLY opted into the agent surface (via
728
+ # `agent_fields` → visible, or `agent_searchable`). resolve_tenant_scope
729
+ # runs for every class a tool touches, so without this gate the warning
730
+ # also fires for _User / _Role / _Session and incidental tables — noise
731
+ # that trains operators to ignore the signal.
732
+ if any_tenant_scope? && agent_visible_for_lint?(class_name)
733
+ warn_unscoped_agent_class!(class_name)
734
+ end
735
+ return nil
736
+ end
629
737
 
630
738
  return nil if tenant_scope_bypassed?(class_name, agent)
631
739
 
@@ -640,6 +748,38 @@ module Parse
640
748
  { field: rule[:field], value: value }
641
749
  end
642
750
 
751
+ # @!visibility private
752
+ # Whether a class is EXPLICITLY exposed to agents — declared `agent_fields`
753
+ # (registered visible) or `agent_searchable`. Used to scope the unscoped-
754
+ # class lint so it doesn't fire for system/incidental classes the agent
755
+ # merely happens to touch.
756
def agent_visible_for_lint?(class_name)
  name = class_name.to_s
  # Explicitly registered as visible: counts straight away.
  return true if visible_class_names.include?(name)

  # Otherwise only a searchable opt-in makes the class lint-relevant.
  !searchable_rule(name).nil?
end
760
+
761
+ # @!visibility private
762
+ # Emit a one-time (per class, per process) warning that an agent-visible
763
+ # class is unscoped in a tenant-aware deployment. See {resolve_tenant_scope}.
764
def warn_unscoped_agent_class!(class_name)
  name = class_name.to_s

  # Test-and-set under the lock so each class is reported at most once;
  # the warning itself is written after the lock is released.
  first_time = @tenant_scope_mutex.synchronize do
    if @tenant_scope_lint_warned[name]
      false
    else
      @tenant_scope_lint_warned[name] = true
    end
  end
  return unless first_time

  warn "[Parse::Agent:SECURITY] class '#{name}' is agent-visible but declares no " \
       "agent_tenant_scope while other classes do — queries against it are NOT " \
       "tenant-scoped and may return cross-tenant rows. Add agent_tenant_scope to " \
       "'#{name}', or confirm it is intentionally global."
end
776
+
777
+ # @!visibility private
778
+ # Test-only: re-arm the per-class unscoped-class lint warnings.
779
def reset_tenant_scope_lint!
  # Empty the once-per-class memo under its lock so every class can be
  # warned about again.
  @tenant_scope_mutex.synchronize do
    @tenant_scope_lint_warned.clear
  end
end
782
+
643
783
  private
644
784
 
645
785
  # Find the Ruby model class for a Parse class name.
@@ -0,0 +1,213 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
module Parse
  class Agent
    # Sanitization primitives for prompt-injection hardening (NEW-PROMPT-6).
    # A single home for the transforms applied to data that flows toward an
    # LLM: schema descriptions surfaced by the schema tools, untrusted tool
    # result content, and canary scanning of tool results.
    #
    # The module keeps no state of its own and has no dependency on a live
    # client; every public function is callable on the module itself
    # (`extend self`). Its only outside interactions are reading
    # `Parse::Agent.prompt_marker_strict` / `Parse::Agent.prompt_injection_canaries`
    # and emitting a `warn` line when a schema field is dropped.
    module PromptHardening
      extend self

      # Identifier shape for LLM-surfaced field names: ASCII letter/underscore
      # start, then up to 127 more identifier chars. NOT the secret-field
      # boundary — it permits a leading underscore; `_rperm`/`_hashed_password`
      # are stopped by field_allowlist / validate_keys!, untouched here. This
      # only drops non-identifier names (spaces, punctuation, >128 chars,
      # leading digit) that could carry injection payloads in a field name.
      # The length is an injection-safety cap, not a Parse limit — it is set
      # well above any realistic field name so valid identifiers aren't
      # silently dropped from the schema surfaced to the LLM.
      FIELD_NAME_RE = /\A[a-zA-Z_][a-zA-Z0-9_]{0,127}\z/

      # Max characters retained from any LLM-surfaced description.
      DESCRIPTION_CAP = 200

      SCHEMA_DESC_OPEN = "<schema_description>"
      SCHEMA_DESC_CLOSE = "</schema_description>"

      # C0 (0x00-0x1F except \t\n) + DEL + C1 (0x7F-0x9F) + zero-width
      # (200B-200D, 2060, FEFF). Stripped from descriptions so invisible
      # control/format characters can't smuggle instructions past a human
      # reviewer or confuse the model.
      CONTROL_CHARS_RE = /[\u0000-\u0008\u000B-\u001F\u007F-\u009F\u200B-\u200D\u2060\uFEFF]/

      # Free-text keys sanitized on the class-level hash and on every field
      # config. Both String and Symbol spellings are listed because the
      # hashes handled here may be keyed either way.
      TEXT_KEYS = ["description", :description, "usage", :usage].freeze

      # Keys sanitized on enum `allowed_values` entries and `agent_methods`
      # entries (description only, either key form).
      DESCRIPTION_KEYS = ["description", :description].freeze

      private_constant :TEXT_KEYS, :DESCRIPTION_KEYS

      # Sub-part 1 — sanitize an enriched schema hash before it is
      # serialized toward the LLM. Returns a sanitized deep copy (input is
      # not mutated). Drops fields whose names fail {FIELD_NAME_RE} (with a
      # `[Parse::Agent:PROMPT]` warning), and scrubs + caps + marker-wraps
      # every description / usage string (class-level, per-field, enum
      # value descriptions, and agent-method descriptions), whether the
      # hash uses String or Symbol keys.
      #
      # @param schema [Hash] anything that is not a Hash is returned as-is.
      # @return [Hash]
      # @raise [Parse::Agent::SecurityError] in `prompt_marker_strict` mode
      #   when a description contains a reserved marker.
      def sanitize_schema_for_llm(schema)
        return schema unless schema.is_a?(Hash)
        out = deep_dup(schema)
        class_name = out["className"] || out[:className]

        sanitize_text_keys!(out, TEXT_KEYS)

        fields = out["fields"] || out[:fields]
        sanitize_fields!(fields, class_name) if fields.is_a?(Hash)

        # agent_methods entries are surfaced to the LLM by format_schema exactly
        # like field descriptions, and their :description / per-parameter
        # description strings come from the same developer-authored DSL — so they
        # get the same marker-neutralization / control-char strip / length cap.
        # (format_methods emits symbol-keyed hashes; tolerate both forms.)
        agent_methods = out["agent_methods"] || out[:agent_methods]
        if agent_methods.is_a?(Array)
          agent_methods.each do |m|
            next unless m.is_a?(Hash)
            sanitize_text_keys!(m, DESCRIPTION_KEYS)
            sanitize_nested_descriptions!(m["parameters"] || m[:parameters])
          end
        end

        out
      end

      # @param name [#to_s]
      # @return [Boolean] whether `name` is a safe LLM-surfaceable identifier.
      def valid_field_name?(name)
        FIELD_NAME_RE.match?(name.to_s)
      end

      # Scrub control chars, neutralize markers, cap length, and wrap a
      # description in <schema_description> markers.
      #
      # Order matters: invisible characters are removed BEFORE the marker
      # scan. Scanning first would let `</schema_\u200Bdescription>` slip
      # past the literal match and then collapse into a live closing marker
      # once the zero-width character is stripped. Marker neutralization
      # still happens before the wrapper is added, so a stored
      # `</schema_description>` can't close it.
      #
      # @param str [String] non-String input is returned unchanged.
      # @return [String]
      # @raise [Parse::Agent::SecurityError] in `prompt_marker_strict` mode
      #   when the (control-char-stripped) text contains a reserved marker.
      def sanitize_description(str)
        return str unless str.is_a?(String)
        cleaned = str.gsub(CONTROL_CHARS_RE, "")
        cleaned = scrub_marker_injection(cleaned)
        # Truncation only removes a suffix, so it cannot re-create a marker.
        cleaned = cleaned[0, DESCRIPTION_CAP] if cleaned.length > DESCRIPTION_CAP
        "#{SCHEMA_DESC_OPEN}#{cleaned}#{SCHEMA_DESC_CLOSE}"
      end

      # Sub-part 2 — neutralize wrapper/marker strings embedded in untrusted
      # content so a stored value cannot impersonate or close the
      # tool-result wrapper. Idempotent: the escaped form no longer contains
      # the original literal, so re-application is a no-op (content is
      # re-serialized into history every turn).
      #
      # When `Parse::Agent.prompt_marker_strict` is true, raises instead of
      # escaping (fail-closed for high-assurance deployments).
      #
      # @param content [String, #to_s]
      # @return [String]
      # @raise [Parse::Agent::SecurityError] in strict mode when a marker is
      #   present.
      def scrub_marker_injection(content)
        s = content.to_s
        strict = Parse::Agent.prompt_marker_strict
        injection_markers.each do |marker|
          next unless s.include?(marker)
          if strict
            raise Parse::Agent::SecurityError,
                  "prompt_marker_strict: untrusted content contains a reserved marker"
          end
          s = s.gsub(marker, escape_marker(marker))
        end
        s
      end

      # Sub-part 3 — scan text for any operator-registered canary phrase.
      # Regexp canaries are matched as given; every other canary is compared
      # as a case-insensitive substring.
      #
      # @param text [String, #to_s]
      # @return [String, nil] the matched phrase/pattern source, or nil.
      def scan_for_canaries(text)
        canaries = Parse::Agent.prompt_injection_canaries
        return nil if canaries.nil? || canaries.empty?
        s = text.to_s
        return nil if s.empty?
        down = s.downcase
        canaries.each do |c|
          case c
          when Regexp
            return c.source if c.match?(s)
          else
            phrase = c.to_s
            return phrase if !phrase.empty? && down.include?(phrase.downcase)
          end
        end
        nil
      end

      private

      # Run every String value stored under one of `keys` through
      # {#sanitize_description}, in place. Keys that are absent (or hold a
      # non-String) are left alone, so no new keys are ever created.
      def sanitize_text_keys!(hash, keys)
        keys.each do |k|
          hash[k] = sanitize_description(hash[k]) if hash[k].is_a?(String)
        end
      end

      # Sanitize the `fields` table of a schema copy in place: drop fields
      # with unsafe names (with a warning), then scrub each remaining field
      # config's description / usage and its enum value descriptions.
      def sanitize_fields!(fields, class_name)
        # Iterate over a snapshot of the keys because entries may be deleted.
        fields.keys.each do |fname|
          unless valid_field_name?(fname)
            fields.delete(fname)
            warn "[Parse::Agent:PROMPT] dropped field #{fname.inspect} on " \
                 "#{class_name.inspect}: invalid identifier"
            next
          end
          cfg = fields[fname]
          next unless cfg.is_a?(Hash)
          sanitize_text_keys!(cfg, TEXT_KEYS)
          allowed = cfg["allowed_values"] || cfg[:allowed_values]
          next unless allowed.is_a?(Array)
          allowed.each { |v| sanitize_text_keys!(v, DESCRIPTION_KEYS) if v.is_a?(Hash) }
        end
      end

      # Recursively run every `description` string nested in an agent method's
      # JSON-Schema `parameters` through {#sanitize_description}, so per-parameter
      # descriptions get the same hardening as field descriptions. Mutates in
      # place; tolerates string- and symbol-keyed hashes and arbitrary nesting.
      def sanitize_nested_descriptions!(node)
        case node
        when Hash
          node.each do |k, v|
            if (k == "description" || k == :description) && v.is_a?(String)
              # Re-assigning an existing key is safe while iterating.
              node[k] = sanitize_description(v)
            else
              sanitize_nested_descriptions!(v)
            end
          end
        when Array
          node.each { |e| sanitize_nested_descriptions!(e) }
        end
      end

      # The literal strings scrub_marker_injection neutralizes. The MCP
      # wrapper marker is resolved lazily to avoid a load-order dependency.
      def injection_markers
        markers = [SCHEMA_DESC_OPEN, SCHEMA_DESC_CLOSE]
        if defined?(Parse::Agent::MCPClient::UNTRUSTED_TOOL_RESULT_MARKER)
          markers << Parse::Agent::MCPClient::UNTRUSTED_TOOL_RESULT_MARKER
        end
        markers
      end

      # Insert a backslash after the first character so the original literal
      # no longer occurs (keeps the text human-readable and idempotent).
      def escape_marker(marker)
        return marker if marker.length < 2
        "#{marker[0]}\\#{marker[1..]}"
      end

      # Copy nested Hashes, Arrays and Strings; any other object is shared
      # with the input.
      def deep_dup(obj)
        case obj
        when Hash then obj.each_with_object({}) { |(k, v), h| h[k] = deep_dup(v) }
        when Array then obj.map { |e| deep_dup(e) }
        when String then obj.dup
        else obj
        end
      end
    end
  end
end
@@ -15,6 +15,15 @@ module Parse
15
15
  # Maximum number of results to include in output
16
16
  MAX_RESULTS_DISPLAY = 50
17
17
 
18
+ # Keys stripped from every simplified data object before it reaches
19
+ # the LLM. The raw `ACL` map (per-role / per-user read/write bits) is
20
+ # operationally useless to a model reasoning over row data — the
21
+ # agent's effective read/write authority is enforced server-side
22
+ # regardless of what ACL a row carries — so surfacing it is pure
23
+ # token overhead plus a minor disclosure of role/user identifiers.
24
+ # Applied recursively (nested included records too).
25
+ DROPPED_OBJECT_KEYS = %w[ACL].freeze
26
+
18
27
  # Parse field type mappings for human-readable output
19
28
  TYPE_NAMES = {
20
29
  "String" => "string",
@@ -382,14 +391,20 @@ module Parse
382
391
  "Pointer to #{target}. Equality: #{equality}. $in/$nin: #{in_shape}."
383
392
  end
384
393
 
385
- # Simplify an object for display (resolve __type fields)
394
+ # Simplify an object for display (resolve __type fields). Strips the
395
+ # raw ACL map (see {DROPPED_OBJECT_KEYS}). Public so the query/get/
396
+ # atlas tool envelopes can route their rows through the same
397
+ # normalization query_class already uses.
386
398
  def simplify_object(obj)
387
399
  return obj unless obj.is_a?(Hash)
388
400
 
389
- obj.transform_values do |value|
390
- simplify_value(value)
401
+ obj.each_with_object({}) do |(key, value), acc|
402
+ next if DROPPED_OBJECT_KEYS.include?(key.to_s)
403
+
404
+ acc[key] = simplify_value(value)
391
405
  end
392
406
  end
407
+ public :simplify_object
393
408
 
394
409
  # Simplify a single value
395
410
  def simplify_value(value)