schemagraphy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.adoc +260 -0
- data/lib/schemagraphy/attribute_resolver.rb +48 -0
- data/lib/schemagraphy/cfgyml/definition.rb +93 -0
- data/lib/schemagraphy/cfgyml/doc_builder.rb +52 -0
- data/lib/schemagraphy/cfgyml/path_reference.rb +24 -0
- data/lib/schemagraphy/cfgyml/templates/config-property.adoc.liquid +57 -0
- data/lib/schemagraphy/cfgyml/templates/config-reference.adoc.liquid +33 -0
- data/lib/schemagraphy/cfgyml/templates/sample-config.yaml.liquid +46 -0
- data/lib/schemagraphy/cfgyml/templates/sample-property.yaml.liquid +82 -0
- data/lib/schemagraphy/data_query/json_pointer.rb +42 -0
- data/lib/schemagraphy/liquid/filters.rb +22 -0
- data/lib/schemagraphy/loader.rb +59 -0
- data/lib/schemagraphy/regexp_utils.rb +235 -0
- data/lib/schemagraphy/safe_expression.rb +189 -0
- data/lib/schemagraphy/schema_utils.rb +124 -0
- data/lib/schemagraphy/sgyml.rb +59 -0
- data/lib/schemagraphy/tag_utils.rb +32 -0
- data/lib/schemagraphy/templating.rb +104 -0
- data/lib/schemagraphy/version.rb +5 -0
- data/lib/schemagraphy.rb +17 -0
- metadata +147 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
{%- comment -%}
  Renders one property of a sample YAML config file.
  Inputs read from the include: include.property (a [key, definition] pair),
  include.tier (nesting depth; values above 1 add indentation) and
  include.nulled (when truthy the entry is emitted commented out).
  The definition's dflt, type, cmmt/desc, templating and properties fields
  decide which YAML presentation ("disp") is used for the default value.
{%- endcomment -%}
{%- assign key = include.property[0] %}
{%- assign val = include.property[1] %}
{%- assign dflt = val.dflt %}
{%- assign tier = include.tier | plus: 0 %}
{%- assign nulled = include.nulled %}
{%- assign indent = '' %}
{%- if tier > 1 %}
{%- for i in (2..tier) %}{% assign indent = indent | append: ' ' %}{% endfor %}
{%- endif %}
{%- if nulled %}
{%- assign pfx = '# ' %}
{%- else %}
{%- assign pfx = '' %}
{%- endif %}
{%- comment -%} Trailing comment: first line of cmmt (or desc), stripped of markup and shortened. {%- endcomment -%}
{%- assign cmmt = val.cmmt | default: val.desc | default: '' | strip | split: "
" | first %}
{%- if cmmt and cmmt != "" %}
{%- assign cmmt = cmmt | demarkupify | truncatewords: 20 | prepend: '# ' %}
{%- endif %}
{%- assign dflt_kind = dflt | sgyml_type | split: ":" | first %}
{%- assign dflt_class = dflt | sgyml_type | split: ":" | last %}
{%- assign disp = nil %}
{%- if dflt contains "
" or val.type == "Multiline" %}
{%- assign disp = "multiline" %}
{%- elsif val.type == "String" %}
{%- comment -%} The empty-string check must come first; otherwise an empty default renders as a YAML null. {%- endcomment -%}
{%- if dflt == "" %}
{%- assign disp = "blank-string" %}
{%- elsif dflt.size > 40 %}
{%- assign disp = "multiline" %}
{%- elsif dflt contains ":" or dflt contains "{" %}
{%- assign disp = "quoted" %}
{%- else %}
{%- assign disp = "unquoted" %}
{%- endif %}
{%- elsif val.templating or val.type == "Template" or val.type == "Liquid" or val.type == "RegExp" %}
{%- assign disp = "multiline" %}
{%- elsif val.type == "Array" or val.type == "ArrayList" or dflt_class == "ArrayList" %}
{%- comment -%} Liquid has no parentheses; and/or are evaluated right to left, so this reads as: size < 4 and (ArrayList type or ArrayList default). {%- endcomment -%}
{%- if dflt.size < 4 and val.type == "ArrayList" or dflt_class == "ArrayList" %}
{%- assign disp = "sequence-flow" %}
{%- else %}
{%- assign disp = "sequence-block" %}
{%- endif %}
{%- elsif val.properties or val.type == "Map" %}
{%- assign disp = "mapping-block" %}
{%- else %}
{%- comment -%} Untyped properties and every remaining type are written as plain scalars. {%- endcomment -%}
{%- assign disp = "unquoted" %}
{%- endif %}
{%- case disp %}
{%- when "multiline" %}
{{ pfx }}{{ indent }}{{ key }}: | {{ cmmt }}
{%- assign ml_indent = indent | append: ' ' %}
{%- if nulled %}
{%- assign dflt_lines = dflt | split: "
" -%}
{%- for line in dflt_lines %}
{{ ml_indent }}# {{ line }}
{%- endfor %}
{%- else %}
{%- assign ml_indent_size = indent.size | plus: 2 %}
{{ dflt | indent: ml_indent_size, true }}
{%- endif %}
{%- when "mapping-block" %}
{{ pfx }}{{ indent }}{{ key }}: {{ cmmt }}
{%- when "sequence-block" %}
{{ pfx }}{{ indent }}{{ key }}: {{ cmmt }}
{%- for item in dflt %}
{{ pfx }}{{ indent }} - {{ item }}
{%- endfor %}
{%- when "sequence-flow" %}
{{ pfx }}{{ indent }}{{ key }}: [{{ dflt | join: ', ' }}] {{ cmmt }}
{%- when "blank-string" %}
{{ pfx }}{{ indent }}{{ key }}: "" {{ cmmt }}
{%- when "quoted" %}
{{ pfx }}{{ indent }}{{ key }}: "{{ dflt }}" {{ cmmt }}
{%- when "unquoted" %}
{{ pfx }}{{ indent }}{{ key }}: {{ dflt }} {{ cmmt }}
{%- else %}
{{ pfx }}{{ indent }}# UNIDENTIFIED
{%- endcase %}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SchemaGraphy
  module DataQuery
    # Resolves JSON Pointer (RFC 6901) queries against nested Hash/Array data.
    module JSONPointer
      module_function

      # A valid array index per RFC 6901: "0", or digits without a leading zero.
      # Signs, underscores and whitespace are rejected.
      ARRAY_INDEX = /\A(?:0|[1-9]\d*)\z/

      # Walks +data+ along +pointer+ and returns the referenced value.
      #
      # @param data [Hash, Array, Object] The document to query.
      # @param pointer [String, nil] A JSON Pointer such as "/a/b/0"; nil or "" selects the whole document.
      # @return [Object] The value the pointer refers to.
      # @raise [ArgumentError] if a non-empty pointer does not start with "/".
      # @raise [KeyError] if any token cannot be resolved.
      def resolve data, pointer
        return data if pointer.nil? || pointer == ''
        raise ArgumentError, "Invalid JSON Pointer: #{pointer}" unless pointer.start_with?('/')

        # A limit of -1 keeps trailing empty tokens, so "/" addresses the key ""
        # and "/a/" addresses the key "" below "a" (both are valid pointers).
        tokens = pointer.split('/', -1).drop(1)
        tokens.reduce(data) { |current, token| resolve_token(current, unescape(token), pointer) }
      end

      # Resolves a single unescaped reference token against the current node.
      #
      # Hash keys are matched as Strings first and as Symbols second.
      # Array tokens must be plain non-negative decimal indices.
      #
      # @param current [Object] The node reached so far.
      # @param key [String] The unescaped reference token.
      # @param pointer [String] The full pointer, used in error messages.
      # @return [Object] The child value.
      # @raise [KeyError] if the token does not exist or the node is not a container.
      def resolve_token current, key, pointer
        case current
        when Array
          raise KeyError, "JSON Pointer not found: #{pointer}" unless ARRAY_INDEX.match?(key)

          current.fetch(Integer(key, 10))
        when Hash
          return current.fetch(key) if current.key?(key)
          return current.fetch(key.to_sym) if current.key?(key.to_sym)

          raise KeyError, "JSON Pointer not found: #{pointer}"
        else
          raise KeyError, "JSON Pointer not found: #{pointer}"
        end
      rescue IndexError
        # KeyError is an IndexError; out-of-range indices and the failures
        # raised above are all reported with one uniform message.
        raise KeyError, "JSON Pointer not found: #{pointer}"
      end

      # Decodes the JSON Pointer escapes: "~1" becomes "/", then "~0" becomes "~".
      #
      # @param token [String] An escaped reference token.
      # @return [String] The unescaped token.
      def unescape token
        token.gsub('~1', '/').gsub('~0', '~')
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'liquid'
|
|
4
|
+
|
|
5
|
+
module SchemaGraphy
  # Liquid filters that expose SchemaGraphy domain operations to templates.
  #
  # The module is registered with Liquid when this file is loaded, so the
  # filters are usable from any Liquid rendering that has required this gem.
  # By DocOps Lab convention each gem registers its own domain-specific
  # filters instead of delegating to AsciiSourcerer.
  module Filters
    # Reports the SGYML type of a value.
    #
    # @param input [Object] The value to classify.
    # @return [String] A "Kind:Class" type string (e.g. "Scalar:String", "Compound:ArrayList").
    # @see SchemaGraphy::SGYML.classify
    def sgyml_type input
      ::SchemaGraphy::SGYML.classify(input)
    end
  end
end
|
|
21
|
+
|
|
22
|
+
# Register the filter module with Liquid as soon as this file is loaded.
Liquid::Template.register_filter(SchemaGraphy::Filters)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'date'
require 'yaml'
require 'psych'
require_relative 'attribute_resolver'
|
|
6
|
+
|
|
7
|
+
module SchemaGraphy
  # The Loader class provides methods for loading YAML files while preserving
  # custom tags and resolving attribute references.
  class Loader
    # Load a YAML file and resolve AsciiDoc attribute references like `\{attribute_name}`.
    #
    # @param path [String] The path to the YAML file.
    # @param attrs [Hash] The AsciiDoc attributes to use for resolution.
    # @return [Object] The loaded YAML data after attribute resolution.
    def self.load_yaml_with_attributes path, attrs = {}
      raw_data = load_yaml_with_tags(path)
      AttributeResolver.resolve_attributes!(raw_data, attrs)
      raw_data
    end

    # Load a YAML file, preserving any custom tags (e.g., `!foo`).
    # A tagged String value under a mapping key is replaced by a Hash of the
    # form `{ 'value' => <string>, '__tag__' => <tag> }`.
    #
    # @param path [String] The path to the YAML file.
    # @return [Object] The loaded YAML data with tags attached; `{}` when the
    #   file is empty or contains no YAML document (for example only comments).
    def self.load_yaml_with_tags path
      return {} if File.empty?(path)

      ast = Psych.parse_file(path)
      # parse_file yields false when the stream holds no document at all;
      # there is then neither data to load nor tags to attach.
      return {} unless ast

      data = Psych.load_file(path, aliases: true, permitted_classes: [Date, Time])
      attach_tags(ast.root, data)
      data
    end

    # Recursively attach YAML tags to the loaded data structure for template processing.
    # Only mappings are traversed; sequences are left untouched.
    #
    # @param node [Psych::Nodes::Node] The current AST node.
    # @param data [Object] The data corresponding to the current node.
    # @return [void]
    # @api private
    def self.attach_tags node, data
      return unless node.is_a?(Psych::Nodes::Mapping) && data.is_a?(Hash)

      node.children.each_slice(2) do |key_node, val_node|
        # Complex keys (sequences or mappings used as keys) carry no scalar
        # value to look the entry up by, so they are skipped.
        next unless key_node.is_a?(Psych::Nodes::Scalar)

        key = key_node.value
        value = data[key]

        if val_node.respond_to?(:tag) && val_node.tag && value.is_a?(String)
          # "!foo" and "tag:yaml.org,2002:str" reduce to "foo" and "str".
          normalized_tag = val_node.tag.sub(/\A!+/, '').sub(/\A.*:/, '')
          data[key] = {
            'value' => value,
            '__tag__' => normalized_tag
          }
        elsif value.is_a?(Hash)
          attach_tags(val_node, value)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'to_regexp'
|
|
4
|
+
|
|
5
|
+
module SchemaGraphy
  # A utility module for robustly parsing and using regular expressions.
  # It handles various formats, including literals and plain strings,
  # and provides helpers for extracting captured content.
  module RegexpUtils
    module_function

    # Matches an entire `/pattern/flags` literal. The anchors bind to the whole
    # string and `.` spans newlines, so multi-line patterns are handled as one unit.
    SLASH_LITERAL = %r{\A/(.+)/([a-z]*)\z}m

    # Parse a regex pattern string.
    # Handles `/pattern/flags`, `%r{pattern}flags` (via the `to_regexp` gem), and plain text formats.
    #
    # @example
    #   parse_pattern("/^hello.*$/im")
    #   # => { pattern: "^hello.*$", flags: "im", regexp: /^hello.*$/im, options: 5 }
    #
    # @example
    #   parse_pattern("hello world")
    #   # => { pattern: "hello world", flags: "", regexp: /hello world/, options: 0 }
    #
    # @example
    #   parse_pattern("hello world", "i")
    #   # => { pattern: "hello world", flags: "i", regexp: /hello world/i, options: 1 }
    #
    # @param input [String] The input string, e.g., "/pattern/flags" or "plain pattern".
    # @param default_flags [String] Flags applied to plain patterns (default: "").
    # @return [Hash, nil] A hash with `:pattern`, `:flags`, `:regexp`, and `:options`,
    #   or `nil` when the input is nil or blank.
    # @raise [RegexpError] if the pattern cannot be compiled.
    def parse_pattern input, default_flags = ''
      return nil if input.nil? || input.to_s.strip.empty?

      # Drop one leading and one trailing quote that may be left over from YAML
      # parsing. Only the ends of the whole string are touched, never the
      # beginnings or ends of inner lines.
      clean_input = input.to_s.strip.sub(/\A["']/, '').sub(/["']\z/, '')

      slash_literal = SLASH_LITERAL.match(clean_input)
      if slash_literal
        compile_pattern(slash_literal[1], slash_literal[2], input)
      elsif clean_input.start_with?('%r{')
        parse_percent_literal(clean_input, input)
      else
        compile_pattern(clean_input, default_flags.to_s, input)
      end
    end

    # Compiles a bare pattern with a flags string into the result hash.
    #
    # @param pattern_str [String] The pattern without delimiters.
    # @param flags_str [String] The flags string (ex: "im").
    # @param input [Object] The original input, used in error messages.
    # @return [Hash] A hash with `:pattern`, `:flags`, `:regexp`, and `:options`.
    # @raise [RegexpError] if the pattern is invalid.
    # @api private
    def compile_pattern pattern_str, flags_str, input
      options = flags_to_options(flags_str)
      {
        pattern: pattern_str,
        flags: flags_str,
        regexp: Regexp.new(pattern_str, options),
        options: options
      }
    rescue RegexpError => e
      raise RegexpError, "Invalid regex pattern '#{input}': #{e.message}"
    end

    # Parses a `%r{...}flags` literal with the `to_regexp` gem.
    #
    # @param literal [String] The cleaned literal text.
    # @param input [Object] The original input, used in error messages.
    # @return [Hash] A hash with `:pattern`, `:flags`, `:regexp`, and `:options`.
    # @raise [RegexpError] if the literal is malformed.
    # @api private
    def parse_percent_literal literal, input
      regexp_obj =
        begin
          literal.to_regexp(detect: true)
        rescue RegexpError => e
          raise RegexpError, "Invalid regex literal '#{input}': #{e.message}"
        end
      # NOTE(review): to_regexp is assumed to return nil for text it cannot
      # parse; guard against it so callers see a RegexpError, not NoMethodError.
      raise RegexpError, "Invalid regex literal '#{input}': could not be parsed" unless regexp_obj

      {
        pattern: regexp_obj.source,
        flags: extract_flags_from_regexp(regexp_obj),
        regexp: regexp_obj,
        options: regexp_obj.options
      }
    end

    # @note Not yet implemented.
    # Future enhancement to parse structured pattern definitions from a Hash.
    # @param pattern_hash [Hash] A hash with 'pattern' and 'flags' keys.
    # @raise [NotImplementedError] Always raises this error.
    def parse_structured_pattern pattern_hash
      # TODO: Implement structured pattern parsing
      # pattern_hash should have 'pattern' and 'flags' keys
      # flags can be string or array
      raise NotImplementedError, 'Structured pattern parsing not yet implemented'
    end

    # @note Not yet implemented.
    # Future enhancement to parse custom YAML tags for regular expressions.
    # @param tagged_input [String] The input string with a YAML tag.
    # @param tag_type [Symbol] The type of tag, e.g., `:literal` or `:pattern`.
    # @raise [NotImplementedError] Always raises this error.
    def parse_tagged_pattern tagged_input, tag_type
      # TODO: Implement custom YAML tag parsing
      # tag_type would be :literal or :pattern
      raise NotImplementedError, 'Tagged pattern parsing not yet implemented'
    end

    # Convert a flags string (ex: "im") to a Regexp options integer.
    # Only 'i', 'm' and 'x' are recognized; any other character is ignored.
    #
    # @param flags [String] String containing regex flags.
    # @return [Integer] Regexp options integer.
    def flags_to_options flags
      options = 0
      flags = flags.to_s

      options |= Regexp::IGNORECASE if flags.include?('i')
      options |= Regexp::MULTILINE if flags.include?('m')
      options |= Regexp::EXTENDED if flags.include?('x')

      options
    end

    # Extract a flags string from a compiled Regexp object.
    #
    # @param regexp [Regexp] A compiled regexp object.
    # @return [String] String representation of the flags (e.g., "im").
    def extract_flags_from_regexp regexp
      flags = ''
      flags += 'i' if regexp.options.anybits?(Regexp::IGNORECASE)
      flags += 'm' if regexp.options.anybits?(Regexp::MULTILINE)
      flags += 'x' if regexp.options.anybits?(Regexp::EXTENDED)
      flags
    end

    # Create a Regexp object from a pattern string and explicit flags.
    #
    # @param pattern [String] The regex pattern (without delimiters).
    # @param flags [String] The flags string (ex: "im").
    # @return [Regexp] The compiled Regexp object.
    def create_regexp pattern, flags = ''
      options = flags_to_options(flags)
      Regexp.new(pattern, options)
    end

    # Extract content using named or positional capture groups.
    # Preference order: the named group (when given and present in the
    # pattern), then the first capture group, then the entire match.
    #
    # @param text [String] The text to match against.
    # @param pattern_info [Hash] The hash result from `parse_pattern`.
    # @param capture_name [String] The name of the capture group to extract (optional).
    # @return [String, nil] The extracted text, or `nil` if no match is found.
    def extract_capture text, pattern_info, capture_name = nil
      return nil unless text && pattern_info

      match = text.match(pattern_info[:regexp])
      return nil unless match

      if capture_name && match.names.include?(capture_name.to_s)
        match[capture_name.to_s]
      elsif match.captures.any?
        match[1]
      else
        match[0]
      end
    end

    # Extract all named capture groups as a hash or positional captures as an array.
    #
    # @param text [String] The text to match against.
    # @param pattern_info [Hash] The hash result from `parse_pattern`.
    # @return [Hash, Array, nil] A hash of named captures, an array of positional captures, or `nil`.
    def extract_all_captures text, pattern_info
      return nil unless text && pattern_info

      match = text.match(pattern_info[:regexp])
      return nil unless match

      if match.names.any?
        match.names.to_h { |name| [name, match[name]] }
      else
        match.captures
      end
    end

    # A convenience method that combines parsing and a single extraction.
    #
    # @param text [String] The text to match against.
    # @param pattern_input [String] The pattern string (with or without /flags/).
    # @param capture_name [String] Name of the capture group to extract (optional).
    # @param default_flags [String] Default flags if the pattern has no flags.
    # @return [String, nil] The extracted text, or `nil` if no match is found.
    def parse_and_extract text, pattern_input, capture_name = nil, default_flags = ''
      pattern_info = parse_pattern(pattern_input, default_flags)
      extract_capture(text, pattern_info, capture_name)
    end

    # A convenience method that combines parsing and extraction of all captures.
    #
    # @param text [String] The text to match against.
    # @param pattern_input [String] The pattern string (with or without /flags/).
    # @param default_flags [String] Default flags if the pattern has no flags.
    # @return [Hash, Array, nil] All captured content, or `nil` if no match is found.
    def parse_and_extract_all text, pattern_input, default_flags = ''
      pattern_info = parse_pattern(pattern_input, default_flags)
      extract_all_captures(text, pattern_info)
    end

    # The two parsing helpers are implementation details of parse_pattern.
    private_class_method :compile_pattern, :parse_percent_literal
  end
end
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'prism'
|
|
4
|
+
require 'timeout'
|
|
5
|
+
|
|
6
|
+
module SchemaGraphy
  # Provides a simple, deny-by-exception sandbox for mapping expressions.
  # It validates code by walking the Abstract Syntax Tree (AST) and blocking
  # known dangerous operations, rather than attempting to allowlist safe ones.
  # A denylist cannot be exhaustive; treat this gate as one layer of defense.
  class AstGate
    # Dangerous methods that are blocked when called without a receiver.
    BLOCKED_BAREWORDS = %w[
      eval instance_eval class_eval module_eval binding
      require require_relative load autoload
      system exec spawn fork backtick `
      open ObjectSpace GC Thread Process at_exit
    ].freeze

    # Reflective methods that are blocked on ANY receiver. They can reach
    # private methods or arbitrary constants from an otherwise harmless object
    # (e.g. `[].send(:system, ...)` or `Array.const_get(:File)`).
    BLOCKED_METHODS = %w[
      send __send__ public_send
      eval instance_eval instance_exec class_eval class_exec module_eval module_exec
      const_get const_set const_missing remove_const
      method public_method instance_method define_method define_singleton_method
      instance_variable_get instance_variable_set binding
    ].freeze

    # AST node types that are explicitly disallowed: definitions and
    # meta-programming, global variables and constant paths, shell execution.
    # (`%i[]` has no comment syntax, so the grouping is described here.)
    DISALLOWED_NODES = %i[
      def_node class_node module_node singleton_class_node
      alias_method_node alias_global_variable_node undef_node
      global_variable_read_node global_variable_write_node
      global_variable_operator_write_node global_variable_or_write_node
      global_variable_and_write_node constant_path_node
      x_string_node interpolated_x_string_node
    ].freeze

    # A list of constants that are considered dangerous and are blocked.
    DANGEROUS_CONSTANTS = %w[
      Kernel Object Module Class File FileUtils IO Dir Process Open3 PTY Thread
      SystemSignal Signal Gem Net HTTP TCPSocket UDPSocket Socket ObjectSpace GC
    ].freeze

    # Validates the given code by parsing it and walking the AST.
    #
    # @param code [String] The Ruby code to validate.
    # @param context_keys [Array] The keys available in the execution context.
    # @raise [SyntaxError] if the code has syntax errors.
    # @raise [SecurityError] if the code contains disallowed operations.
    def self.validate! code, context_keys: []
      result = Prism.parse(code)
      raise SyntaxError, result.errors.map(&:message).join(', ') if result.errors.any?

      walk(result.value, context_keys: context_keys)
    end

    # @api private
    # Recursively walks the AST, checking for disallowed nodes and operations.
    #
    # @param node [Prism::Node] The current AST node.
    # @param context_keys [Array] The keys available in the execution context
    #   (currently only passed along to child nodes).
    # @raise [SecurityError] if a disallowed operation is found.
    def self.walk node, context_keys: []
      return unless node.is_a?(Prism::Node)

      type = node.type
      raise SecurityError, "node not allowed: #{type}" if DISALLOWED_NODES.include?(type)

      case node
      when Prism::CallNode
        check_call!(node)
      when Prism::ConstantReadNode
        # Allow only core Ruby constants defined in SafeTransform
        const_name = node.name.to_s
        unless SafeTransform::CORE_CONSTANTS.key?(const_name.to_sym)
          raise SecurityError, "constant not allowed: #{const_name}"
        end
      when Prism::BackReferenceReadNode
        raise SecurityError, 'shell commands and backticks are not allowed'
      end

      node.child_nodes.each { |child| walk(child, context_keys: context_keys) if child }
    end

    # @api private
    # Rejects calls to blocked methods and calls on dangerous constants.
    #
    # @param node [Prism::CallNode] The call to inspect.
    # @raise [SecurityError] if the call is not permitted.
    def self.check_call! node
      name = node.name.to_s
      receiver = node.receiver

      if BLOCKED_METHODS.include?(name) || (receiver.nil? && BLOCKED_BAREWORDS.include?(name))
        raise SecurityError, "method not allowed: #{name}"
      end
      if receiver.is_a?(Prism::ConstantReadNode) && DANGEROUS_CONSTANTS.include?(receiver.name.to_s)
        raise SecurityError, "unsafe constant: #{receiver.name}"
      end
      raise SecurityError, 'unsafe constant path' if receiver.is_a?(Prism::ConstantPathNode)
    end
  end

  # Provides a sandboxed environment for executing Ruby code.
  # Inherits from `BasicObject` for a minimal namespace and uses `instance_eval`
  # to run code within its own context. All code is validated by {AstGate} before execution.
  class SafeTransform < BasicObject
    # A minimal set of core Ruby constants exposed to the sandboxed environment.
    CORE_CONSTANTS = {
      Array: ::Array,
      Hash: ::Hash,
      String: ::String,
      Integer: ::Integer,
      Float: ::Float,
      TrueClass: ::TrueClass,
      FalseClass: ::FalseClass,
      NilClass: ::NilClass,
      Symbol: ::Symbol,
      Numeric: ::Numeric,
      Regexp: ::Regexp
    }.freeze

    # BasicObject does not see top-level constants, so expose the allowed ones here.
    CORE_CONSTANTS.each do |name, ref|
      const_set(name, ref) unless const_defined?(name, false)
    end

    # @param context [Hash] A hash of data to be made available in the sandbox.
    #   Entries are readable as bareword names; String and Symbol keys both work.
    def initialize context = {}
      @context = context
    end

    # Executes the given code within the sandboxed environment.
    # Validation and evaluation share one 0.25 second time budget.
    #
    # @param code [String] The Ruby code to execute.
    # @return [Object] The result of the executed code.
    # @raise [StandardError] if the execution time exceeds the limit.
    # @raise [SyntaxError] if the code has syntax errors.
    # @raise [SecurityError] if the code contains disallowed operations.
    def transform code
      ::Timeout.timeout(0.25) do
        AstGate.validate!(code, context_keys: @context.keys)
        instance_eval(code)
      end
    rescue ::Timeout::Error
      ::Kernel.raise ::StandardError, 'transform timed out'
    end

    # Adds a key-value pair to the execution context.
    #
    # @param key [String, Symbol] The key to add (stored as a String).
    # @param value [Object] The value to associate with the key.
    def add_context key, value
      @context[key.to_s] = value
    end

    # Safely traverses a nested object using a dot-separated path.
    # Hash segments match String keys first and Symbol keys second, Array
    # segments must be integers, and scalar leaves end the traversal with nil.
    #
    # @param obj [Object] The object to traverse.
    # @param path [String] The dot-separated path (e.g., "a.b.c").
    # @return [Object, nil] The value at the specified path, or `nil`.
    def dig_path obj, path
      path.to_s.split('.').reduce(obj) do |memo, key|
        case memo
        when ::Hash
          memo.key?(key) ? memo[key] : memo[key.to_sym]
        when ::Array
          # Array#[] raises TypeError for a String argument, so convert first.
          /\A-?\d+\z/.match?(key) ? memo[key.to_i] : nil
        when ::String, ::Numeric
          # String#[] would perform a substring search; a scalar has no children.
          nil
        else
          memo.respond_to?(:[]) ? memo[key] : nil
        end
      end
    end

    def to_s
      '#<SchemaGraphy::SafeTransform>'
    end

    private

    # Handles access to variables in the context: a bareword without arguments
    # or block resolves to the context entry of the same name.
    def method_missing(name, *args, &block)
      if args.empty? && block.nil?
        key = name.to_s
        return @context[key] if @context.key?(key)
        return @context[name] if @context.key?(name)
      end

      ::Kernel.raise ::NoMethodError, "undefined method `#{name}` for #{self}"
    end

    # BasicObject has no respond_to_missing? to defer to, so answer directly.
    def respond_to_missing? name, _include_private = false
      @context.key?(name.to_s) || @context.key?(name.to_sym)
    end

    # Disable methods that could be used to break out of the sandbox.

    def instance_exec(*_args)
      ::Kernel.raise ::NoMethodError, 'disabled'
    end

    def method(*_args)
      ::Kernel.raise ::NoMethodError, 'disabled'
    end

    def singleton_class(*_args)
      ::Kernel.raise ::NoMethodError, 'disabled'
    end

    def define_singleton_method(*_args)
      ::Kernel.raise ::NoMethodError, 'disabled'
    end
  end
end
|