ruby-rego 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. checksums.yaml +7 -0
  2. data/.reek.yml +80 -0
  3. data/.vscode/extensions.json +19 -0
  4. data/.vscode/launch.json +35 -0
  5. data/.vscode/settings.json +25 -0
  6. data/.vscode/tasks.json +117 -0
  7. data/.yardopts +12 -0
  8. data/ARCHITECTURE.md +39 -0
  9. data/CHANGELOG.md +25 -0
  10. data/CODE_OF_CONDUCT.md +10 -0
  11. data/LICENSE.txt +21 -0
  12. data/README.md +183 -0
  13. data/RELEASING.md +37 -0
  14. data/Rakefile +38 -0
  15. data/SECURITY.md +26 -0
  16. data/Steepfile +10 -0
  17. data/TODO.md +35 -0
  18. data/benchmark/builtin_calls.rb +29 -0
  19. data/benchmark/complex_policy.rb +19 -0
  20. data/benchmark/comprehensions.rb +19 -0
  21. data/benchmark/simple_rules.rb +20 -0
  22. data/examples/README.md +27 -0
  23. data/examples/sample_config.yaml +2 -0
  24. data/examples/simple_policy.rego +7 -0
  25. data/examples/validation_policy.rego +11 -0
  26. data/exe/rego-validate +6 -0
  27. data/lib/ruby/rego/ast/base.rb +95 -0
  28. data/lib/ruby/rego/ast/binary_op.rb +64 -0
  29. data/lib/ruby/rego/ast/call.rb +27 -0
  30. data/lib/ruby/rego/ast/composite.rb +48 -0
  31. data/lib/ruby/rego/ast/comprehension.rb +63 -0
  32. data/lib/ruby/rego/ast/every.rb +37 -0
  33. data/lib/ruby/rego/ast/import.rb +32 -0
  34. data/lib/ruby/rego/ast/literal.rb +70 -0
  35. data/lib/ruby/rego/ast/module.rb +32 -0
  36. data/lib/ruby/rego/ast/package.rb +22 -0
  37. data/lib/ruby/rego/ast/query.rb +63 -0
  38. data/lib/ruby/rego/ast/reference.rb +58 -0
  39. data/lib/ruby/rego/ast/rule.rb +114 -0
  40. data/lib/ruby/rego/ast/unary_op.rb +42 -0
  41. data/lib/ruby/rego/ast/variable.rb +22 -0
  42. data/lib/ruby/rego/ast.rb +17 -0
  43. data/lib/ruby/rego/builtins/aggregates.rb +124 -0
  44. data/lib/ruby/rego/builtins/base.rb +95 -0
  45. data/lib/ruby/rego/builtins/collections/array_ops.rb +103 -0
  46. data/lib/ruby/rego/builtins/collections/object_ops.rb +120 -0
  47. data/lib/ruby/rego/builtins/collections/set_ops.rb +51 -0
  48. data/lib/ruby/rego/builtins/collections.rb +137 -0
  49. data/lib/ruby/rego/builtins/comparisons/casts.rb +139 -0
  50. data/lib/ruby/rego/builtins/comparisons.rb +84 -0
  51. data/lib/ruby/rego/builtins/numeric_helpers.rb +56 -0
  52. data/lib/ruby/rego/builtins/registry.rb +199 -0
  53. data/lib/ruby/rego/builtins/registry_helpers.rb +27 -0
  54. data/lib/ruby/rego/builtins/strings/case_ops.rb +22 -0
  55. data/lib/ruby/rego/builtins/strings/concat.rb +19 -0
  56. data/lib/ruby/rego/builtins/strings/formatting.rb +35 -0
  57. data/lib/ruby/rego/builtins/strings/helpers.rb +62 -0
  58. data/lib/ruby/rego/builtins/strings/number_helpers.rb +48 -0
  59. data/lib/ruby/rego/builtins/strings/search.rb +63 -0
  60. data/lib/ruby/rego/builtins/strings/split.rb +19 -0
  61. data/lib/ruby/rego/builtins/strings/substring.rb +22 -0
  62. data/lib/ruby/rego/builtins/strings/trim.rb +42 -0
  63. data/lib/ruby/rego/builtins/strings/trim_helpers.rb +62 -0
  64. data/lib/ruby/rego/builtins/strings.rb +58 -0
  65. data/lib/ruby/rego/builtins/types.rb +89 -0
  66. data/lib/ruby/rego/call_name.rb +55 -0
  67. data/lib/ruby/rego/cli.rb +1122 -0
  68. data/lib/ruby/rego/compiled_module.rb +114 -0
  69. data/lib/ruby/rego/compiler.rb +1097 -0
  70. data/lib/ruby/rego/environment/overrides.rb +33 -0
  71. data/lib/ruby/rego/environment/reference_resolution.rb +86 -0
  72. data/lib/ruby/rego/environment.rb +230 -0
  73. data/lib/ruby/rego/environment_pool.rb +71 -0
  74. data/lib/ruby/rego/error_handling.rb +58 -0
  75. data/lib/ruby/rego/error_payload.rb +34 -0
  76. data/lib/ruby/rego/errors.rb +196 -0
  77. data/lib/ruby/rego/evaluator/assignment_support.rb +126 -0
  78. data/lib/ruby/rego/evaluator/binding_helpers.rb +60 -0
  79. data/lib/ruby/rego/evaluator/comprehension_evaluator.rb +182 -0
  80. data/lib/ruby/rego/evaluator/expression_dispatch.rb +45 -0
  81. data/lib/ruby/rego/evaluator/expression_evaluator.rb +492 -0
  82. data/lib/ruby/rego/evaluator/object_literal_evaluator.rb +52 -0
  83. data/lib/ruby/rego/evaluator/operator_evaluator.rb +163 -0
  84. data/lib/ruby/rego/evaluator/query_node_builder.rb +38 -0
  85. data/lib/ruby/rego/evaluator/reference_key_resolver.rb +50 -0
  86. data/lib/ruby/rego/evaluator/reference_resolver.rb +352 -0
  87. data/lib/ruby/rego/evaluator/rule_evaluator/bindings.rb +70 -0
  88. data/lib/ruby/rego/evaluator/rule_evaluator.rb +550 -0
  89. data/lib/ruby/rego/evaluator/rule_value_provider.rb +56 -0
  90. data/lib/ruby/rego/evaluator/variable_collector.rb +221 -0
  91. data/lib/ruby/rego/evaluator.rb +174 -0
  92. data/lib/ruby/rego/lexer/number_reader.rb +68 -0
  93. data/lib/ruby/rego/lexer/stream.rb +137 -0
  94. data/lib/ruby/rego/lexer/string_reader.rb +90 -0
  95. data/lib/ruby/rego/lexer/template_string_reader.rb +62 -0
  96. data/lib/ruby/rego/lexer.rb +206 -0
  97. data/lib/ruby/rego/location.rb +73 -0
  98. data/lib/ruby/rego/memoization.rb +67 -0
  99. data/lib/ruby/rego/parser/collections.rb +173 -0
  100. data/lib/ruby/rego/parser/expressions.rb +216 -0
  101. data/lib/ruby/rego/parser/precedence.rb +42 -0
  102. data/lib/ruby/rego/parser/query.rb +139 -0
  103. data/lib/ruby/rego/parser/references.rb +115 -0
  104. data/lib/ruby/rego/parser/rules.rb +310 -0
  105. data/lib/ruby/rego/parser.rb +210 -0
  106. data/lib/ruby/rego/policy.rb +50 -0
  107. data/lib/ruby/rego/result.rb +91 -0
  108. data/lib/ruby/rego/token.rb +206 -0
  109. data/lib/ruby/rego/unifier.rb +451 -0
  110. data/lib/ruby/rego/value.rb +379 -0
  111. data/lib/ruby/rego/version.rb +7 -0
  112. data/lib/ruby/rego/with_modifiers/with_modifier.rb +37 -0
  113. data/lib/ruby/rego/with_modifiers/with_modifier_applier.rb +48 -0
  114. data/lib/ruby/rego/with_modifiers/with_modifier_builtin_override.rb +128 -0
  115. data/lib/ruby/rego/with_modifiers/with_modifier_context.rb +120 -0
  116. data/lib/ruby/rego/with_modifiers/with_modifier_path_key_resolver.rb +42 -0
  117. data/lib/ruby/rego/with_modifiers/with_modifier_path_override.rb +99 -0
  118. data/lib/ruby/rego/with_modifiers/with_modifier_root_scope.rb +58 -0
  119. data/lib/ruby/rego.rb +72 -0
  120. data/sig/objspace.rbs +4 -0
  121. data/sig/psych.rbs +7 -0
  122. data/sig/rego_validate.rbs +382 -0
  123. data/sig/ruby/rego.rbs +2150 -0
  124. metadata +172 -0
@@ -0,0 +1,221 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ruby
4
+ module Rego
5
+ class Evaluator
6
# Dispatch tables and predicates shared by the variable collectors.
#
# NODE_COLLECTORS maps an AST node class to a lambda taking (node, collector)
# that forwards to one of the collector's private hooks through +send+.
# CHILDREN_EXTRACTORS maps an AST node class to a callable returning the
# node's direct child nodes. Both tables are looked up by exact class.
module VariableCollectorHelpers
  NODE_COLLECTORS = {
    AST::Variable => ->(variable, visitor) { visitor.send(:add_name, variable.name) },
    AST::SomeDecl => ->(declaration, visitor) { visitor.send(:collect_some_decl, declaration) },
    AST::Every => ->(every, visitor) { visitor.send(:collect_every, every) },
    AST::QueryLiteral => ->(literal, visitor) { visitor.send(:collect_node, literal.expression) },
    AST::ArrayComprehension => lambda do |comprehension, visitor|
      visitor.send(:collect_comprehension, [comprehension.term], comprehension.body)
    end,
    AST::SetComprehension => lambda do |comprehension, visitor|
      visitor.send(:collect_comprehension, [comprehension.term], comprehension.body)
    end,
    AST::ObjectComprehension => lambda do |comprehension, visitor|
      # The term of an object comprehension is destructured into key and value.
      key_term, value_term = comprehension.term
      visitor.send(:collect_comprehension, [key_term, value_term], comprehension.body)
    end
  }.freeze

  CHILDREN_EXTRACTORS = {
    AST::Reference => ->(reference) { [reference.base, *reference.path.map(&:value)] },
    AST::BinaryOp => ->(operation) { [operation.left, operation.right] },
    AST::UnaryOp => ->(operation) { [operation.operand] },
    AST::ArrayLiteral => ->(array) { array.elements },
    AST::SetLiteral => ->(set) { set.elements },
    AST::ObjectLiteral => lambda do |object|
      object.pairs.flat_map { |key_term, value_term| [key_term, value_term] }
    end,
    AST::ArrayComprehension => ->(comprehension) { [comprehension.term] },
    AST::SetComprehension => ->(comprehension) { [comprehension.term] },
    AST::ObjectComprehension => lambda do |comprehension|
      key_term, value_term = comprehension.term
      [key_term, value_term]
    end,
    AST::Call => ->(call) { call.args.dup }
  }.freeze

  class << self
    # @param node [Object]
    # @return [Proc, nil] the collector hook registered for the node's class
    def collector_for(node)
      NODE_COLLECTORS.fetch(node.class, nil)
    end

    # @param node [Object]
    # @return [Array<Object>] child nodes, or an empty array when the node's
    #   class has no registered extractor
    def children_for(node)
      extractor = CHILDREN_EXTRACTORS[node.class]
      extractor ? extractor.call(node) : []
    end

    # @param node [Object]
    # @return [Boolean] true for array, set and object comprehension nodes
    def comprehension_node?(node)
      [AST::ArrayComprehension, AST::SetComprehension, AST::ObjectComprehension].any? do |comprehension_class|
        node.is_a?(comprehension_class)
      end
    end
  end
end
58
+
59
# Gathers the names of variables that a list of query literals binds.
#
# Names are kept in two groups: "explicit" names come from AST::SomeDecl
# literals and from the left-hand side of :assign expressions; "unification"
# names come from either side of :unify expressions.
class BoundVariableCollector
  def initialize
    @explicit_names = [] # @type var @explicit_names: Array[String]
    @unify_names = [] # @type var @unify_names: Array[String]
  end

  # Convenience wrapper returning only the combined, de-duplicated names.
  #
  # @param literals [Array<Object>]
  # @return [Array<String>]
  def collect(literals)
    collect_details(literals).fetch(:all)
  end

  # Clears any previous state, scans the literals and reports the names
  # grouped by how they were bound.
  #
  # @param literals [Array<Object>]
  # @return [Hash<Symbol, Array<String>>] keys :explicit, :unification, :all
  # :reek:TooManyStatements
  def collect_details(literals)
    reset
    Array(literals).each { |literal| collect_from_literal(literal) }
    explicit = explicit_names.uniq
    unification = unify_names.uniq
    # Array#| is a de-duplicating, order-preserving union.
    { explicit: explicit, unification: unification, all: explicit | unification }
  end

  private

  attr_reader :explicit_names, :unify_names

  def reset
    [explicit_names, unify_names].each(&:clear)
  end

  # Literals other than SomeDecl and QueryLiteral contribute nothing.
  def collect_from_literal(literal)
    case literal
    in AST::SomeDecl[variables:]
      explicit_names.concat(variables.map(&:name))
    in AST::QueryLiteral[expression:]
      collect_from_expression(expression)
    else
      nil
    end
  end

  # Only binary :assign and :unify expressions bind variables.
  # :reek:FeatureEnvy
  # :reek:TooManyStatements
  def collect_from_expression(expression)
    return unless expression.is_a?(AST::BinaryOp)

    case expression.operator
    when :assign
      collect_explicit_variables(expression.left)
    when :unify
      [expression.left, expression.right].each { |side| collect_unification_variables(side) }
    end
  end

  def collect_explicit_variables(node)
    collect_all_variables(node, explicit_names)
  end

  def collect_unification_variables(node)
    collect_all_variables(node, unify_names)
  end

  # Depth-first walk that appends every variable name under +node+ to
  # +target+; comprehension nodes are not descended into.
  # :reek:FeatureEnvy
  def collect_all_variables(node, target)
    return unless node

    if node.is_a?(AST::Variable)
      target << node.name
    elsif !VariableCollectorHelpers.comprehension_node?(node)
      VariableCollectorHelpers.children_for(node).each do |child|
        collect_all_variables(child, target)
      end
    end
  end
end
136
+
137
# Walks expressions and query literals, recording the variable names they
# reference. Names that are local to an enclosing comprehension or `every`
# body (as reported by BoundVariableCollector, plus the `every` key/value
# variables) are skipped while that scope is active.
# :reek:TooManyMethods
class VariableCollector
  def initialize
    @names = [] # @type var @names: Array[String]
    @local_scopes = [] # @type var @local_scopes: Array[Array[String]]
  end

  # Collects from a single node. Results accumulate across calls on the
  # same instance and are not de-duplicated.
  #
  # @param node [Object]
  # @return [Array<String>]
  def collect(node)
    collect_node(node)
    names
  end

  # Collects from each literal in turn.
  #
  # @param literals [Array<Object>]
  # @return [Array<String>]
  def collect_literals(literals)
    Array(literals).each { |literal| collect_node(literal) }
    names
  end

  private

  attr_reader :names

  # Uses the node-specific hook when one is registered; otherwise recurses
  # into whatever children the helpers report for the node.
  def collect_node(node)
    return unless node

    handler = VariableCollectorHelpers.collector_for(node)
    if handler
      handler.call(node, self)
    else
      VariableCollectorHelpers.children_for(node).each { |child| collect_node(child) }
    end
  end

  # Visits a comprehension's terms and body with the body's bound names
  # treated as locals.
  def collect_comprehension(term_nodes, body_literals)
    comprehension_locals = BoundVariableCollector.new.collect(body_literals)
    with_locals(comprehension_locals) do
      collect_comprehension_body(term_nodes, body_literals)
    end
  end

  def collect_comprehension_body(term_nodes, body_literals)
    term_nodes.each { |term| collect_node(term) }
    Array(body_literals).each { |literal| collect_node(literal) }
  end

  def collect_some_decl(node)
    node.variables.each { |declared| add_name(declared.name) }
    # collect_node ignores a missing collection.
    collect_node(node.collection)
  end

  # The domain is visited outside the local scope; the body is visited with
  # its bound names and the key/value variables treated as locals.
  # :reek:TooManyStatements
  def collect_every(node)
    collect_node(node.domain)
    body = Array(node.body)
    every_locals = BoundVariableCollector.new.collect(body) | every_variable_names(node)
    with_locals(every_locals) do
      body.each { |literal| collect_node(literal) }
    end
  end

  # :reek:UtilityFunction
  def every_variable_names(node)
    [node.key_var, node.value_var].compact.map(&:name)
  end

  def add_name(name)
    names << name unless local_name?(name)
  end

  # Pushes a scope of local names for the duration of the block; the scope
  # is popped even when the block raises.
  def with_locals(scope_names)
    @local_scopes.push(scope_names)
    yield
  ensure
    @local_scopes.pop
  end

  def local_name?(name)
    @local_scopes.any? { |scope_names| scope_names.include?(name) }
  end
end
219
+ end
220
+ end
221
+ end
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ast"
4
+ require_relative "environment"
5
+ require_relative "errors"
6
+ require_relative "result"
7
+ require_relative "value"
8
+ require_relative "unifier"
9
+ require_relative "compiled_module"
10
+ require_relative "compiler"
11
+ require_relative "evaluator/operator_evaluator"
12
+ require_relative "evaluator/assignment_support"
13
+ require_relative "evaluator/binding_helpers"
14
+ require_relative "evaluator/expression_dispatch"
15
+ require_relative "evaluator/object_literal_evaluator"
16
+ require_relative "evaluator/comprehension_evaluator"
17
+ require_relative "evaluator/rule_value_provider"
18
+ require_relative "evaluator/reference_resolver"
19
+ require_relative "evaluator/reference_key_resolver"
20
+ require_relative "evaluator/expression_evaluator"
21
+ require_relative "evaluator/variable_collector"
22
+ require_relative "evaluator/rule_evaluator"
23
+ require_relative "evaluator/query_node_builder"
24
+ require_relative "with_modifiers/with_modifier"
25
+ require_relative "with_modifiers/with_modifier_applier"
26
+
27
+ module Ruby
28
+ module Rego
29
# Evaluates compiled Rego modules against input and data.
class Evaluator
  # Builds an evaluator with a preconfigured environment.
  #
  # The instance is created with +allocate+ so that #initialize, which
  # constructs a fresh Environment, is bypassed.
  #
  # @param compiled_module [#rules_by_name, #package_path] compiled module
  # @param environment [Environment] preconfigured environment
  # @return [Evaluator]
  def self.from_environment(compiled_module, environment)
    evaluator = allocate
    evaluator.send(:initialize_with_environment, compiled_module, environment)
    evaluator
  end

  # Build an evaluator directly from an AST module.
  #
  # A default Compiler is only instantiated when the caller does not pass
  # one under +:compiler+.
  #
  # @param ast_module [AST::Module] parsed module
  # @param options [Hash] evaluator options (input, data, compiler)
  # @return [Evaluator] evaluator instance
  def self.from_ast(ast_module, options = {})
    default_input = {} # @type var default_input: Hash[untyped, untyped]
    default_data = {} # @type var default_data: Hash[untyped, untyped]
    compiler = options.fetch(:compiler) { Compiler.new }
    new(
      compiler.compile(ast_module),
      input: options.fetch(:input, default_input),
      data: options.fetch(:data, default_data)
    )
  end

  # Create an evaluator for a compiled module.
  #
  # @param compiled_module [#rules_by_name, #package_path] compiled module
  # @param input [Object] input document
  # @param data [Object] data document
  def initialize(compiled_module, input: {}, data: {})
    environment = Environment.new(input: input, data: data, rules: compiled_module.rules_by_name)
    initialize_with_environment(compiled_module, environment)
  end

  # The compiled module being evaluated.
  #
  # @return [#rules_by_name, #package_path]
  attr_reader :compiled_module

  # The environment used to evaluate expressions and rules.
  #
  # @return [Environment]
  attr_reader :environment

  # Evaluate either a query path or all rules.
  #
  # The environment's memoization is reset before every evaluation.
  #
  # @param query [Object, nil] query path (e.g. "data.package.rule")
  # @return [Result] evaluation result
  def evaluate(query = nil)
    environment.memoization.reset!
    value, bindings = query ? evaluate_query(query) : [evaluate_rules, nil]
    ResultBuilder.new(value, bindings).build
  end

  private

  attr_reader :expression_evaluator, :rule_evaluator

  # Shared wiring used by both construction paths (#initialize and
  # .from_environment).
  def initialize_with_environment(compiled_module, environment)
    @compiled_module = compiled_module
    @environment = environment
    @expression_evaluator, @rule_evaluator =
      build_evaluators(compiled_module.rules_by_name, compiled_module.package_path)
  end

  # Builds the expression and rule evaluators and links them to each other:
  # the expression evaluator gets the rule evaluator as its query evaluator,
  # and the rule value provider is attached to the rule evaluator.
  #
  # @return [Array] the pair [expression_evaluator, rule_evaluator]
  def build_evaluators(rules_by_name, package_path)
    rule_value_provider = RuleValueProvider.new(
      rules_by_name: rules_by_name,
      memoization: environment.memoization
    )
    expression_evaluator = build_expression_evaluator(rule_value_provider, package_path)
    rule_evaluator = build_rule_evaluator(expression_evaluator, rule_value_provider)
    expression_evaluator.attach_query_evaluator(rule_evaluator)
    [expression_evaluator, rule_evaluator]
  end

  def build_expression_evaluator(rule_value_provider, package_path)
    ExpressionEvaluator.new(
      environment: @environment,
      reference_resolver: ReferenceResolver.new(
        environment: @environment,
        package_path: package_path,
        rule_value_provider: rule_value_provider,
        imports: compiled_module.imports,
        memoization: environment.memoization
      )
    )
  end

  def build_rule_evaluator(expression_evaluator, rule_value_provider)
    RuleEvaluator.new(
      environment: @environment,
      expression_evaluator: expression_evaluator
    ).tap { |evaluator| rule_value_provider.attach(evaluator) }
  end

  # Evaluates every rule group known to the environment, keyed by rule name.
  def evaluate_rules
    results = {} # @type var results: Hash[String, Value]
    environment.rules.each do |name, rules|
      results[name] = rule_evaluator.evaluate_group(rules)
    end
    results
  end

  # @return [Array] the pair [value, bindings] for the query
  def evaluate_query(query)
    node = QueryNodeBuilder.new(query).build
    bindings = bindings_for_query(node)
    value = expression_evaluator.evaluate(node)
    [value, bindings]
  end

  # Falls back to an empty hash when unification yields no first result.
  def bindings_for_query(node)
    expression_evaluator.eval_with_unification(node, environment).first || {}
  end

  def eval_node(node)
    expression_evaluator.evaluate(node)
  end
end
154
+
155
# Wraps an evaluation value (and optional bindings) in a Result.
class ResultBuilder
  # @param value [Object] evaluated value
  # @param bindings [Object, nil] variable bindings, if any
  def initialize(value, bindings)
    @value = value
    @bindings = bindings
  end

  # The result is successful unless the value is an UndefinedValue. The
  # +bindings+ keyword is only passed to Result when bindings are present.
  #
  # @return [Result]
  def build
    attributes = { value: value, success: !value.is_a?(UndefinedValue) }
    attributes[:bindings] = bindings if bindings
    Result.new(**attributes)
  end

  private

  attr_reader :bindings, :value
end
173
+ end
174
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ruby
4
+ module Rego
5
# Lexer helpers for numeric literals.
class Lexer
  private

  # Reads a numeric literal starting at the cursor and returns a NUMBER
  # token whose value is the parsed Integer or Float.
  def read_number
    start = capture_position
    literal = read_number_prefix << read_fractional_part << read_exponent_part
    build_token(TokenType::NUMBER, parse_number(literal, start), start)
  end

  # Consumes the first character plus any digits that follow it. A "0"
  # immediately followed by another digit is rejected.
  def read_number_prefix
    first = advance
    raise_error("Invalid number literal", capture_position, length: 1) if first == "0" && digit?(current_char)

    (+"") << first << read_digits
  end

  # Consumes "." and the digits after it; returns "" when the cursor is not
  # on a ".". A "." that is not followed by a digit is an error.
  def read_fractional_part
    return "" unless current_char == "."

    raise_error("Invalid number literal", capture_position, length: 1) unless digit?(peek(1))

    (+"") << advance << read_digits
  end

  # Consumes an exponent marker, an optional sign and the exponent digits;
  # returns "" when the cursor is not on an exponent marker.
  def read_exponent_part
    return "" unless exponent_start?

    exponent = (+"") << advance << read_exponent_sign
    raise_error("Invalid number exponent", capture_position, length: 1) unless digit?(current_char)

    exponent << read_digits
  end

  # Consumes and returns a sign character when one is present, "" otherwise.
  def read_exponent_sign
    candidate = current_char
    signed = candidate && SIGN_CHARS.include?(candidate)
    signed ? advance : ""
  end

  # Consumes a (possibly empty) run of digits.
  def read_digits
    run = +""
    while digit?(current_char)
      run << advance
    end
    run
  end

  # Text containing ".", "e" or "E" is parsed as a Float, anything else as a
  # base-10 Integer. Parse failures are re-raised through #raise_error.
  def parse_number(buffer, start)
    floating = buffer.match?(/[.eE]/)
    floating ? Float(buffer) : Integer(buffer, 10)
  rescue ArgumentError
    raise_error("Invalid number literal", start, length: buffer.length)
  end
end
67
+ end
68
+ end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ruby
4
+ module Rego
5
# Stream helpers for lexer traversal and errors.
#
# The methods below read and update the cursor state (@position, @offset,
# @line, @column) and rely on the +source+, +line+, +column+ and +offset+
# readers, none of which are defined in this part of the class.
# :reek:InstanceVariableAssumption
class Lexer
  private

  # Consumes the character under the cursor and returns it. A line break
  # ("\r", "\r\n" or "\n") is consumed as a whole and returned as "\n".
  # Raises through #raise_unexpected_eof when no character is left.
  def advance
    char = current_char
    case char
    when nil then raise_unexpected_eof
    when "\r" then advance_line_break
    when "\n" then advance_newline
    else
      increment_position(1)
      char
    end
  end

  # Handles "\r": a directly following "\n" is swallowed as part of the
  # same line break.
  def advance_line_break
    width = peek == "\n" ? 2 : 1
    increment_line(width)
    "\n"
  end

  def advance_newline
    increment_line(1)
    "\n"
  end

  # Moves past +count+ characters that end a line: the line counter goes up
  # by one and the column restarts at 1.
  def increment_line(count)
    @line += 1
    @column = 1
    @position += count
    @offset += count
  end

  # Moves past +count+ characters on the current line.
  def increment_position(count)
    @column += count
    @position += count
    @offset += count
  end

  # Character +distance+ positions after the cursor, or nil past the end.
  def peek(distance = 1)
    source[@position + distance]
  end

  # Consumes the current character only when it equals +expected+.
  #
  # @return [Boolean] whether a character was consumed
  def match?(expected)
    if current_char == expected
      advance
      true
    else
      false
    end
  end

  def current_char
    source[@position]
  end

  def eof?
    source.length <= @position
  end

  # Snapshot of the cursor as a hash with :line, :column and :offset.
  def capture_position
    { line: line, column: column, offset: offset }
  end

  # Builds a token whose location starts at +start+ (a #capture_position
  # hash) and spans up to the current offset.
  def build_token(type, value, start)
    start_offset = start.fetch(:offset) || 0
    span = {
      line: start.fetch(:line),
      column: start.fetch(:column),
      offset: start_offset,
      length: offset - start_offset
    }
    Token.new(type: type, value: value, location: Location.new(**span))
  end

  def identifier_start?(char)
    return false unless char

    char.match?(IDENTIFIER_START)
  end

  def identifier_part?(char)
    return false unless char

    char.match?(IDENTIFIER_PART)
  end

  def digit?(char)
    return false unless char

    char.match?(DIGIT)
  end

  def hex_digit?(char)
    return false unless char

    char.match?(HEX_DIGIT)
  end

  def whitespace?(char)
    !char.nil? && WHITESPACE_CHARS.include?(char)
  end

  def newline?(char)
    !char.nil? && NEWLINE_CHARS.include?(char)
  end

  # Whether the character under the cursor is one of EXPONENT_CHARS.
  def exponent_start?
    char = current_char
    return false unless char

    EXPONENT_CHARS.include?(char) ? true : false
  end

  # Number of offset units between +start+ and the current cursor.
  def span_length_from(start)
    offset - (start.fetch(:offset) || 0)
  end

  def raise_unterminated_string(start)
    raise_error("Unterminated string literal", start, length: span_length_from(start))
  end

  def raise_unterminated_raw_string(start)
    raise_error("Unterminated raw string literal", start, length: span_length_from(start))
  end

  def raise_unexpected_eof
    raise_error("Unexpected end of input", capture_position, length: 0)
  end

  # Raises a LexerError located at +position+ (a #capture_position hash).
  def raise_error(message, position, length: nil)
    raise LexerError.new(
      message,
      line: position.fetch(:line),
      column: position.fetch(:column),
      offset: position.fetch(:offset) || 0,
      length: length
    )
  end
end
136
+ end
137
+ end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ruby
4
+ module Rego
5
# Lexer helpers for string literals.
class Lexer
  TEMPLATE_ESCAPE = "\u0000"

  private

  # rubocop:disable Metrics/MethodLength

  # Reads a double-quoted string starting at the opening quote and returns a
  # STRING token holding the decoded contents. A line break or end of input
  # before the closing quote raises an unterminated-string error.
  def read_string
    start = capture_position
    advance
    buffer = +""
    until eof?
      char_position = capture_position
      char = advance
      return build_token(TokenType::STRING, buffer, start) if char == "\""

      # advance reports every kind of line break as "\n".
      break if char == "\n"

      buffer << (char == "\\" ? read_escape_sequence(char_position) : char)
    end
    raise_unterminated_string(start)
  end

  # Reads a backtick-delimited raw string and returns a RAW_STRING token.
  # Characters are taken verbatim, except that a backslash directly followed
  # by "{" contributes just "{".
  def read_raw_string
    start = capture_position
    advance
    buffer = +""
    until eof?
      char = advance
      return build_token(TokenType::RAW_STRING, buffer, start) if char == "`"

      if char == "\\" && current_char == "{"
        advance
        buffer << "{"
      else
        buffer << char
      end
    end
    raise_unterminated_raw_string(start)
  end
  # rubocop:enable Metrics/MethodLength

  # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity

  # Decodes the escape whose backslash (already consumed) sits at
  # +backslash_position+ and returns the text it stands for.
  #
  # A backslash at end of input or directly before a line break ("\n" or
  # "\r") is reported as an unterminated string, matching how #read_string
  # treats line breaks; any other unknown escape is an invalid-escape error.
  def read_escape_sequence(backslash_position)
    char = current_char
    raise_unterminated_string(backslash_position) if char.nil? || char == "\n" || char == "\r"

    advance

    case char
    when "\"", "\\", "/", "{"
      char
    when "b"
      "\b"
    when "f"
      "\f"
    when "n"
      "\n"
    when "r"
      "\r"
    when "t"
      "\t"
    when "u"
      read_unicode_escape
    else
      raise_error("Invalid escape sequence: \\#{char}", backslash_position, length: 2)
    end
  end
  # rubocop:enable Metrics/MethodLength, Metrics/CyclomaticComplexity

  # Decodes the four hex digits after "\u" into a UTF-8 string.
  #
  # A high surrogate (D800-DBFF) directly followed by a "\uXXXX" low
  # surrogate (DC00-DFFF) is combined into the single code point the pair
  # encodes, and that second escape is consumed as well. A surrogate without
  # a partner is replaced by U+FFFD, because packing a lone surrogate would
  # produce a string that is not valid UTF-8.
  def read_unicode_escape
    code_unit = read_unicode_code_unit
    return [code_unit].pack("U") unless code_unit.between?(0xD800, 0xDFFF)

    low = code_unit <= 0xDBFF ? pending_low_surrogate : nil
    return "\uFFFD" unless low

    6.times { advance } # skip the "\uXXXX" escape that supplied the low half
    [0x10000 + ((code_unit - 0xD800) << 10) + (low - 0xDC00)].pack("U")
  end

  # Consumes exactly four hex digits and returns their integer value.
  def read_unicode_code_unit
    hex = +""

    4.times do
      char = current_char.to_s
      raise_error("Invalid unicode escape sequence", capture_position, length: 1) unless hex_digit?(char)

      advance
      hex << char
    end

    hex.to_i(16)
  end

  # Looks ahead, without consuming anything, for a "\uXXXX" escape holding a
  # low surrogate; returns its value, or nil when the upcoming text is
  # anything else.
  def pending_low_surrogate
    return unless current_char == "\\" && peek(1) == "u"

    digits = (2..5).map { |distance| peek(distance).to_s }
    return unless digits.all? { |digit| hex_digit?(digit) }

    code_unit = digits.join.to_i(16)
    code_unit if code_unit.between?(0xDC00, 0xDFFF)
  end
end
89
+ end
90
+ end