natsuzora 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +62 -0
  5. data/Rakefile +75 -0
  6. data/lib/natsuzora/ast.rb +94 -0
  7. data/lib/natsuzora/context.rb +96 -0
  8. data/lib/natsuzora/contract/ast/any.rb +20 -0
  9. data/lib/natsuzora/contract/ast/list.rb +28 -0
  10. data/lib/natsuzora/contract/ast/node.rb +16 -0
  11. data/lib/natsuzora/contract/ast/record.rb +33 -0
  12. data/lib/natsuzora/contract/ast/ref.rb +27 -0
  13. data/lib/natsuzora/contract/ast/scalar.rb +60 -0
  14. data/lib/natsuzora/contract/ast.rb +38 -0
  15. data/lib/natsuzora/contract/compiled_lexer.rb +15 -0
  16. data/lib/natsuzora/contract/diff_marker.rb +15 -0
  17. data/lib/natsuzora/contract/document.rb +45 -0
  18. data/lib/natsuzora/contract/field.rb +62 -0
  19. data/lib/natsuzora/contract/parse_error.rb +16 -0
  20. data/lib/natsuzora/contract/parser.rb +362 -0
  21. data/lib/natsuzora/contract/scalar_type.rb +17 -0
  22. data/lib/natsuzora/contract/type_def.rb +39 -0
  23. data/lib/natsuzora/contract/type_ref_resolver.rb +56 -0
  24. data/lib/natsuzora/contract/validation_target.rb +13 -0
  25. data/lib/natsuzora/contract/validator.rb +179 -0
  26. data/lib/natsuzora/contract.rb +23 -0
  27. data/lib/natsuzora/data/lexers/contract.lkt1 +1 -0
  28. data/lib/natsuzora/data/lexers/template.lkt1 +1 -0
  29. data/lib/natsuzora/data_normalizable.rb +31 -0
  30. data/lib/natsuzora/errors.rb +37 -0
  31. data/lib/natsuzora/html_escape.rb +21 -0
  32. data/lib/natsuzora/lexer/compiled_lexer.rb +15 -0
  33. data/lib/natsuzora/lexer/token_processor.rb +156 -0
  34. data/lib/natsuzora/lexer.rb +95 -0
  35. data/lib/natsuzora/lexer_loader.rb +15 -0
  36. data/lib/natsuzora/lexers/contract.rb +24 -0
  37. data/lib/natsuzora/lexers/template.rb +31 -0
  38. data/lib/natsuzora/parser.rb +419 -0
  39. data/lib/natsuzora/payload.rb +35 -0
  40. data/lib/natsuzora/renderer.rb +132 -0
  41. data/lib/natsuzora/template.rb +34 -0
  42. data/lib/natsuzora/template_loader.rb +118 -0
  43. data/lib/natsuzora/token.rb +20 -0
  44. data/lib/natsuzora/validator.rb +73 -0
  45. data/lib/natsuzora/value.rb +73 -0
  46. data/lib/natsuzora/version.rb +5 -0
  47. data/lib/natsuzora.rb +30 -0
  48. metadata +105 -0
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require_relative 'type_ref_resolver'
5
+ require_relative 'validation_target'
6
+ require_relative 'ast/any'
7
+ require_relative 'ast/scalar'
8
+ require_relative 'ast/record'
9
+ require_relative 'ast/list'
10
+ require_relative 'ast/ref'
11
+
12
module Natsuzora
  module Contract
    # Raised when data does not satisfy a contract.
    #
    # The exception message is "<message> at <path>"; both parts are also
    # exposed separately through #error_message and #path.
    class ValidationError < StandardError
      attr_reader :path, :error_message

      # @param message [String] description of the violation
      # @param path [String] rendered location of the offending value
      #   (defaults to the root, '$')
      def initialize(message, path = '$')
        @path = path
        @error_message = message
        super("#{message} at #{path}")
      end
    end

    # Validator for JSON data against contracts.
    class Validator
      # validate_node refuses paths with more key/index segments than this.
      MAX_VALIDATE_DEPTH = 64

      # Validate JSON data against a contract.
      #
      # @param contract [Object] contract node (AST::Any, AST::Scalar,
      #   AST::Record or AST::List)
      # @param data [Object] value to check
      # @return [nil] when the data conforms
      # @raise [ValidationError] when the data violates the contract
      # @raise [ArgumentError] when a contract node has an unknown class
      def self.validate(contract, data)
        new.validate_node(contract, data, [])
      end

      # Validate JSON data against a contract file with diff markers.
      #
      # Builds a record contract from the document fields that exist for
      # +target+ and validates +data+ against it.
      #
      # @param document [Object] object responding to #types and #fields
      # @param data [Object] value to check
      # @param target [Object] generation to validate against
      # @return [nil] when the data conforms
      # @raise [ValidationError] when the data violates the contract or a type
      #   reference cannot be resolved
      def self.validate_with_target(document, data, target: ValidationTarget::CURRENT)
        validate(build_contract_for_target(document, target), data)
      end

      # Validate +data+ against one contract node, recursing into children.
      #
      # @param contract [Object] contract node
      # @param data [Object] value found at +path+
      # @param path [Array<Array>] segments such as [:key, "name"] or
      #   [:index, 0] leading from the root to +data+
      # @return [nil] when the data conforms
      # @raise [ValidationError] on a violation, an unresolved AST::Ref, or
      #   when +path+ is longer than MAX_VALIDATE_DEPTH
      # @raise [ArgumentError] when +contract+ is of an unknown class
      def validate_node(contract, data, path)
        raise ValidationError.new('validation depth exceeded', render_path(path)) if path.length > MAX_VALIDATE_DEPTH

        case contract
        when AST::Any
          nil # any value is valid
        when AST::Scalar
          validate_scalar(contract, data, path)
        when AST::Record
          validate_object(contract, data, path)
        when AST::List
          validate_array(contract, data, path)
        when AST::Ref
          # References must have been resolved before validation starts.
          raise ValidationError.new("unresolved type reference '#{contract.name}'", render_path(path))
        else
          raise ArgumentError, "Unknown contract type: #{contract.class}"
        end
      end

      private

      # nil is delegated to the nullability check; any other value must be
      # accepted by the scalar contract.
      def validate_scalar(contract, data, path)
        return validate_null(contract, path) if data.nil?
        return nil if scalar_value_valid?(contract, data, path)

        raise ValidationError.new("expected #{contract.scalar_type}", render_path(path))
      end

      # Accepts nil only for contracts that report themselves nullable.
      def validate_null(contract, path)
        return nil if contract.nullable?

        raise ValidationError.new('null is not allowed', render_path(path))
      end

      # Returns false when the contract rejects the value. Strings that are
      # accepted are additionally checked for emptiness, which raises rather
      # than returning false.
      def scalar_value_valid?(contract, data, path)
        return false unless contract.accepts?(data)

        ensure_not_empty!(contract, data, path) if data.is_a?(String)
        true
      end

      # Raises when a contract that reports itself required receives ''.
      def ensure_not_empty!(contract, data, path)
        return unless contract.required? && data.empty?

        raise ValidationError.new('empty string is not allowed', render_path(path))
      end

      # Checks a Hash against a record contract. Keys may be present in the
      # data either as given by the contract or as their Symbol form.
      def validate_object(contract, data, path)
        raise ValidationError.new('expected object', render_path(path)) unless data.is_a?(Hash)

        # Every required key is checked before any value is validated, so a
        # missing property is always reported ahead of an invalid one.
        contract.required.each do |key|
          next if property_present?(data, key)

          raise ValidationError.new('missing required property', render_path(path + [[:key, key]]))
        end

        # Properties absent from the data are skipped; present ones (including
        # explicit nil values) are validated against their child contract.
        contract.properties.each do |key, child_contract|
          next unless property_present?(data, key)

          validate_node(child_contract, property_value(data, key), path + [[:key, key]])
        end

        nil
      end

      # True when +data+ holds +key+ as given or in Symbol form.
      def property_present?(data, key)
        data.key?(key) || data.key?(key.to_sym)
      end

      # Value stored under +key+, preferring the key as given over its Symbol
      # form. Only meaningful when property_present? is true.
      def property_value(data, key)
        data.key?(key) ? data[key] : data[key.to_sym]
      end

      # Checks every element of an Array against the list's item contract.
      def validate_array(contract, data, path)
        raise ValidationError.new('expected array', render_path(path)) unless data.is_a?(Array)

        data.each_with_index do |item, idx|
          validate_node(contract.items, item, path + [[:index, idx]])
        end

        nil
      end

      # Renders path segments as "$.key[0].other". Segments whose type is
      # neither :key nor :index contribute nothing.
      def render_path(path)
        path.each_with_object(+'$') do |(type, value), rendered|
          case type
          when :key
            rendered << ".#{value}"
          when :index
            rendered << "[#{value}]"
          end
        end
      end

      class << self
        private

        # Builds the AST::Record used for target validation: each document
        # field that has a contract for +target+ becomes a required property
        # whose type references are resolved through TypeRefResolver.
        def build_contract_for_target(document, target)
          resolver = TypeRefResolver.new(
            document.types,
            target: target,
            on_missing: ->(name) { raise ValidationError, "undefined type '#{name}'" },
            on_unavailable: ->(name) { raise ValidationError, "type '#{name}' is not available for #{target}" },
            on_cyclic: ->(name) { raise ValidationError, "cyclic type reference '#{name}'" }
          )

          properties = {}
          required = []

          document.fields.each do |name, field|
            contract = field.for_target(target)
            next unless contract # field does not exist for this target

            properties[name] = resolver.resolve(contract)
            required << name
          end

          AST::Record.new(properties, required)
        end
      end
    end

    # Validate JSON data against a contract.
    #
    # @return [true] when the data conforms
    # @raise [ValidationError] when the data violates the contract
    def self.validate(contract, data) # rubocop:disable Naming/PredicateMethod
      Validator.validate(contract, data)
      true
    end

    # Validate JSON data against a contract file with diff markers.
    #
    # @return [true] when the data conforms
    # @raise [ValidationError] when the data violates the contract
    def self.validate_with_target(document, data, target: ValidationTarget::CURRENT) # rubocop:disable Naming/PredicateMethod
      Validator.validate_with_target(document, data, target: target)
      true
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'contract/parse_error'
4
+ require_relative 'contract/diff_marker'
5
+ require_relative 'contract/validation_target'
6
+ require_relative 'contract/scalar_type'
7
+ require_relative 'contract/ast'
8
+ require_relative 'contract/field'
9
+ require_relative 'contract/type_def'
10
+ require_relative 'contract/type_ref_resolver'
11
+ require_relative 'contract/document'
12
+ require_relative 'contract/compiled_lexer'
13
+ require_relative 'contract/parser'
14
+ require_relative 'contract/validator'
15
+
16
module Natsuzora
  # Contract notation (formerly the standalone `subaru` gem).
  #
  # Covers parsing `.ntzc` files into a {Contract} tree, validating JSON data
  # against contracts, and the two-generation diff markers (`+`, `-`, `*`).
  module Contract
    # Error class of the contract namespace; a StandardError subclass.
    class Error < StandardError
    end
  end
end
@@ -0,0 +1 @@
1
+ {"format":"lkt1","codec":"deflate+base64","kind":"program","table_version":2,"uncompressed_len":2276,"sha256":"375d869fdc099409a5c07a3eb293c87d6a8a31c886cb9d545d77a297dc1c2ef1","data":"eJzlletrE0EUxc/sw7TbByolFAnSVkVoEayPCv1gs92d6pp9pLubpA/KUm3rq76tr1LIn5b/rJ7dxBiXRv0QpNgLP+7cOTPD3rMw41bieagABCYxniShjGRYl3ZyM0lQcPy66Tr2L8ItCr317Vx9J1ffzdULufoea90Mw6AB3QrcwIcRVKWfLIemJTFiuUEkO8VoV6jIGGM/pbQcWq3JKHa4f0SuWa7pmVmhVd1aBN1zfCYtis0QBSvwPOnHGI7XqzLxTU/CcGzOOCuOpO7Lhuv4Ekojoi+Fnd297YP9j73DdtRgYIE59U/JZsRxFpmfAsPMelcDtBupcD+bEYsQhxBHEJsQWxBLENMQcxCz6RZR4qEquiHQJ5R+wn8WStYpbWt3nPfn70J048epfSNb3rMhH4PrbDDRdmfQ/vy+f/EH/05T5P0p0p2ejzx1//NfRufqOtEZgRPNOSN+9d45bFlcAGZg8RZXF4Fzh8DQEWBsAqNbwPgScH4auDgHTMwCxSvApAlcWiYWsYkkK+QBeUgc8ohUiEs84pOAVMkqCUlEYlIjddIga2SdbJBtoPSYPCE7ZJfskafkGXlOXpCXZJ+8Iq/JG/KWvCPvyQfC5610QD6Rz+QL+Uq+EQO4zGdtaoxMkY5R168C19hjNsHvn7Fpma2VUUSZ4zQ3mZvMLeYWioJreEEVBXWVuqCuUhfUVeoKdY26Ql1rpn+AucXMWk//CtfrZWau16nzPHqGCeCYfvMt5aDDd/X9jB8="}
@@ -0,0 +1 @@
1
+ {"format":"lkt1","codec":"deflate+base64","kind":"program","table_version":2,"uncompressed_len":2226,"sha256":"f670fe5c25083a9642759f0aa0cd16670997d72b94f1323c676001b403fba8d3","data":"eJzdlW1PFEkQx/89s7M7sICIZj0vJqfx8YUCPmEuhuw2M427x+ws7OwCSsgGFZ/FB/SezCX7/r4Un+C+kvevnj3pAyX4ztjkx1R11VRXV0/1Jgudq/ABKJzCWK/XNplpL5u4N93rodRIl3XSiP9nuEaDq1/fo9/Yo9/co8/s0W9RL3TMagelRdOOTNpBIdZZHUGUtDKDQt0qWSKP0Tmd3ul108xE3bbBiFUbaZR0Y4OyWY0S3dSdRitFsLDSa8xjiI9umpgsQ4miSRjRCjqqWx+dwY9bHa7WajY1ArPU1QnCpa7JbJzhlXqjY7JFHRkEjVjS81B6uPlo4/2LdxiWxHtPt7Y237rTMlwTEALePxiH92ENZboGH9b4t/4X1HmoK/BEOgs1pc7R+SgZt0HURzvs+TA+nyeJ78wXHDlw5KIjlxw5dOQhRx627wMnrLw7X3bkEUcedeQxRz7iyOM2D+CHM7KBpxL5Pf95W7KHbUl+U1Z9IG5iKNlKydyQuJTFMPJCFhPr2EMJv2njztgqMNS+Ku3KXl52/CQLq0d25k9bjTyrL7+3Kx+u0lJJ4IIkqSRxT3L2JdOC7DLYtl5/D/Ip7T/ZQ6yXn9CVA09ZTguYlIXVhnhKHt72p2RsoZ9Y18MWcITclFhqOy/NJNRlqFmoqtjVKUbx8ZVDfRqDCe+LA3mBDrR/S8OzeLZdOVSF1dlNUjas9r/0manvceS1ySsjhRgFIsyd51fLz3p0nc19FpiYAo7zvqiwa39kR53hl3uJH+D0JHDrMvDzLHC7Csyy5apsiSqjVE8TDeg5EpGYGDJP7pA6aZBfyAJJSJOkpEUWyRJpk4x0SJcskxWySu6Se+Q+YSNp3kaal4p+TNhT+hl5Tthz+iXhRaBfkdfkDeHdptlDmr8O+lfyG/md/EF4LelBkerMH8dYnRpvC/4YlCaoT1OXO1S6knPz4sM9o4J+FHu1sIIdPvthRSHie+WrYlO1KPbBOfr4tTBV9PH7YezvhHEBIesB1ieUj69Z6J9mTaK4UAvj4s5/tqGB7WJu67u24YFtKrcxZgDXXh7YZ6w9yONWfJGZp8/cAtGZW5E5+sytWAs9cL7Y55N+9D/OLc/lew8/Dvb/L/ErdH4="}
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module Natsuzora
  # Mixin providing `normalize_data(data)` to the including class.
  module DataNormalizable
    # Returns a normalized copy of +data+ using pure transformations only:
    # - Hash keys are converted with #to_s, recursively through nested
    #   Hashes and Arrays.
    # - Whole-number Float values are converted to Integers.
    # Any other value is returned unchanged.
    def normalize_data(data)
      return normalize_float(data) if data.is_a?(Float)
      return data.map { |element| normalize_data(element) } if data.is_a?(Array)
      return data unless data.is_a?(Hash)

      data.to_h { |key, element| [key.to_s, normalize_data(element)] }
    end

    private

    # Finite Floats with no fractional part become Integers; everything else
    # (fractions, NaN, infinities) is passed through.
    def normalize_float(value)
      return value unless value.finite?

      truncated = value.to_i
      truncated == value ? truncated : value
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Natsuzora
  # Error carrying an optional source position. When a line is given, the
  # position is appended to the message ("... at line L, column C").
  class Error < StandardError
    attr_reader :line, :column

    # @param message [String] description of the problem
    # @param line [Integer, nil] line the error refers to
    # @param column [Integer, nil] column the error refers to
    def initialize(message, line: nil, column: nil)
      @line = line
      @column = column
      super(build_message(message))
    end

    private

    # Appends the position suffix; without a line the message is used as is
    # (a column alone is not rendered).
    def build_message(message)
      location = line && (column ? " at line #{line}, column #{column}" : " at line #{line}")
      location ? "#{message}#{location}" : message
    end
  end

  class LexerError < Error
  end

  class ParseError < Error
  end

  class ReservedWordError < ParseError
  end

  class RenderError < Error
  end

  class UndefinedVariableError < RenderError
  end

  class TypeError < RenderError
  end

  class IncludeError < RenderError
  end

  class ShadowingError < RenderError
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Natsuzora
  # HTML escaping of the five characters &, <, >, " and '.
  module HtmlEscape
    # Replacement text for each character matched by ESCAPE_REGEXP.
    ESCAPE_MAP = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
      "'" => '&#39;'
    }.freeze

    ESCAPE_REGEXP = /[&<>"']/

    class << self
      # Returns a copy of +string+ with every special character replaced by
      # its entity from ESCAPE_MAP.
      #
      # The argument is coerced with #to_s first, so nil, numbers and symbols
      # are accepted instead of raising NoMethodError.
      #
      # @param string [#to_s] text to escape
      # @return [String] escaped text
      def escape(string)
        string.to_s.gsub(ESCAPE_REGEXP, ESCAPE_MAP)
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../lexer_loader'
4
+
5
module Natsuzora
  class Lexer
    # Access point for the precompiled template lexer.
    module CompiledLexer
      # Location of the compiled lexer file, resolved relative to this file.
      LEXER_PATH = File.expand_path('../data/lexers/template.lkt1', __dir__)

      class << self
        # Loads the lexer through LexerLoader on first use and returns the
        # memoized object afterwards.
        def instance
          @instance ||= LexerLoader.load_compiled(LEXER_PATH)
        end
      end
    end
  end
end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
module Natsuzora
  class Lexer
    # Processes tokens to handle whitespace control and comments.
    #
    # Responsibilities:
    # - Consume DASH tokens and apply trim rules
    # - Consume comment tags entirely
    # - Detect unclosed comments
    #
    # The output never contains an empty TEXT token.
    class TokenProcessor
      # Optional spaces/tabs followed by one line break (LF, CRLF or CR) or by
      # the end of the text: the prefix a right trim removes from the TEXT
      # that follows the tag.
      LEADING_BLANK_LINE = /\A[ \t]*(?:\r\n?|\n|\z)/

      # @param tokens [Array] tokens responding to #type, #value, #line and
      #   #column
      def initialize(tokens)
        @tokens = tokens
        @result = []
        @strip_next_text = false
      end

      # Walks the input once: TEXT tokens are copied (possibly trimmed), every
      # other token starts a tag that is handled as a unit.
      #
      # @return [Array] processed tokens without DASH tokens and comment tags
      # @raise [LexerError] when a comment tag has no CLOSE token
      def process
        idx = 0

        while idx < @tokens.length
          token = @tokens[idx]

          if token.type == :TEXT
            append_text(token)
            idx += 1
          else
            # process_tag consumes the whole tag and returns where to resume.
            idx = process_tag(idx)
          end
        end

        @result
      end

      private

      # Handles the tag starting at +start_idx+ (through its CLOSE token, or
      # through the end of input when it is unclosed) and returns the index of
      # the first token after it.
      def process_tag(start_idx)
        close_idx = find_close_index(start_idx)
        tag_tokens = close_idx ? @tokens[start_idx..close_idx] : @tokens[start_idx..]

        apply_left_trim(tag_tokens)
        apply_right_trim(tag_tokens)

        if comment_tag?(tag_tokens)
          raise_unclosed_comment!(tag_tokens) unless close_idx
          return close_idx + 1 # comments emit nothing
        end

        emit_tag_tokens(tag_tokens)
        close_idx ? close_idx + 1 : @tokens.length
      end

      # Appends a TEXT token, first applying a pending right trim. Text that
      # ends up empty is dropped.
      def append_text(token)
        text_value = token.value

        if @strip_next_text
          @strip_next_text = false
          text_value = strip_leading_whitespace_if_blank_line(text_value)
        end

        return if text_value.empty?

        @result << Token.new(:TEXT, text_value, line: token.line, column: token.column)
      end

      # Index of the first CLOSE token at or after +start_idx+, or nil.
      def find_close_index(start_idx)
        (start_idx...@tokens.length).find { |idx| @tokens[idx].type == :CLOSE }
      end

      # A tag whose first token is DASH trims the text before it.
      def apply_left_trim(tag_tokens)
        strip_trailing_from_last_text_if_blank_line if tag_tokens.first&.type == :DASH
      end

      # A DASH directly before CLOSE schedules a trim of the next TEXT token.
      def apply_right_trim(tag_tokens)
        close_idx = tag_tokens.index { |token| token.type == :CLOSE }
        @strip_next_text = true if close_idx&.positive? && tag_tokens[close_idx - 1].type == :DASH
      end

      # A comment tag starts with PERCENT, optionally preceded by one DASH.
      def comment_tag?(tag_tokens)
        first = tag_tokens.first
        return false unless first

        return true if first.type == :PERCENT

        first.type == :DASH && tag_tokens[1]&.type == :PERCENT
      end

      # Copies the tag to the output without its DASH tokens.
      def emit_tag_tokens(tag_tokens)
        tag_tokens.each do |token|
          next if token.type == :DASH

          @result << token
        end
      end

      # Left trim: when the last emitted TEXT ends in a run of spaces/tabs
      # that starts at a line break (or at the start of the text), that run is
      # removed. A TEXT that becomes empty is removed from the output so the
      # result stays free of empty TEXT tokens, as in append_text.
      #
      # NOTE(review): rindex picks the most recent TEXT even when other tag
      # tokens were emitted after it, so the trimmed text is not necessarily
      # adjacent to the current tag — confirm this is intended.
      def strip_trailing_from_last_text_if_blank_line
        last_idx = @result.rindex { |token| token.type == :TEXT }
        return unless last_idx

        last_text = @result[last_idx]
        value = last_text.value
        line_start = same_line_start_offset(value)
        trailing_segment = value[line_start..] || ''
        return unless horizontal_whitespace_only?(trailing_segment)

        stripped = value[0...line_start]
        if stripped.empty?
          @result.delete_at(last_idx)
        else
          @result[last_idx] = Token.new(:TEXT, stripped, line: last_text.line, column: last_text.column)
        end
      end

      # Right trim: removes leading spaces/tabs together with the line break
      # that follows them. Text consisting only of spaces/tabs becomes ''.
      # Text whose first non-blank character is not a line break is returned
      # unchanged.
      def strip_leading_whitespace_if_blank_line(text)
        text.sub(LEADING_BLANK_LINE, '')
      end

      # Offset just past the last CR or LF in +value+, or 0 without one.
      def same_line_start_offset(value)
        line_break_idx = value.rindex(/[\r\n]/)
        line_break_idx ? line_break_idx + 1 : 0
      end

      # True for '' and for strings made only of spaces and tabs.
      def horizontal_whitespace_only?(segment)
        segment.match?(/\A[ \t]*\z/)
      end

      # Reports the position of the PERCENT token (or of the first tag token
      # when there is none).
      def raise_unclosed_comment!(tag_tokens)
        comment_token = tag_tokens.find { |token| token.type == :PERCENT } || tag_tokens.first
        raise LexerError.new('Unclosed comment', line: comment_token.line, column: comment_token.column)
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lexer/compiled_lexer'
4
+ require_relative 'lexer/token_processor'
5
+ require_relative 'token'
6
+ require_relative 'errors'
7
+
8
module Natsuzora
  # Lexer for Natsuzora template language using LexerKit
  #
  # Responsibilities:
  # - Escape sequence processing ({[{]} -> {[)
  # - Whitespace control via TokenProcessor ({[- and -]})
  # - Error handling for invalid characters
  # - EOF token addition
  class Lexer
    ESCAPE_SEQUENCE = '{[{]}'
    ESCAPED_VALUE = '{['

    # @param source [String] template source text
    def initialize(source)
      @source = source
    end

    # Runs the compiled lexer over the source, post-processes the tokens with
    # TokenProcessor and appends a final EOF token.
    #
    # @return [Array<Token>] processed tokens ending with :EOF
    # @raise [LexerError] when the stream reports an :INVALID token
    def tokenize
      raw_tokens = map_tokens_from_stream(CompiledLexer.instance.stream(@source))
      TokenProcessor.new(raw_tokens).process.tap { |tokens| add_eof(tokens) }
    end

    private

    # Drains the stream into Token objects; entries that yield no token
    # (empty text) are skipped.
    def map_tokens_from_stream(stream)
      collected = []

      until stream.eof?
        token = token_at(stream)
        collected << token if token
        stream.advance
      end

      collected
    end

    # Builds the Token for the stream's current entry. Returns nil for TEXT
    # that is empty after unescaping and raises for :INVALID entries.
    def token_at(stream)
      name = stream.token_name
      text = stream.text

      case name
      when :TEXT
        unescaped = process_text_value(text)
        return nil if unescaped.empty?

        line, column = stream.line_col
        Token.new(:TEXT, unescaped, line: line, column: column)
      when :INVALID
        line, column = stream.line_col
        raise LexerError.new("Unexpected character: '#{text}'", line: line, column: column)
      else
        line, column = stream.line_col
        Token.new(name, text, line: line, column: column)
      end
    end

    # Replaces every escape sequence with the literal delimiter it stands for.
    def process_text_value(text)
      text.gsub(ESCAPE_SEQUENCE, ESCAPED_VALUE)
    end

    # Appends the EOF token: at 1:1 for an empty list, otherwise at the
    # position computed from the end of the last token's value.
    def add_eof(tokens)
      line, column = tokens.empty? ? [1, 1] : position_after_value(tokens.last)
      tokens << Token.new(:EOF, nil, line: line, column: column)
    end

    # Advances the token's start position over its value: each "\n" moves to
    # column 1 of the next line, any other character moves one column right.
    def position_after_value(token)
      (token.value || '').each_char.reduce([token.line, token.column]) do |(line, column), char|
        char == "\n" ? [line + 1, 1] : [line, column + 1]
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lexer_kit'
4
+
5
module Natsuzora
  # Loads precompiled lexer files through LexerKit.
  module LexerLoader
    # @param path [String] location of the compiled lexer file
    # @return [Object] whatever LexerKit.load_lexer returns for +path+
    # @raise [LoadError] when +path+ is not an existing regular file
    def self.load_compiled(path)
      return LexerKit.load_lexer(path) if File.file?(path)

      raise LoadError, "Precompiled lexer is missing: #{path}. Run `rake lexers:compile`."
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lexer_kit'
4
+
5
# Token definitions for the contract lexer, declared with the LexerKit DSL.
# NOTE(review): declaration order may affect matching priority in LexerKit —
# confirm before reordering.
LexerKit.build do
  # Fixed-string tokens.
  token :ARROW, '->'
  token :COLON, ':'
  token :OPEN_BRACE, '{'
  token :CLOSE_BRACE, '}'
  token :OPEN_BRACKET, '['
  token :CLOSE_BRACKET, ']'
  token :QUESTION, '?'
  token :EXCLAMATION, '!'
  token :PLUS, '+'
  token :MINUS, '-'
  token :STAR, '*'

  # Pattern tokens. A comment runs from '#' up to (not including) the line
  # break; type names start with an uppercase letter, identifiers with a
  # lowercase one; each "\n" is its own NEWLINE token.
  token :COMMENT, /#[^\n]*/
  token :TYPE_NAME, /[A-Z][a-zA-Z0-9_]*/
  token :IDENTIFIER, /[a-z][a-zA-Z0-9_]*/
  token :NEWLINE, /\n/

  # Runs of spaces, tabs and carriage returns.
  # NOTE(review): `skip: true` presumably keeps these out of the token
  # stream — confirm against the LexerKit documentation.
  token :WS, /[ \t\r]+/, skip: true
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lexer_kit'
4
+
5
# Token definitions for the template lexer, declared with the LexerKit DSL.
# NOTE(review): declaration order may affect matching priority in LexerKit
# (e.g. '!unsecure' / '!include' before '!') — confirm before reordering.
LexerKit.build do
  # TEXT is delimited by '{[', with '{[{]}' as the escape form; the tokens in
  # the block are the ones declared for the delimited section.
  # NOTE(review): presumably content outside '{[' ... ']}' is emitted as TEXT
  # and the block applies inside tags — confirm against LexerKit.
  delimited :TEXT, delimiter: '{[', escape: '{[{]}' do
    # Tag punctuation.
    # NOTE(review): `pop: true` on CLOSE presumably returns to TEXT mode —
    # confirm against LexerKit.
    token :PERCENT, '%'
    token :DASH, '-'
    token :CLOSE, ']}', pop: true
    token :HASH, '#'
    token :SLASH, '/'

    # '!'-prefixed directives, followed by the bare '!'.
    token :BANG_UNSECURE, '!unsecure'
    token :BANG_INCLUDE, '!include'
    token :EXCLAMATION, '!'

    # Keywords.
    token :KW_IF, 'if'
    token :KW_UNLESS, 'unless'
    token :KW_ELSE, 'else'
    token :KW_EACH, 'each'
    token :KW_AS, 'as'

    # Remaining punctuation.
    token :DOT, '.'
    token :COMMA, ','
    token :EQUAL, '='
    token :QUESTION, '?'

    # Whitespace runs (including line breaks) and identifiers, which start
    # with an ASCII letter.
    token :WHITESPACE, /[ \t\r\n]+/
    token :IDENT, /[A-Za-z][A-Za-z0-9_]*/
  end
end