natsuzora 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +55 -0
- data/CHANGELOG.md +62 -0
- data/Rakefile +75 -0
- data/lib/natsuzora/ast.rb +94 -0
- data/lib/natsuzora/context.rb +96 -0
- data/lib/natsuzora/contract/ast/any.rb +20 -0
- data/lib/natsuzora/contract/ast/list.rb +28 -0
- data/lib/natsuzora/contract/ast/node.rb +16 -0
- data/lib/natsuzora/contract/ast/record.rb +33 -0
- data/lib/natsuzora/contract/ast/ref.rb +27 -0
- data/lib/natsuzora/contract/ast/scalar.rb +60 -0
- data/lib/natsuzora/contract/ast.rb +38 -0
- data/lib/natsuzora/contract/compiled_lexer.rb +15 -0
- data/lib/natsuzora/contract/diff_marker.rb +15 -0
- data/lib/natsuzora/contract/document.rb +45 -0
- data/lib/natsuzora/contract/field.rb +62 -0
- data/lib/natsuzora/contract/parse_error.rb +16 -0
- data/lib/natsuzora/contract/parser.rb +362 -0
- data/lib/natsuzora/contract/scalar_type.rb +17 -0
- data/lib/natsuzora/contract/type_def.rb +39 -0
- data/lib/natsuzora/contract/type_ref_resolver.rb +56 -0
- data/lib/natsuzora/contract/validation_target.rb +13 -0
- data/lib/natsuzora/contract/validator.rb +179 -0
- data/lib/natsuzora/contract.rb +23 -0
- data/lib/natsuzora/data/lexers/contract.lkt1 +1 -0
- data/lib/natsuzora/data/lexers/template.lkt1 +1 -0
- data/lib/natsuzora/data_normalizable.rb +31 -0
- data/lib/natsuzora/errors.rb +37 -0
- data/lib/natsuzora/html_escape.rb +21 -0
- data/lib/natsuzora/lexer/compiled_lexer.rb +15 -0
- data/lib/natsuzora/lexer/token_processor.rb +156 -0
- data/lib/natsuzora/lexer.rb +95 -0
- data/lib/natsuzora/lexer_loader.rb +15 -0
- data/lib/natsuzora/lexers/contract.rb +24 -0
- data/lib/natsuzora/lexers/template.rb +31 -0
- data/lib/natsuzora/parser.rb +419 -0
- data/lib/natsuzora/payload.rb +35 -0
- data/lib/natsuzora/renderer.rb +132 -0
- data/lib/natsuzora/template.rb +34 -0
- data/lib/natsuzora/template_loader.rb +118 -0
- data/lib/natsuzora/token.rb +20 -0
- data/lib/natsuzora/validator.rb +73 -0
- data/lib/natsuzora/value.rb +73 -0
- data/lib/natsuzora/version.rb +5 -0
- data/lib/natsuzora.rb +30 -0
- metadata +105 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require_relative 'type_ref_resolver'
|
|
5
|
+
require_relative 'validation_target'
|
|
6
|
+
require_relative 'ast/any'
|
|
7
|
+
require_relative 'ast/scalar'
|
|
8
|
+
require_relative 'ast/record'
|
|
9
|
+
require_relative 'ast/list'
|
|
10
|
+
require_relative 'ast/ref'
|
|
11
|
+
|
|
12
|
+
module Natsuzora
|
|
13
|
+
module Contract
|
|
14
|
+
# Raised when data does not satisfy a contract.
#
# The exception message has the form "<message> at <path>"; both parts are
# also exposed separately through #error_message and #path.
class ValidationError < StandardError
  # @return [String] location of the offending value
  attr_reader :path

  # @return [String] the description passed in, without the " at <path>" suffix
  attr_reader :error_message

  # @param message [String] description of the violation
  # @param path [String] location of the offending value; defaults to the root "$"
  def initialize(message, path = '$')
    super("#{message} at #{path}")
    @error_message = message
    @path = path
  end
end
|
|
24
|
+
|
|
25
|
+
# Validates parsed JSON data against a contract AST.
#
# Validation stops at the first violation, which is reported by raising
# ValidationError with a location string such as "$", "$.key" or "$.list[0]".
class Validator
  # Largest number of path segments (object keys plus array indexes) a value
  # may sit below the root; anything deeper is rejected.
  MAX_VALIDATE_DEPTH = 64

  class << self
    # Validate JSON data against a contract.
    #
    # @param contract [Object] root contract node (an AST::Any, AST::Scalar,
    #   AST::Record or AST::List)
    # @param data [Object] the value to check
    # @return [nil] when the data satisfies the contract
    # @raise [ValidationError] on the first violation found
    # @raise [ArgumentError] when a contract node is of an unknown class
    def validate(contract, data)
      new.validate_node(contract, data, [])
    end

    # Validate JSON data against a contract file with diff markers.
    #
    # A record contract is first built from the document's fields for the
    # given target, then the data is validated against it.
    #
    # @param document [Object] must respond to #types and #fields
    # @param data [Object] the value to check
    # @param target [Object] which generation of the contract to validate against
    # @return [nil] when the data satisfies the contract
    # @raise [ValidationError] on a violation or an unresolvable type reference
    def validate_with_target(document, data, target: ValidationTarget::CURRENT)
      validate(build_contract_for_target(document, target), data)
    end

    private

    # Builds the root AST::Record for +target+. Fields whose #for_target
    # returns nil/false are left out; every remaining field is required.
    def build_contract_for_target(document, target)
      resolver = TypeRefResolver.new(
        document.types,
        target: target,
        on_missing: ->(name) { raise ValidationError, "undefined type '#{name}'" },
        on_unavailable: ->(name) { raise ValidationError, "type '#{name}' is not available for #{target}" },
        on_cyclic: ->(name) { raise ValidationError, "cyclic type reference '#{name}'" }
      )

      properties = {}
      required = []

      document.fields.each do |name, field|
        node = field.for_target(target)
        next unless node

        properties[name] = resolver.resolve(node)
        required << name
      end

      AST::Record.new(properties, required)
    end
  end

  # Validates +data+ against a single contract node, recursing into records
  # and lists.
  #
  # @param contract [Object] contract node
  # @param data [Object] value found at +path+
  # @param path [Array<Array>] segments leading here, each [:key, name] or [:index, i]
  # @return [nil] when valid
  # @raise [ValidationError] on a violation, an unresolved AST::Ref, or excessive depth
  # @raise [ArgumentError] when +contract+ is not a known AST class
  def validate_node(contract, data, path)
    if path.length > MAX_VALIDATE_DEPTH
      raise ValidationError.new('validation depth exceeded', render_path(path))
    end

    case contract
    when AST::Any then nil # any value is valid
    when AST::Scalar then validate_scalar(contract, data, path)
    when AST::Record then validate_object(contract, data, path)
    when AST::List then validate_array(contract, data, path)
    when AST::Ref
      # References are expected to have been resolved before validation.
      raise ValidationError.new("unresolved type reference '#{contract.name}'", render_path(path))
    else
      raise ArgumentError, "Unknown contract type: #{contract.class}"
    end
  end

  private

  # nil is handled by the nullability rule; everything else must be accepted
  # by the scalar contract.
  def validate_scalar(contract, data, path)
    if data.nil?
      validate_null(contract, path)
    elsif !scalar_value_valid?(contract, data, path)
      raise ValidationError.new("expected #{contract.scalar_type}", render_path(path))
    end
  end

  def validate_null(contract, path)
    raise ValidationError.new('null is not allowed', render_path(path)) unless contract.nullable?
  end

  # @return [Boolean] whether the contract accepts +data+
  # @raise [ValidationError] when an accepted String is empty but required
  def scalar_value_valid?(contract, data, path)
    accepted = contract.accepts?(data)
    ensure_not_empty!(contract, data, path) if accepted && data.is_a?(String)
    accepted ? true : false
  end

  def ensure_not_empty!(contract, data, path)
    return unless contract.required? && data.empty?

    raise ValidationError.new('empty string is not allowed', render_path(path))
  end

  # Property names are looked up as given first and as Symbols second, so
  # both string-keyed and symbol-keyed Hashes are accepted.
  def validate_object(contract, data, path)
    raise ValidationError.new('expected object', render_path(path)) unless data.is_a?(Hash)

    # Check required fields
    absent = contract.required.find { |name| !data.key?(name) && !data.key?(name.to_sym) }
    if absent
      raise ValidationError.new('missing required property', render_path(path + [[:key, absent]]))
    end

    # Validate properties; a property that is not present at all is skipped.
    contract.properties.each do |name, child|
      if data.key?(name)
        validate_node(child, data[name], path + [[:key, name]])
      elsif data.key?(name.to_sym)
        validate_node(child, data[name.to_sym], path + [[:key, name]])
      end
    end

    nil
  end

  def validate_array(contract, data, path)
    raise ValidationError.new('expected array', render_path(path)) unless data.is_a?(Array)

    data.each.with_index do |element, position|
      validate_node(contract.items, element, path + [[:index, position]])
    end

    nil
  end

  # Renders path segments as "$" followed by ".name" for each :key segment
  # and "[i]" for each :index segment; other segment kinds add nothing.
  def render_path(path)
    path.reduce('$') do |rendered, (kind, value)|
      case kind
      when :key then "#{rendered}.#{value}"
      when :index then "#{rendered}[#{value}]"
      else rendered
      end
    end
  end
end
|
|
166
|
+
|
|
167
|
+
class << self
  # Validate JSON data against a contract.
  #
  # Delegates to Validator.validate and discards its result.
  #
  # @return [true] whenever the validator returns without raising
  def validate(contract, data) # rubocop:disable Naming/PredicateMethod
    Validator.validate(contract, data)
    true
  end

  # Validate JSON data against a contract file with diff markers.
  #
  # Delegates to Validator.validate_with_target and discards its result.
  #
  # @return [true] whenever the validator returns without raising
  def validate_with_target(document, data, target: ValidationTarget::CURRENT) # rubocop:disable Naming/PredicateMethod
    Validator.validate_with_target(document, data, target: target)
    true
  end
end
|
|
178
|
+
end
|
|
179
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'contract/parse_error'
|
|
4
|
+
require_relative 'contract/diff_marker'
|
|
5
|
+
require_relative 'contract/validation_target'
|
|
6
|
+
require_relative 'contract/scalar_type'
|
|
7
|
+
require_relative 'contract/ast'
|
|
8
|
+
require_relative 'contract/field'
|
|
9
|
+
require_relative 'contract/type_def'
|
|
10
|
+
require_relative 'contract/type_ref_resolver'
|
|
11
|
+
require_relative 'contract/document'
|
|
12
|
+
require_relative 'contract/compiled_lexer'
|
|
13
|
+
require_relative 'contract/parser'
|
|
14
|
+
require_relative 'contract/validator'
|
|
15
|
+
|
|
16
|
+
module Natsuzora
  # Contract notation (formerly the standalone `subaru` gem).
  #
  # Parses `.ntzc` files into a {Contract} tree, validates JSON data against
  # contracts, and supports two-generation diff markers (`+`, `-`, `*`).
  module Contract
    # Error class namespaced under Contract; a direct StandardError subclass.
    class Error < StandardError
    end
  end
end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"format":"lkt1","codec":"deflate+base64","kind":"program","table_version":2,"uncompressed_len":2276,"sha256":"375d869fdc099409a5c07a3eb293c87d6a8a31c886cb9d545d77a297dc1c2ef1","data":"eJzlletrE0EUxc/sw7TbByolFAnSVkVoEayPCv1gs92d6pp9pLubpA/KUm3rq76tr1LIn5b/rJ7dxBiXRv0QpNgLP+7cOTPD3rMw41bieagABCYxniShjGRYl3ZyM0lQcPy66Tr2L8ItCr317Vx9J1ffzdULufoea90Mw6AB3QrcwIcRVKWfLIemJTFiuUEkO8VoV6jIGGM/pbQcWq3JKHa4f0SuWa7pmVmhVd1aBN1zfCYtis0QBSvwPOnHGI7XqzLxTU/CcGzOOCuOpO7Lhuv4Ekojoi+Fnd297YP9j73DdtRgYIE59U/JZsRxFpmfAsPMelcDtBupcD+bEYsQhxBHEJsQWxBLENMQcxCz6RZR4qEquiHQJ5R+wn8WStYpbWt3nPfn70J048epfSNb3rMhH4PrbDDRdmfQ/vy+f/EH/05T5P0p0p2ejzx1//NfRufqOtEZgRPNOSN+9d45bFlcAGZg8RZXF4Fzh8DQEWBsAqNbwPgScH4auDgHTMwCxSvApAlcWiYWsYkkK+QBeUgc8ohUiEs84pOAVMkqCUlEYlIjddIga2SdbJBtoPSYPCE7ZJfskafkGXlOXpCXZJ+8Iq/JG/KWvCPvyQfC5610QD6Rz+QL+Uq+EQO4zGdtaoxMkY5R168C19hjNsHvn7Fpma2VUUSZ4zQ3mZvMLeYWioJreEEVBXWVuqCuUhfUVeoKdY26Ql1rpn+AucXMWk//CtfrZWau16nzPHqGCeCYfvMt5aDDd/X9jB8="}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"format":"lkt1","codec":"deflate+base64","kind":"program","table_version":2,"uncompressed_len":2226,"sha256":"f670fe5c25083a9642759f0aa0cd16670997d72b94f1323c676001b403fba8d3","data":"eJzdlW1PFEkQx/89s7M7sICIZj0vJqfx8YUCPmEuhuw2M427x+ws7OwCSsgGFZ/FB/SezCX7/r4Un+C+kvevnj3pAyX4ztjkx1R11VRXV0/1Jgudq/ABKJzCWK/XNplpL5u4N93rodRIl3XSiP9nuEaDq1/fo9/Yo9/co8/s0W9RL3TMagelRdOOTNpBIdZZHUGUtDKDQt0qWSKP0Tmd3ul108xE3bbBiFUbaZR0Y4OyWY0S3dSdRitFsLDSa8xjiI9umpgsQ4miSRjRCjqqWx+dwY9bHa7WajY1ArPU1QnCpa7JbJzhlXqjY7JFHRkEjVjS81B6uPlo4/2LdxiWxHtPt7Y237rTMlwTEALePxiH92ENZboGH9b4t/4X1HmoK/BEOgs1pc7R+SgZt0HURzvs+TA+nyeJ78wXHDlw5KIjlxw5dOQhRx627wMnrLw7X3bkEUcedeQxRz7iyOM2D+CHM7KBpxL5Pf95W7KHbUl+U1Z9IG5iKNlKydyQuJTFMPJCFhPr2EMJv2njztgqMNS+Ku3KXl52/CQLq0d25k9bjTyrL7+3Kx+u0lJJ4IIkqSRxT3L2JdOC7DLYtl5/D/Ip7T/ZQ6yXn9CVA09ZTguYlIXVhnhKHt72p2RsoZ9Y18MWcITclFhqOy/NJNRlqFmoqtjVKUbx8ZVDfRqDCe+LA3mBDrR/S8OzeLZdOVSF1dlNUjas9r/0manvceS1ySsjhRgFIsyd51fLz3p0nc19FpiYAo7zvqiwa39kR53hl3uJH+D0JHDrMvDzLHC7Csyy5apsiSqjVE8TDeg5EpGYGDJP7pA6aZBfyAJJSJOkpEUWyRJpk4x0SJcskxWySu6Se+Q+YSNp3kaal4p+TNhT+hl5Tthz+iXhRaBfkdfkDeHdptlDmr8O+lfyG/md/EF4LelBkerMH8dYnRpvC/4YlCaoT1OXO1S6knPz4sM9o4J+FHu1sIIdPvthRSHie+WrYlO1KPbBOfr4tTBV9PH7YezvhHEBIesB1ieUj69Z6J9mTaK4UAvj4s5/tqGB7WJu67u24YFtKrcxZgDXXh7YZ6w9yONWfJGZp8/cAtGZW5E5+sytWAs9cL7Y55N+9D/OLc/lew8/Dvb/L/ErdH4="}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Natsuzora
  # Mixin that gives the including class a `normalize_data(data)` method.
  module DataNormalizable
    # Returns a normalized copy of +data+ using only pure transformations:
    # - Hash keys are converted with #to_s (so Symbol keys become Strings),
    #   recursively through nested Hashes and Arrays.
    # - Whole-number Float values are converted to Integers.
    # Every other value is returned as-is.
    #
    # @param data [Object] arbitrarily nested Hash / Array / scalar structure
    # @return [Object] the normalized structure
    def normalize_data(data)
      case data
      when Hash then data.to_h { |key, value| [key.to_s, normalize_data(value)] }
      when Array then data.map { |element| normalize_data(element) }
      when Float then normalize_float(data)
      else data
      end
    end

    private

    # Convert finite whole-number Float to Integer; pass through anything else
    # (fractional values, NaN, +/-Infinity).
    def normalize_float(value)
      return value unless value.finite?

      truncated = value.to_i
      truncated == value ? truncated : value
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Natsuzora
  # Root of the Natsuzora exception hierarchy.
  #
  # Optionally carries a source position. When a line is given, the message
  # is suffixed with " at line L" or " at line L, column C"; a column without
  # a line leaves the message unchanged.
  class Error < StandardError
    # @return [Integer, nil] source line, when known
    attr_reader :line
    # @return [Integer, nil] source column, when known
    attr_reader :column

    # @param message [String] description of the problem
    # @param line [Integer, nil] source line
    # @param column [Integer, nil] source column
    def initialize(message, line: nil, column: nil)
      @line = line
      @column = column
      super(build_message(message))
    end

    private

    # Appends the position suffix; relies on @line/@column being set first.
    def build_message(message)
      return message unless line

      column_part = column ? ", column #{column}" : ''
      "#{message} at line #{line}#{column_part}"
    end
  end

  # Lexing failures.
  class LexerError < Error
  end

  # Parsing failures.
  class ParseError < Error
  end

  # A ParseError specialization.
  class ReservedWordError < ParseError
  end

  # Rendering failures; parent of the render-time errors below.
  class RenderError < Error
  end

  class UndefinedVariableError < RenderError
  end

  # NOTE: inside the Natsuzora namespace a bare `TypeError` resolves to this
  # class, not Ruby's core ::TypeError.
  class TypeError < RenderError
  end

  class IncludeError < RenderError
  end

  class ShadowingError < RenderError
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Natsuzora
  # Minimal HTML escaping for text and attribute values.
  module HtmlEscape
    # Replacement for each character matched by ESCAPE_REGEXP.
    #
    # Every special character must map to its HTML entity. Mapping a character
    # to itself performs no escaping at all, and an unescaped apostrophe inside
    # a single-quoted literal is a syntax error.
    ESCAPE_MAP = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
      "'" => '&#39;'
    }.freeze

    # Matches exactly the characters that have an entry in ESCAPE_MAP.
    ESCAPE_REGEXP = /[&<>"']/

    class << self
      # Returns a copy of +string+ with `&`, `<`, `>`, `"` and `'` replaced by
      # their HTML entities. The replacement is a single pass, so an ampersand
      # that is already part of an entity is escaped again.
      #
      # @param string [String] text to escape
      # @return [String] the escaped copy; +string+ itself is not modified
      def escape(string)
        string.gsub(ESCAPE_REGEXP, ESCAPE_MAP)
      end
    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../lexer_loader'
|
|
4
|
+
|
|
5
|
+
module Natsuzora
  class Lexer
    # Access point for the precompiled template lexer.
    module CompiledLexer
      # Absolute path of the precompiled template lexer, resolved relative to
      # this file's directory.
      LEXER_PATH = File.expand_path('../data/lexers/template.lkt1', __dir__)

      class << self
        # Loads the lexer through LexerLoader on first use and returns the
        # same object on every later call.
        #
        # @return [Object] whatever LexerLoader.load_compiled returns
        def instance
          @instance ||= LexerLoader.load_compiled(LEXER_PATH)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Natsuzora
  class Lexer
    # Post-processes the raw token list to apply whitespace control and to
    # drop comments.
    #
    # - DASH tokens are consumed; a DASH as the first token of a tag trims the
    #   preceding text, a DASH directly before CLOSE trims the following text.
    # - Comment tags (first token PERCENT, or DASH then PERCENT) are removed
    #   entirely.
    # - A comment tag with no CLOSE token raises LexerError.
    class TokenProcessor
      # @param tokens [Array<Token>] raw tokens; a tag is every token from the
      #   end of a TEXT token up to and including the next CLOSE
      def initialize(tokens)
        @tokens = tokens
        @result = []
        @strip_next_text = false
      end

      # @return [Array<Token>] processed tokens (no DASH, no comment tags)
      # @raise [LexerError] when a comment tag is never closed
      def process
        position = 0

        until position >= @tokens.length
          current = @tokens[position]
          position =
            if current.type == :TEXT
              append_text(current)
              position + 1
            else
              process_tag(position)
            end
        end

        @result
      end

      private

      # Handles the tag starting at +start_idx+ and returns the index of the
      # first token after it (the token count when the tag is unclosed).
      def process_tag(start_idx)
        closing = find_close_index(start_idx)
        final = closing || (@tokens.length - 1)
        tag = @tokens[start_idx..final]

        apply_left_trim(tag)
        apply_right_trim(tag)

        if comment_tag?(tag)
          raise_unclosed_comment!(tag) if closing.nil?
        else
          emit_tag_tokens(tag)
        end

        final + 1
      end

      # Emits a TEXT token, honoring a pending right trim; text that ends up
      # empty is dropped.
      def append_text(token)
        content = token.value

        if @strip_next_text
          @strip_next_text = false
          content = strip_leading_whitespace_if_blank_line(content)
        end

        @result << Token.new(:TEXT, content, line: token.line, column: token.column) unless content.empty?
      end

      # Index of the first CLOSE token at or after +start_idx+, or nil.
      def find_close_index(start_idx)
        (start_idx...@tokens.length).find { |i| @tokens[i].type == :CLOSE }
      end

      def apply_left_trim(tag_tokens)
        opener = tag_tokens.first
        return unless opener && opener.type == :DASH

        strip_trailing_from_last_text_if_blank_line
      end

      # Arms the right trim when the token directly before CLOSE is a DASH.
      def apply_right_trim(tag_tokens)
        close_at = tag_tokens.index { |token| token.type == :CLOSE }
        return if close_at.nil? || close_at.zero?

        @strip_next_text = true if tag_tokens[close_at - 1].type == :DASH
      end

      def comment_tag?(tag_tokens)
        head, second = tag_tokens
        return false unless head

        case head.type
        when :PERCENT then true
        when :DASH then second&.type == :PERCENT
        else false
        end
      end

      def emit_tag_tokens(tag_tokens)
        @result.concat(tag_tokens.reject { |token| token.type == :DASH })
      end

      # Left trim: if the most recently emitted TEXT token ends in a run of
      # spaces/tabs that starts right after a line break (or at the start of
      # the text), remove that run.
      #
      # NOTE(review): this targets the last TEXT token in the output even when
      # other tag tokens were emitted after it, and it can leave a TEXT token
      # with an empty value behind - confirm both are intended.
      def strip_trailing_from_last_text_if_blank_line
        text_at = @result.rindex { |token| token.type == :TEXT }
        return unless text_at

        previous = @result[text_at]
        content = previous.value
        line_start = same_line_start_offset(content)
        tail = content[line_start..] || ''
        return unless horizontal_whitespace_only?(tail)

        @result[text_at] = Token.new(:TEXT, content[0...line_start], line: previous.line, column: previous.column)
      end

      # Right trim: drop leading spaces/tabs plus one following line break
      # ("\n", "\r\n" or "\r"). Text consisting only of spaces/tabs becomes
      # empty; text whose first non-blank character is not a line break is
      # returned unchanged.
      def strip_leading_whitespace_if_blank_line(text)
        bytes = text.bytes
        blank_run = skip_leading_horizontal_whitespace(bytes)
        return '' if blank_run >= bytes.length

        newline_width = leading_newline_advance(bytes, blank_run)
        return text unless newline_width

        # Only single-byte characters were skipped, so the byte offset is
        # also a valid character offset.
        text[(blank_run + newline_width)..] || ''
      end

      # Offset just past the last "\n" or "\r" in +value+, or 0 if none.
      def same_line_start_offset(value)
        last_break = ["\n", "\r"].map { |newline| value.rindex(newline) }.compact.max
        last_break.nil? ? 0 : last_break + 1
      end

      def horizontal_whitespace_only?(segment)
        segment.match?(/\A[ \t]*\z/)
      end

      # Number of leading space (0x20) / tab (0x09) bytes.
      def skip_leading_horizontal_whitespace(bytes)
        bytes.take_while { |byte| byte == 0x20 || byte == 0x09 }.length
      end

      # Width of the line break starting at +idx+ (1 or 2), or nil when the
      # byte there is not a line break.
      def leading_newline_advance(bytes, idx)
        case bytes[idx]
        when 0x0A then 1 # \n
        when 0x0D then bytes[idx + 1] == 0x0A ? 2 : 1 # \r\n or lone \r
        end
      end

      # Reports the position of the PERCENT token when there is one.
      def raise_unclosed_comment!(tag_tokens)
        culprit = tag_tokens.find { |token| token.type == :PERCENT } || tag_tokens.first
        raise LexerError.new('Unclosed comment', line: culprit.line, column: culprit.column)
      end
    end
  end
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lexer/compiled_lexer'
|
|
4
|
+
require_relative 'lexer/token_processor'
|
|
5
|
+
require_relative 'token'
|
|
6
|
+
require_relative 'errors'
|
|
7
|
+
|
|
8
|
+
module Natsuzora
  # Lexer for Natsuzora template language using LexerKit
  #
  # Responsibilities:
  # - Escape sequence processing ({[{]} -> {[)
  # - Whitespace control via TokenProcessor ({[- and -]})
  # - Error handling for invalid characters
  # - EOF token addition
  class Lexer
    # Literal written in templates to produce a literal "{[" in the output.
    ESCAPE_SEQUENCE = '{[{]}'
    # What ESCAPE_SEQUENCE is replaced with inside TEXT tokens.
    ESCAPED_VALUE = '{['

    # @param source [String] template source text
    def initialize(source)
      @source = source
    end

    # Runs the compiled lexer over the source, applies TokenProcessor, and
    # appends a trailing EOF token.
    #
    # @return [Array<Token>] processed tokens, always ending with an :EOF token
    # @raise [LexerError] on an :INVALID token from the compiled lexer
    def tokenize
      raw_tokens = map_tokens_from_stream(CompiledLexer.instance.stream(@source))
      TokenProcessor.new(raw_tokens).process.tap { |tokens| add_eof(tokens) }
    end

    private

    # Walks the stream to its end, converting each entry into a Token.
    def map_tokens_from_stream(stream)
      collected = []

      until stream.eof?
        token = token_from(stream, stream.token_name, stream.text)
        collected << token if token
        stream.advance
      end

      collected
    end

    # Builds the Token for the stream's current entry, or nil when the entry
    # produces nothing (a TEXT entry whose unescaped value is empty).
    def token_from(stream, name, text)
      case name
      when :TEXT
        unescaped = process_text_value(text)
        positioned_token(stream, :TEXT, unescaped) unless unescaped.empty?
      when :INVALID
        line, col = stream.line_col
        raise LexerError.new("Unexpected character: '#{text}'", line: line, column: col)
      else
        positioned_token(stream, name, text)
      end
    end

    # Token stamped with the stream's current line/column.
    def positioned_token(stream, type, value)
      line, col = stream.line_col
      Token.new(type, value, line: line, column: col)
    end

    def process_text_value(text)
      text.gsub(ESCAPE_SEQUENCE, ESCAPED_VALUE)
    end

    # Appends the EOF token in place: at 1:1 for an empty list, otherwise
    # at the position just past the last token's value.
    def add_eof(tokens)
      line, column = tokens.empty? ? [1, 1] : position_after_value(tokens.last)
      tokens << Token.new(:EOF, nil, line: line, column: column)
    end

    # Advances the token's start position over its value: "\n" moves to
    # column 1 of the next line, any other character moves one column right.
    def position_after_value(token)
      start = [token.line, token.column]

      (token.value || '').each_char.reduce(start) do |(line, column), char|
        char == "\n" ? [line + 1, 1] : [line, column + 1]
      end
    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'lexer_kit'
|
|
4
|
+
|
|
5
|
+
module Natsuzora
  # Loads precompiled lexer files through LexerKit.
  module LexerLoader
    # @param path [String] path of a precompiled lexer file
    # @return [Object] the result of LexerKit.load_lexer for that file
    # @raise [LoadError] when +path+ is not an existing regular file
    def self.load_compiled(path)
      return LexerKit.load_lexer(path) if File.file?(path)

      raise LoadError, "Precompiled lexer is missing: #{path}. Run `rake lexers:compile`."
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'lexer_kit'
|
|
4
|
+
|
|
5
|
+
# Token definitions for the contract notation lexer.
# Declaration order is kept exactly as authored.
# NOTE(review): whether LexerKit gives earlier declarations priority is not
# visible here - confirm before reordering anything.
LexerKit.build do
  # Fixed-string tokens.
  token(:ARROW, '->')
  token(:COLON, ':')
  token(:OPEN_BRACE, '{')
  token(:CLOSE_BRACE, '}')
  token(:OPEN_BRACKET, '[')
  token(:CLOSE_BRACKET, ']')
  token(:QUESTION, '?')
  token(:EXCLAMATION, '!')
  token(:PLUS, '+')
  token(:MINUS, '-')
  token(:STAR, '*')

  # Pattern tokens.
  token(:COMMENT, /#[^\n]*/) # "#" up to, but not including, the line break
  token(:TYPE_NAME, /[A-Z][a-zA-Z0-9_]*/) # starts with an uppercase letter
  token(:IDENTIFIER, /[a-z][a-zA-Z0-9_]*/) # starts with a lowercase letter
  token(:NEWLINE, /\n/)

  # Spaces, tabs and carriage returns are declared with skip: true.
  token(:WS, /[ \t\r]+/, skip: true)
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'lexer_kit'
|
|
4
|
+
|
|
5
|
+
# Token definitions for the template lexer.
# Declaration order is kept exactly as authored.
# NOTE(review): whether LexerKit gives earlier declarations priority (e.g.
# '!unsecure' before the bare '!') is not visible here - confirm before
# reordering anything.
LexerKit.build do
  # Content up to the '{[' delimiter is TEXT; '{[{]}' is declared as its
  # escape. The tokens below are declared inside the delimited block.
  delimited(:TEXT, delimiter: '{[', escape: '{[{]}') do
    token(:PERCENT, '%')
    token(:DASH, '-')
    token(:CLOSE, ']}', pop: true)
    token(:HASH, '#')
    token(:SLASH, '/')

    # '!'-prefixed tokens.
    token(:BANG_UNSECURE, '!unsecure')
    token(:BANG_INCLUDE, '!include')
    token(:EXCLAMATION, '!')

    # Keywords.
    token(:KW_IF, 'if')
    token(:KW_UNLESS, 'unless')
    token(:KW_ELSE, 'else')
    token(:KW_EACH, 'each')
    token(:KW_AS, 'as')

    # Punctuation.
    token(:DOT, '.')
    token(:COMMA, ',')
    token(:EQUAL, '=')
    token(:QUESTION, '?')

    # Whitespace is a real token here (no skip option is passed).
    token(:WHITESPACE, /[ \t\r\n]+/)
    token(:IDENT, /[A-Za-z][A-Za-z0-9_]*/)
  end
end
|