natsuzora 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +55 -0
  4. data/CHANGELOG.md +62 -0
  5. data/Rakefile +75 -0
  6. data/lib/natsuzora/ast.rb +94 -0
  7. data/lib/natsuzora/context.rb +96 -0
  8. data/lib/natsuzora/contract/ast/any.rb +20 -0
  9. data/lib/natsuzora/contract/ast/list.rb +28 -0
  10. data/lib/natsuzora/contract/ast/node.rb +16 -0
  11. data/lib/natsuzora/contract/ast/record.rb +33 -0
  12. data/lib/natsuzora/contract/ast/ref.rb +27 -0
  13. data/lib/natsuzora/contract/ast/scalar.rb +60 -0
  14. data/lib/natsuzora/contract/ast.rb +38 -0
  15. data/lib/natsuzora/contract/compiled_lexer.rb +15 -0
  16. data/lib/natsuzora/contract/diff_marker.rb +15 -0
  17. data/lib/natsuzora/contract/document.rb +45 -0
  18. data/lib/natsuzora/contract/field.rb +62 -0
  19. data/lib/natsuzora/contract/parse_error.rb +16 -0
  20. data/lib/natsuzora/contract/parser.rb +362 -0
  21. data/lib/natsuzora/contract/scalar_type.rb +17 -0
  22. data/lib/natsuzora/contract/type_def.rb +39 -0
  23. data/lib/natsuzora/contract/type_ref_resolver.rb +56 -0
  24. data/lib/natsuzora/contract/validation_target.rb +13 -0
  25. data/lib/natsuzora/contract/validator.rb +179 -0
  26. data/lib/natsuzora/contract.rb +23 -0
  27. data/lib/natsuzora/data/lexers/contract.lkt1 +1 -0
  28. data/lib/natsuzora/data/lexers/template.lkt1 +1 -0
  29. data/lib/natsuzora/data_normalizable.rb +31 -0
  30. data/lib/natsuzora/errors.rb +37 -0
  31. data/lib/natsuzora/html_escape.rb +21 -0
  32. data/lib/natsuzora/lexer/compiled_lexer.rb +15 -0
  33. data/lib/natsuzora/lexer/token_processor.rb +156 -0
  34. data/lib/natsuzora/lexer.rb +95 -0
  35. data/lib/natsuzora/lexer_loader.rb +15 -0
  36. data/lib/natsuzora/lexers/contract.rb +24 -0
  37. data/lib/natsuzora/lexers/template.rb +31 -0
  38. data/lib/natsuzora/parser.rb +419 -0
  39. data/lib/natsuzora/payload.rb +35 -0
  40. data/lib/natsuzora/renderer.rb +132 -0
  41. data/lib/natsuzora/template.rb +34 -0
  42. data/lib/natsuzora/template_loader.rb +118 -0
  43. data/lib/natsuzora/token.rb +20 -0
  44. data/lib/natsuzora/validator.rb +73 -0
  45. data/lib/natsuzora/value.rb +73 -0
  46. data/lib/natsuzora/version.rb +5 -0
  47. data/lib/natsuzora.rb +30 -0
  48. metadata +105 -0
@@ -0,0 +1,419 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsuzora
4
# Recursive-descent parser: walks a flat list of lexer tokens and builds an
# AST::Template.
#
# Every token is expected to respond to #type, #value, #line and #column.
# Parsing state is a single cursor (@pos) into the token list; lookahead is
# done by moving the cursor forward and restoring it afterwards.
class Parser
  # Token types that route a node to #parse_tag_content.
  TAG_CONTENT_TYPES = %i[
    HASH SLASH BANG_UNSECURE BANG_INCLUDE IDENT KW_IF KW_UNLESS KW_ELSE KW_EACH KW_AS
    WHITESPACE QUESTION EXCLAMATION DOT EQUAL COMMA
  ].freeze

  # Sigils that may not be preceded by whitespace at the start of a tag.
  SPECIAL_TAG_TYPES = %i[HASH SLASH BANG_UNSECURE BANG_INCLUDE].freeze

  # Keyword token types; rejected wherever an identifier is required.
  KEYWORD_TYPES = %i[KW_IF KW_UNLESS KW_ELSE KW_EACH KW_AS].freeze

  private_constant :TAG_CONTENT_TYPES, :SPECIAL_TAG_TYPES, :KEYWORD_TYPES

  # @param tokens [Array] token list to parse (indexed by position)
  def initialize(tokens)
    @tokens = tokens
    @pos = 0
  end

  # Parses the whole token list.
  #
  # @return [AST::Template] root node, always positioned at line 1, column 1
  # @raise [ParseError] on any syntax violation
  def parse
    AST::Template.new(parse_nodes, line: 1, column: 1)
  end

  private

  # Collects nodes until the current token type is one of stop_types.
  def parse_nodes(stop_types: [:EOF])
    nodes = []
    nodes << parse_node until stop_types.include?(current_type)
    nodes
  end

  # Parses one node: literal text, or the content of a tag.
  def parse_node
    type = current_type
    return parse_text if type == :TEXT
    return parse_tag_content if TAG_CONTENT_TYPES.include?(type)

    unexpected_token!
  end

  def parse_text
    token = consume(:TEXT)
    AST::Text.new(token.value, line: token.line, column: token.column)
  end

  # Dispatches on the first significant token of a tag.
  def parse_tag_content
    check_no_whitespace_before_special(current_token) if current_type == :WHITESPACE
    skip_whitespace

    case current_type
    when :HASH then parse_block_open
    when :SLASH then unexpected_token!('Unexpected block close')
    when :BANG_UNSECURE then parse_unsecure_output
    when :BANG_INCLUDE then parse_include
    else parse_variable_node
    end
  end

  # Raises when leading whitespace is followed by a block/command sigil.
  # The error is reported at the position of the whitespace token.
  def check_no_whitespace_before_special(ws_token)
    special = with_lookahead do
      skip_whitespace
      current_token if SPECIAL_TAG_TYPES.include?(current_type)
    end
    return unless special

    raise ParseError.new(
      "Whitespace not allowed before '#{special.value}' after tag open",
      line: ws_token.line,
      column: ws_token.column
    )
  end

  def parse_block_open
    consume(:HASH)
    skip_whitespace

    case current_type
    when :KW_IF then parse_if_block
    when :KW_UNLESS then parse_unless_block
    when :KW_EACH then parse_each_block
    when :KW_ELSE then unexpected_token!("Unexpected 'else' without 'if'")
    else unexpected_token!
    end
  end

  def parse_if_block
    keyword = consume(:KW_IF)
    condition = parse_tag_path

    then_nodes = parse_block_body(:KW_IF, allow_else: true)
    else_nodes = nil
    if else_open?
      consume_else
      # A second else stops this body too; the block close below then fails.
      else_nodes = parse_block_body(:KW_IF, allow_else: true)
    end

    consume_block_close(:KW_IF)

    AST::IfBlock.new(
      condition: condition,
      then_nodes: then_nodes,
      else_nodes: else_nodes,
      line: keyword.line,
      column: keyword.column
    )
  end

  def parse_unless_block
    keyword = consume(:KW_UNLESS)
    condition = parse_tag_path
    body_nodes = parse_block_body(:KW_UNLESS)
    consume_block_close(:KW_UNLESS)

    AST::UnlessBlock.new(
      condition: condition,
      body_nodes: body_nodes,
      line: keyword.line,
      column: keyword.column
    )
  end

  def parse_each_block
    keyword = consume(:KW_EACH)

    consume_required_whitespace
    collection = parse_path
    consume_required_whitespace
    consume(:KW_AS)
    consume_required_whitespace
    item_name = parse_identifier_with_validation
    skip_whitespace
    consume(:CLOSE)

    body_nodes = parse_block_body(:KW_EACH)
    consume_block_close(:KW_EACH)

    AST::EachBlock.new(
      collection: collection,
      item_name: item_name,
      body_nodes: body_nodes,
      line: keyword.line,
      column: keyword.column
    )
  end

  # Collects body nodes up to the closing tag for keyword, or - when
  # allow_else is set - up to an else tag, whichever comes first.
  def parse_block_body(keyword, allow_else: false)
    nodes = []
    nodes << parse_node until block_close?(keyword) || (allow_else && else_open?)
    nodes
  end

  # Parses "<whitespace> path <optional whitespace> CLOSE", the tail shared
  # by if, unless and unsecure-output tags.
  def parse_tag_path
    consume_required_whitespace
    path = parse_path
    skip_whitespace
    consume(:CLOSE)
    path
  end

  # True when the cursor sits on the HASH of an else tag. Does not consume.
  def else_open?
    current_type == :HASH && type_after_sigil == :KW_ELSE
  end

  def consume_else
    consume(:HASH)
    skip_whitespace
    consume(:KW_ELSE)
    skip_whitespace
    consume(:CLOSE)
  end

  def parse_unsecure_output
    token = consume(:BANG_UNSECURE)
    path = parse_tag_path

    AST::UnsecureOutput.new(path: path, line: token.line, column: token.column)
  end

  def parse_include
    token = consume(:BANG_INCLUDE)

    consume_required_whitespace
    name = parse_include_name
    args = parse_include_args
    skip_whitespace
    consume(:CLOSE)

    AST::Include.new(name: name, args: args, line: token.line, column: token.column)
  end

  # Parses one or more "/segment" parts and returns them joined.
  def parse_include_name
    first_token = current_token
    unless current_type == :SLASH
      raise ParseError.new("Include name must start with '/'", line: first_token.line, column: first_token.column)
    end

    segments = [parse_include_segment]
    segments << parse_include_segment while current_type == :SLASH

    path = segments.join
    Validator.validate_include_name_syntax!(path, line: first_token.line, column: first_token.column)
    path
  end

  def parse_include_segment
    consume(:SLASH)

    token = current_token
    if current_type == :INVALID
      raise LexerError.new("Invalid character in include path: '#{token.value}'",
                           line: token.line, column: token.column)
    end
    unless current_type == :IDENT
      raise ParseError.new('Expected identifier after /', line: token.line, column: token.column)
    end

    ident_token = consume(:IDENT)
    if ident_token.value.start_with?('_')
      raise LexerError.new("Include segment cannot start with underscore: #{ident_token.value}",
                           line: ident_token.line, column: ident_token.column)
    end

    "/#{ident_token.value}"
  end

  # Parses whitespace-separated "key = path" pairs into a Hash.
  # Stops at the first whitespace run that is not followed by an identifier.
  def parse_include_args
    args = {}

    while current_type == :WHITESPACE
      skip_whitespace
      break unless current_type == :IDENT

      key, value, key_token = parse_include_arg
      if args.key?(key)
        raise ParseError.new("Duplicate include argument: #{key}", line: key_token.line, column: key_token.column)
      end

      args[key] = value
    end

    args
  end

  # @return [Array] key string, parsed value path, and the key token
  def parse_include_arg
    key_token = consume(:IDENT)
    Validator.validate_identifier!(key_token.value, line: key_token.line, column: key_token.column)

    skip_whitespace
    consume(:EQUAL)
    skip_whitespace
    value = parse_path

    [key_token.value, value, key_token]
  end

  def parse_variable_node
    path = parse_path(allow_modifier: true)
    skip_whitespace
    consume(:CLOSE)
    path
  end

  # Parses a dotted path. With allow_modifier, a directly following
  # QUESTION or EXCLAMATION token is consumed as the modifier.
  def parse_path(allow_modifier: false)
    first_token = current_token
    segments = [parse_identifier_with_validation]

    while current_type == :DOT
      consume(:DOT)
      segments << parse_identifier_with_validation
    end

    modifier = allow_modifier ? parse_modifier : nil

    AST::Variable.new(segments, modifier: modifier, line: first_token.line, column: first_token.column)
  end

  # @return [Symbol, nil] :nullable, :required, or nil when no modifier follows
  def parse_modifier
    case current_type
    when :QUESTION
      advance_token
      :nullable
    when :EXCLAMATION
      advance_token
      :required
    end
  end

  # Consumes an identifier and returns its string value.
  #
  # @raise [ReservedWordError] for keyword tokens and reserved identifiers
  def parse_identifier_with_validation
    reserved_word!(current_token) if keyword_token?(current_token)

    token = consume(:IDENT)
    reserved_word!(token) if Token::RESERVED_WORDS.include?(token.value)

    Validator.validate_identifier!(token.value, line: token.line, column: token.column)
    token.value
  end

  def reserved_word!(token)
    raise ReservedWordError.new("'#{token.value}' is a reserved word", line: token.line, column: token.column)
  end

  def keyword_token?(token)
    !token.nil? && KEYWORD_TYPES.include?(token.type)
  end

  # True when the cursor sits on a SLASH that closes a block. With a
  # keyword, the token after the slash must be that keyword. Does not consume.
  def block_close?(keyword = nil)
    return false unless current_type == :SLASH

    !keyword || type_after_sigil == keyword
  end

  def consume_block_close(keyword)
    consume(:SLASH)
    skip_whitespace
    consume(keyword)
    skip_whitespace
    consume(:CLOSE)
  end

  # Type of the first non-whitespace token after the current one.
  def type_after_sigil
    with_lookahead do
      advance_token
      skip_whitespace
      current_type
    end
  end

  # Runs the block and then rewinds the cursor to where it started.
  def with_lookahead
    saved_pos = @pos
    yield
  ensure
    @pos = saved_pos
  end

  def current_token
    @tokens[@pos]
  end

  def current_type
    current_token&.type
  end

  def advance_token
    @pos += 1
  end

  # Returns the current token and advances, or raises when its type differs.
  def consume(type)
    token = current_token
    unexpected_token!("Expected #{type}") if token.nil? || token.type != type
    advance_token
    token
  end

  def consume_required_whitespace
    unexpected_token!('Expected whitespace') unless current_type == :WHITESPACE
    skip_whitespace
  end

  def skip_whitespace
    advance_token while current_type == :WHITESPACE
  end

  # Raises a ParseError for the current token.
  #
  # When the cursor has run past the end of the token list there is no
  # current token; the error is then positioned at the last token so the
  # caller still gets a usable line/column.
  def unexpected_token!(message = nil)
    token = current_token
    msg = message || 'Unexpected token'
    msg = "#{msg}: #{token.type}" if token
    position = token || @tokens.last
    raise ParseError.new(msg, line: position&.line, column: position&.column)
  end
end
419
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsuzora
4
# Boundary object holding the host data for one {Template#render} call.
#
# Host applications supply data in host-native shapes (Symbol-keyed
# Hashes, host-side numeric types, etc.), while {Renderer} and {Context}
# consume Natsuzora's own value space. Constructing a Payload performs
# that crossing in two steps:
#
# 1. Adaptation through the {DataNormalizable} mixin (Symbol keys become
#    String keys, whole-number Floats become Integers). This is a pure
#    transformation and never raises.
# 2. Conformance check through {Validator.validate_data!}, which raises
#    {Natsuzora::TypeError} for anything still outside the value type
#    system (a leftover Float, an Integer outside the safe range,
#    NaN/Infinity).
#
# Once `new` has returned, `#data` is guaranteed to conform, so downstream
# components use it without validating again.
class Payload
  include DataNormalizable

  # @return [Hash] root data after adaptation and validation
  attr_reader :data

  # @param raw_data [Hash] host data for the template root
  # @raise [Natsuzora::TypeError] when raw_data is not a Hash, or when the
  #   adapted data fails validation
  def initialize(raw_data)
    unless raw_data.is_a?(Hash)
      raise Natsuzora::TypeError, 'Root data must be an object'
    end

    @data = normalize_data(raw_data)
    Validator.validate_data!(@data)
  end
end
35
+ end
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsuzora
4
# Walks an AST and produces the rendered output string.
#
# Two resource limits are enforced while rendering:
# * MAX_RENDER_DEPTH bounds how deeply node lists may nest.
# * MAX_OUTPUT_BYTES bounds the size of any output buffer. It is checked
#   after every appended fragment so an oversized render is aborted as
#   soon as the limit is crossed, not after the whole document is built.
class Renderer
  MAX_RENDER_DEPTH = 1024
  MAX_OUTPUT_BYTES = 50 * 1024 * 1024 # 50 MB

  # @param ast [#nodes] parsed template
  # @param template_loader [Object, nil] resolves includes; must respond to
  #   #load and #with_include. Includes raise IncludeError when it is nil.
  def initialize(ast, template_loader: nil)
    @ast = ast
    @template_loader = template_loader
  end

  # Renders the AST against data.
  #
  # @param data [Object] root data handed to Context.new
  # @return [String] rendered output
  # @raise [RenderError] when the depth or output-size limit is exceeded
  def render(data)
    @context = Context.new(data)
    @depth = 0
    output = render_nodes(@ast.nodes)
    ensure_output_limit!(output)
    output
  end

  private

  # Renders a node list into one string, tracking nesting depth.
  def render_nodes(nodes)
    @depth += 1
    raise RenderError, "render depth exceeded #{MAX_RENDER_DEPTH}" if @depth > MAX_RENDER_DEPTH

    begin
      nodes.each_with_object(+'') do |node, buffer|
        buffer << render_node(node)
        # Every fragment ends up in the final output, so a buffer that is
        # already too large means the final output would be too.
        ensure_output_limit!(buffer)
      end
    ensure
      @depth -= 1
    end
  end

  def render_node(node)
    case node
    when AST::Text then render_text(node)
    when AST::Variable then render_variable(node)
    when AST::IfBlock then render_if(node)
    when AST::UnlessBlock then render_unless(node)
    when AST::EachBlock then render_each(node)
    when AST::UnsecureOutput then render_unsecure_output(node)
    when AST::Include then render_include(node)
    else
      raise RenderError, "Unknown node type: #{node.class}"
    end
  end

  def render_text(node)
    node.content
  end

  # Resolves, stringifies (honouring the modifier) and HTML-escapes a variable.
  def render_variable(node)
    value = @context.resolve(node)
    str = stringify_with_modifier(value, node.modifier)
    HtmlEscape.escape(str)
  end

  def stringify_with_modifier(value, modifier)
    case modifier
    when :nullable then Value.stringify_nullable(value)
    when :required then Value.stringify_required(value)
    else Value.stringify(value)
    end
  end

  def render_if(node)
    value = @context.resolve(node.condition)
    if Value.truthy?(value)
      render_nodes(node.then_nodes)
    elsif node.else_nodes
      render_nodes(node.else_nodes)
    else
      ''
    end
  end

  def render_unless(node)
    value = @context.resolve(node.condition)
    return '' if Value.truthy?(value)

    render_nodes(node.body_nodes)
  end

  # Renders the body once per item, binding the item under node.item_name.
  def render_each(node)
    collection = @context.resolve(node.collection)
    Value.ensure_array!(collection)

    buffer = +''
    collection.each do |item|
      bindings = { node.item_name => item }
      buffer << @context.with_scope(bindings) { render_nodes(node.body_nodes) }
      ensure_output_limit!(buffer)
    end
    buffer
  end

  def render_unsecure_output(node)
    value = @context.resolve(node.path)
    Value.stringify(value) # No escaping
  end

  # Renders a partial with its arguments resolved in the current scope.
  def render_include(node)
    raise IncludeError, 'Template loader not configured for include' unless @template_loader

    partial_ast = @template_loader.load(node.name)

    # Arguments are resolved before the include scope is entered.
    bindings = {}
    node.args.each do |key, var|
      bindings[key] = @context.resolve(var)
    end

    @template_loader.with_include(node.name) do
      @context.with_scope(bindings, include_scope: true) do
        render_nodes(partial_ast.nodes)
      end
    end
  end

  # @raise [RenderError] when output is larger than MAX_OUTPUT_BYTES
  def ensure_output_limit!(output)
    return if output.bytesize <= MAX_OUTPUT_BYTES

    raise RenderError, "output exceeded #{MAX_OUTPUT_BYTES} bytes"
  end
end
132
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsuzora
4
# Entry point that compiles template source once and renders it against a
# {Payload}.
#
# `Template#render` takes a {Payload} only: the Payload is the explicit
# boundary between untrusted host data and the internal value space.
# To render from a raw Hash, either go through the convenience facade
# {Natsuzora.render} or wrap the Hash with `Natsuzora::Payload.new(...)`
# before calling `#render`.
class Template
  attr_reader :ast

  # Compiles source into an AST immediately.
  #
  # @param source [String] template source text
  # @param include_root [Object, nil] passed to TemplateLoader.new when
  #   given; without it no loader is created
  def initialize(source, include_root: nil)
    @source = source
    @loader = include_root && TemplateLoader.new(include_root)
    @ast = parse_ruby(source)
  end

  # @param payload [Natsuzora::Payload] prepared render input
  # @return [String] rendered output
  def render(payload)
    renderer = Renderer.new(@ast, template_loader: @loader)
    renderer.render(payload.data)
  end

  private

  # Lexes and parses source with the Ruby Lexer/Parser pair.
  def parse_ruby(source)
    Parser.new(Lexer.new(source).tokenize).parse
  end
end
34
+ end