better_html 0.0.12 → 1.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/lib/better_html.rb +0 -2
  3. data/lib/better_html/ast/iterator.rb +32 -0
  4. data/lib/better_html/ast/node.rb +14 -0
  5. data/lib/better_html/better_erb/runtime_checks.rb +3 -3
  6. data/lib/better_html/config.rb +12 -0
  7. data/lib/better_html/parser.rb +286 -0
  8. data/lib/better_html/test_helper/ruby_expr.rb +8 -5
  9. data/lib/better_html/test_helper/safe_erb_tester.rb +121 -108
  10. data/lib/better_html/test_helper/safe_lodash_tester.rb +44 -42
  11. data/lib/better_html/tokenizer/base_erb.rb +79 -0
  12. data/lib/better_html/tokenizer/html_erb.rb +31 -0
  13. data/lib/better_html/{node_iterator → tokenizer}/html_lodash.rb +30 -34
  14. data/lib/better_html/tokenizer/javascript_erb.rb +15 -0
  15. data/lib/better_html/{node_iterator → tokenizer}/location.rb +9 -3
  16. data/lib/better_html/tokenizer/token.rb +16 -0
  17. data/lib/better_html/tokenizer/token_array.rb +54 -0
  18. data/lib/better_html/tree/attribute.rb +31 -0
  19. data/lib/better_html/tree/attributes_list.rb +25 -0
  20. data/lib/better_html/tree/tag.rb +39 -0
  21. data/lib/better_html/version.rb +1 -1
  22. data/test/better_html/parser_test.rb +279 -0
  23. data/test/better_html/test_helper/safe_erb_tester_test.rb +11 -0
  24. data/test/better_html/test_helper/safe_lodash_tester_test.rb +11 -1
  25. data/test/better_html/tokenizer/html_erb_test.rb +158 -0
  26. data/test/better_html/tokenizer/html_lodash_test.rb +98 -0
  27. data/test/better_html/tokenizer/location_test.rb +57 -0
  28. data/test/better_html/tokenizer/token_array_test.rb +144 -0
  29. data/test/better_html/tokenizer/token_test.rb +15 -0
  30. metadata +45 -30
  31. data/lib/better_html/node_iterator.rb +0 -144
  32. data/lib/better_html/node_iterator/attribute.rb +0 -34
  33. data/lib/better_html/node_iterator/base.rb +0 -27
  34. data/lib/better_html/node_iterator/cdata.rb +0 -8
  35. data/lib/better_html/node_iterator/comment.rb +0 -8
  36. data/lib/better_html/node_iterator/content_node.rb +0 -13
  37. data/lib/better_html/node_iterator/element.rb +0 -26
  38. data/lib/better_html/node_iterator/html_erb.rb +0 -70
  39. data/lib/better_html/node_iterator/javascript_erb.rb +0 -55
  40. data/lib/better_html/node_iterator/text.rb +0 -8
  41. data/lib/better_html/node_iterator/token.rb +0 -8
  42. data/lib/better_html/tree.rb +0 -113
  43. data/test/better_html/node_iterator/html_erb_test.rb +0 -116
  44. data/test/better_html/node_iterator/html_lodash_test.rb +0 -132
  45. data/test/better_html/node_iterator/location_test.rb +0 -36
  46. data/test/better_html/node_iterator_test.rb +0 -221
  47. data/test/better_html/tree_test.rb +0 -110
@@ -1,4 +1,6 @@
1
1
  require 'better_html/test_helper/safety_error'
2
+ require 'better_html/ast/iterator'
3
+ require 'better_html/tree/tag'
2
4
 
3
5
  module BetterHtml
4
6
  module TestHelper
@@ -51,7 +53,7 @@ EOF
51
53
  @data = data
52
54
  @config = config
53
55
  @errors = Errors.new
54
- @nodes = BetterHtml::NodeIterator.new(data, template_language: :lodash)
56
+ @parser = BetterHtml::Parser.new(data, template_language: :lodash)
55
57
  validate!
56
58
  end
57
59
 
@@ -60,73 +62,73 @@ EOF
60
62
  end
61
63
 
62
64
  def validate!
63
- @nodes.each_with_index do |node, index|
64
- case node
65
- when BetterHtml::NodeIterator::Element
66
- validate_element(node)
67
-
68
- if node.name == 'script' && !node.closing?
69
- add_error(
70
- "No script tags allowed nested in lodash templates",
71
- location: node.name_parts.first.location
72
- )
73
- end
74
- when BetterHtml::NodeIterator::CData, BetterHtml::NodeIterator::Comment
75
- validate_no_statements(node)
65
+ @parser.nodes_with_type(:tag).each do |tag_node|
66
+ tag = Tree::Tag.from_node(tag_node)
67
+ validate_tag_attributes(tag)
68
+ validate_no_statements(tag_node)
69
+
70
+ if tag.name == 'script' && !tag.closing?
71
+ add_error(
72
+ "No script tags allowed nested in lodash templates",
73
+ location: tag_node.loc
74
+ )
76
75
  end
77
76
  end
77
+
78
+ @parser.nodes_with_type(:cdata, :comment).each do |node|
79
+ validate_no_statements(node)
80
+ end
78
81
  end
79
82
 
80
- def validate_element(element)
81
- element.attributes.each do |attribute|
82
- attribute.name_parts.each do |token|
83
- add_no_statement_error(attribute, token) if token.type == :stmt
83
+ def lodash_nodes(node)
84
+ Enumerator.new do |yielder|
85
+ next if node.nil?
86
+ node.descendants(:lodash).each do |lodash_node|
87
+ indicator_node, code_node = *lodash_node
88
+ yielder.yield(lodash_node, indicator_node, code_node)
84
89
  end
90
+ end
91
+ end
92
+
93
+ def validate_tag_attributes(tag)
94
+ tag.attributes.each do |attribute|
95
+ lodash_nodes(attribute.value_node).each do |lodash_node, indicator_node, code_node|
96
+ next if indicator_node.nil?
85
97
 
86
- attribute.value_parts.each do |token|
87
- case token.type
88
- when :stmt
89
- add_no_statement_error(attribute, token)
90
- when :expr_literal
91
- validate_tag_expression(element, attribute.name, token)
92
- when :expr_escaped
98
+ if indicator_node.loc.source == '='
99
+ validate_tag_expression(attribute, lodash_node)
100
+ elsif indicator_node.loc.source == '!'
93
101
  add_error(
94
102
  "lodash interpolation with '[%!' inside html attribute is never safe",
95
- location: token.location
103
+ location: lodash_node.loc
96
104
  )
97
105
  end
98
106
  end
99
107
  end
100
108
  end
101
109
 
102
- def validate_tag_expression(node, attr_name, value_token)
103
- if javascript_attribute_name?(attr_name) && !lodash_safe_javascript_expression?(value_token.code.strip)
110
+ def validate_tag_expression(attribute, lodash_node)
111
+ _, code_node = *lodash_node
112
+ source = code_node.loc.source.strip
113
+ if @config.javascript_attribute_name?(attribute.name) && !@config.lodash_safe_javascript_expression?(source)
104
114
  add_error(
105
115
  "lodash interpolation in javascript attribute "\
106
- "`#{attr_name}` must call `JSON.stringify(#{value_token.code.strip})`",
107
- location: value_token.location
116
+ "`#{attribute.name}` must call `JSON.stringify(#{source})`",
117
+ location: lodash_node.loc
108
118
  )
109
119
  end
110
120
  end
111
121
 
112
- def javascript_attribute_name?(name)
113
- @config.javascript_attribute_names.any?{ |other| other === name }
114
- end
115
-
116
- def lodash_safe_javascript_expression?(code)
117
- @config.lodash_safe_javascript_expression.any?{ |other| other === code }
118
- end
119
-
120
122
  def validate_no_statements(node)
121
- node.content_parts.each do |token|
122
- add_no_statement_error(node, token) if token.type == :stmt
123
+ lodash_nodes(node).each do |lodash_node, indicator_node, code_node|
124
+ add_no_statement_error(lodash_node.loc) if indicator_node.nil?
123
125
  end
124
126
  end
125
127
 
126
- def add_no_statement_error(node, token)
128
+ def add_no_statement_error(loc)
127
129
  add_error(
128
130
  "javascript statement not allowed here; did you mean '[%=' ?",
129
- location: token.location
131
+ location: loc
130
132
  )
131
133
  end
132
134
  end
@@ -0,0 +1,79 @@
1
+ require 'erubi'
2
+ require_relative 'token'
3
+ require_relative 'location'
4
+
5
+ module BetterHtml
6
+ module Tokenizer
7
+ class BaseErb < ::Erubi::Engine
8
+ REGEXP_WITHOUT_TRIM = /<%(={1,2}|%)?(.*?)()?%>([ \t]*\r?\n)?/m
9
+ STMT_TRIM_MATCHER = /\A(-|#)?(.*?)(-)?\z/m
10
+ EXPR_TRIM_MATCHER = /\A(.*?)(-)?\z/m
11
+
12
+ attr_reader :tokens
13
+ attr_reader :current_position
14
+
15
+ def initialize(document)
16
+ @document = document
17
+ @tokens = []
18
+ @current_position = 0
19
+ super(document, regexp: REGEXP_WITHOUT_TRIM, trim: false)
20
+ end
21
+
22
+ private
23
+
24
+ def append(text)
25
+ @current_position += text.length
26
+ end
27
+
28
+ def add_code(code)
29
+ _, ltrim_or_comment, code, rtrim = *STMT_TRIM_MATCHER.match(code)
30
+ ltrim = ltrim_or_comment if ltrim_or_comment == '-'
31
+ indicator = ltrim_or_comment if ltrim_or_comment == '#'
32
+ add_erb_tokens(ltrim, indicator, code, rtrim)
33
+ append("<%#{ltrim}#{indicator}#{code}#{rtrim}%>")
34
+ end
35
+
36
+ def add_expression(indicator, code)
37
+ _, code, rtrim = *EXPR_TRIM_MATCHER.match(code)
38
+ add_erb_tokens(nil, indicator, code, rtrim)
39
+ append("<%#{indicator}#{code}#{rtrim}%>")
40
+ end
41
+
42
+ def add_erb_tokens(ltrim, indicator, code, rtrim)
43
+ pos = current_position
44
+
45
+ token = add_token(:erb_begin, pos, pos + 2)
46
+ pos += 2
47
+
48
+ if ltrim
49
+ token = add_token(:trim, pos, pos + ltrim.length)
50
+ pos += ltrim.length
51
+ end
52
+
53
+ if indicator
54
+ token = add_token(:indicator, pos, pos + indicator.length)
55
+ pos += indicator.length
56
+ end
57
+
58
+ token = add_token(:code, pos, pos + code.length)
59
+ pos += code.length
60
+
61
+ if rtrim
62
+ token = add_token(:trim, pos, pos + rtrim.length)
63
+ pos += rtrim.length
64
+ end
65
+
66
+ token = add_token(:erb_end, pos, pos + 2)
67
+ end
68
+
69
+ def add_token(type, start, stop, line = nil, column = nil)
70
+ token = Token.new(
71
+ type: type,
72
+ loc: Location.new(@document, start, stop - 1, line, column)
73
+ )
74
+ @tokens << token
75
+ token
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,31 @@
1
+ require 'html_tokenizer'
2
+ require_relative 'base_erb'
3
+
4
+ module BetterHtml
5
+ module Tokenizer
6
+ class HtmlErb < BaseErb
7
+ attr_reader :parser
8
+
9
+ def initialize(document)
10
+ @parser = HtmlTokenizer::Parser.new
11
+ super(document)
12
+ end
13
+
14
+ def current_position
15
+ @parser.document_length
16
+ end
17
+
18
+ private
19
+
20
+ def append(text)
21
+ @parser.append_placeholder(text)
22
+ end
23
+
24
+ def add_text(text)
25
+ @parser.parse(text) do |type, start, stop, line, column|
26
+ add_token(type, start, stop, line, column)
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -1,8 +1,9 @@
1
+ require 'active_support'
1
2
  require_relative 'token'
2
3
  require_relative 'location'
3
4
 
4
5
  module BetterHtml
5
- class NodeIterator
6
+ module Tokenizer
6
7
  class HtmlLodash
7
8
  attr_reader :tokens
8
9
  attr_reader :parser
@@ -12,9 +13,9 @@ module BetterHtml
12
13
  self.lodash_evaluate = %r{(?:\[\%)(.+?)(?:\%\])}m
13
14
  self.lodash_interpolate = %r{(?:\[\%)!(.+?)(?:\%\])}m
14
15
 
15
- def initialize(source)
16
- @source = source
17
- @scanner = StringScanner.new(source)
16
+ def initialize(document)
17
+ @document = document
18
+ @scanner = StringScanner.new(document)
18
19
  @parser = HtmlTokenizer::Parser.new
19
20
  @tokens = []
20
21
  scan!
@@ -28,20 +29,21 @@ module BetterHtml
28
29
  if scanned.present?
29
30
  captures = scan_pattern.match(scanned).captures
30
31
  if pre_match = captures[0]
31
- add_text(pre_match) unless pre_match.blank?
32
+ add_text(pre_match) if pre_match.present?
32
33
  end
33
34
  match = captures[1]
34
35
  if code = lodash_escape.match(match)
35
- add_expr_escape(match, code.captures[0])
36
+ add_lodash_tokens("=", code.captures[0])
36
37
  elsif code = lodash_interpolate.match(match)
37
- add_expr_interpolate(match, code.captures[0])
38
+ add_lodash_tokens("!", code.captures[0])
38
39
  elsif code = lodash_evaluate.match(match)
39
- add_stmt(match, code.captures[0])
40
+ add_lodash_tokens(nil, code.captures[0])
40
41
  else
41
42
  raise RuntimeError, 'unexpected match'
42
43
  end
44
+ @parser.append_placeholder(match)
43
45
  else
44
- text = @source[(@scanner.pos)..(@source.size)]
46
+ text = @document[(@scanner.pos)..(@document.size)]
45
47
  add_text(text) unless text.blank?
46
48
  break
47
49
  end
@@ -61,40 +63,34 @@ module BetterHtml
61
63
 
62
64
  def add_text(text)
63
65
  @parser.parse(text) do |type, start, stop, line, column|
64
- add_token(type, @parser.extract(start, stop), start: start, stop: stop, line: line, column: column)
66
+ add_token(type, start: start, stop: stop, line: line, column: column)
65
67
  end
66
68
  end
67
69
 
68
- def add_stmt(text, code)
69
- add_token(:stmt, text, code: code)
70
- @parser.append_placeholder(text)
71
- end
70
+ def add_lodash_tokens(indicator, code)
71
+ pos = @parser.document_length
72
72
 
73
- def add_expr_interpolate(text, code)
74
- add_token(:expr_escaped, text, code: code)
75
- @parser.append_placeholder(text)
76
- end
73
+ add_token(:lodash_begin, start: pos, stop: pos + 2)
74
+ pos += 2
75
+
76
+ if indicator
77
+ add_token(:indicator, start: pos, stop: pos + indicator.length)
78
+ pos += indicator.length
79
+ end
80
+
81
+ add_token(:code, start: pos, stop: pos + code.length)
82
+ pos += code.length
77
83
 
78
- def add_expr_escape(text, code)
79
- add_token(:expr_literal, text, code: code)
80
- @parser.append_placeholder(text)
84
+ add_token(:lodash_end, start: pos, stop: pos + 2)
81
85
  end
82
86
 
83
- def add_token(type, text, code: nil, start: nil, stop: nil, line: nil, column: nil)
84
- start ||= @parser.document_length
85
- stop ||= start + text.size
86
- extra_attributes = if type == :tag_end
87
- {
88
- self_closing: @parser.self_closing_tag?
89
- }
90
- end
91
- @tokens << Token.new(
87
+ def add_token(type, start: nil, stop: nil, line: nil, column: nil)
88
+ token = Token.new(
92
89
  type: type,
93
- text: text,
94
- code: code,
95
- location: Location.new(@source, start, stop, line || @parser.line_number, column || @parser.column_number),
96
- **(extra_attributes || {})
90
+ loc: Location.new(@document, start, stop-1, line, column)
97
91
  )
92
+ @tokens << token
93
+ token
98
94
  end
99
95
  end
100
96
  end
@@ -0,0 +1,15 @@
1
+ require_relative 'base_erb'
2
+
3
+ module BetterHtml
4
+ module Tokenizer
5
+ class JavascriptErb < BaseErb
6
+ private
7
+
8
+ def add_text(text)
9
+ pos = current_position
10
+ add_token(:text, pos, pos + text.size) if text.present?
11
+ append(text)
12
+ end
13
+ end
14
+ end
15
+ end
@@ -1,9 +1,13 @@
1
1
  module BetterHtml
2
- class NodeIterator
2
+ module Tokenizer
3
3
  class Location
4
4
  attr_accessor :start, :stop
5
5
 
6
6
  def initialize(document, start, stop, line = nil, column = nil)
7
+ raise ArgumentError, "start location #{start} is out of range for document of size #{document.size}" if start > document.size
8
+ raise ArgumentError, "stop location #{stop} is out of range for document of size #{document.size}" if stop > document.size
9
+ raise ArgumentError, "end of range must be greater than start of range (#{stop} < #{start})" if stop < start
10
+
7
11
  @document = document
8
12
  @start = start
9
13
  @stop = stop
@@ -12,7 +16,7 @@ module BetterHtml
12
16
  end
13
17
 
14
18
  def range
15
- Range.new(start, stop-1)
19
+ Range.new(start, stop)
16
20
  end
17
21
 
18
22
  def source
@@ -31,17 +35,19 @@ module BetterHtml
31
35
  line_content = extract_line(line: line)
32
36
  spaces = line_content.scan(/\A\s*/).first
33
37
  column_without_spaces = [column - spaces.length, 0].max
34
- underscore_length = [[stop - start, line_content.length - column_without_spaces].min, 1].max
38
+ underscore_length = [[stop - start + 1, line_content.length - column_without_spaces].min, 1].max
35
39
  "#{line_content.gsub(/\A\s*/, '')}\n#{' ' * column_without_spaces}#{'^' * underscore_length}"
36
40
  end
37
41
 
38
42
  private
39
43
 
40
44
  def calculate_line
45
+ return 1 if start == 0
41
46
  @document[0..start-1].scan("\n").count + 1
42
47
  end
43
48
 
44
49
  def calculate_column
50
+ return 0 if start == 0
45
51
  @document[0..start-1]&.split("\n", -1)&.last&.length || 0
46
52
  end
47
53
 
@@ -0,0 +1,16 @@
1
+ module BetterHtml
2
+ module Tokenizer
3
+ class Token
4
+ attr_reader :type, :loc
5
+
6
+ def initialize(type:, loc:)
7
+ @type = type
8
+ @loc = loc
9
+ end
10
+
11
+ def inspect
12
+ "t(#{type.inspect}, #{loc&.source.inspect})"
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,54 @@
1
+ module BetterHtml
2
+ module Tokenizer
3
+ class TokenArray
4
+ def initialize(list)
5
+ @list = list
6
+ @current = 0
7
+ @last = @list.size
8
+ end
9
+
10
+ def shift
11
+ raise RuntimeError, 'no tokens left to shift' if empty?
12
+ item = @list[@current]
13
+ @current += 1
14
+ item
15
+ end
16
+
17
+ def pop
18
+ raise RuntimeError, 'no tokens left to pop' if empty?
19
+ item = @list[@last - 1]
20
+ @last -= 1
21
+ item
22
+ end
23
+
24
+ def trim(type)
25
+ while current&.type == type
26
+ shift
27
+ end
28
+ while last&.type == type
29
+ pop
30
+ end
31
+ end
32
+
33
+ def empty?
34
+ size <= 0
35
+ end
36
+
37
+ def any?
38
+ !empty?
39
+ end
40
+
41
+ def current
42
+ @list[@current] unless empty?
43
+ end
44
+
45
+ def last
46
+ @list[@last - 1] unless empty?
47
+ end
48
+
49
+ def size
50
+ @last - @current
51
+ end
52
+ end
53
+ end
54
+ end