better_html 0.0.12 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/lib/better_html.rb +0 -2
  3. data/lib/better_html/ast/iterator.rb +32 -0
  4. data/lib/better_html/ast/node.rb +14 -0
  5. data/lib/better_html/better_erb/runtime_checks.rb +3 -3
  6. data/lib/better_html/config.rb +12 -0
  7. data/lib/better_html/parser.rb +286 -0
  8. data/lib/better_html/test_helper/ruby_expr.rb +8 -5
  9. data/lib/better_html/test_helper/safe_erb_tester.rb +121 -108
  10. data/lib/better_html/test_helper/safe_lodash_tester.rb +44 -42
  11. data/lib/better_html/tokenizer/base_erb.rb +79 -0
  12. data/lib/better_html/tokenizer/html_erb.rb +31 -0
  13. data/lib/better_html/{node_iterator → tokenizer}/html_lodash.rb +30 -34
  14. data/lib/better_html/tokenizer/javascript_erb.rb +15 -0
  15. data/lib/better_html/{node_iterator → tokenizer}/location.rb +9 -3
  16. data/lib/better_html/tokenizer/token.rb +16 -0
  17. data/lib/better_html/tokenizer/token_array.rb +54 -0
  18. data/lib/better_html/tree/attribute.rb +31 -0
  19. data/lib/better_html/tree/attributes_list.rb +25 -0
  20. data/lib/better_html/tree/tag.rb +39 -0
  21. data/lib/better_html/version.rb +1 -1
  22. data/test/better_html/parser_test.rb +279 -0
  23. data/test/better_html/test_helper/safe_erb_tester_test.rb +11 -0
  24. data/test/better_html/test_helper/safe_lodash_tester_test.rb +11 -1
  25. data/test/better_html/tokenizer/html_erb_test.rb +158 -0
  26. data/test/better_html/tokenizer/html_lodash_test.rb +98 -0
  27. data/test/better_html/tokenizer/location_test.rb +57 -0
  28. data/test/better_html/tokenizer/token_array_test.rb +144 -0
  29. data/test/better_html/tokenizer/token_test.rb +15 -0
  30. metadata +45 -30
  31. data/lib/better_html/node_iterator.rb +0 -144
  32. data/lib/better_html/node_iterator/attribute.rb +0 -34
  33. data/lib/better_html/node_iterator/base.rb +0 -27
  34. data/lib/better_html/node_iterator/cdata.rb +0 -8
  35. data/lib/better_html/node_iterator/comment.rb +0 -8
  36. data/lib/better_html/node_iterator/content_node.rb +0 -13
  37. data/lib/better_html/node_iterator/element.rb +0 -26
  38. data/lib/better_html/node_iterator/html_erb.rb +0 -70
  39. data/lib/better_html/node_iterator/javascript_erb.rb +0 -55
  40. data/lib/better_html/node_iterator/text.rb +0 -8
  41. data/lib/better_html/node_iterator/token.rb +0 -8
  42. data/lib/better_html/tree.rb +0 -113
  43. data/test/better_html/node_iterator/html_erb_test.rb +0 -116
  44. data/test/better_html/node_iterator/html_lodash_test.rb +0 -132
  45. data/test/better_html/node_iterator/location_test.rb +0 -36
  46. data/test/better_html/node_iterator_test.rb +0 -221
  47. data/test/better_html/tree_test.rb +0 -110
@@ -1,4 +1,6 @@
1
1
  require 'better_html/test_helper/safety_error'
2
+ require 'better_html/ast/iterator'
3
+ require 'better_html/tree/tag'
2
4
 
3
5
  module BetterHtml
4
6
  module TestHelper
@@ -51,7 +53,7 @@ EOF
51
53
  @data = data
52
54
  @config = config
53
55
  @errors = Errors.new
54
- @nodes = BetterHtml::NodeIterator.new(data, template_language: :lodash)
56
+ @parser = BetterHtml::Parser.new(data, template_language: :lodash)
55
57
  validate!
56
58
  end
57
59
 
@@ -60,73 +62,73 @@ EOF
60
62
  end
61
63
 
62
64
  def validate!
63
- @nodes.each_with_index do |node, index|
64
- case node
65
- when BetterHtml::NodeIterator::Element
66
- validate_element(node)
67
-
68
- if node.name == 'script' && !node.closing?
69
- add_error(
70
- "No script tags allowed nested in lodash templates",
71
- location: node.name_parts.first.location
72
- )
73
- end
74
- when BetterHtml::NodeIterator::CData, BetterHtml::NodeIterator::Comment
75
- validate_no_statements(node)
65
+ @parser.nodes_with_type(:tag).each do |tag_node|
66
+ tag = Tree::Tag.from_node(tag_node)
67
+ validate_tag_attributes(tag)
68
+ validate_no_statements(tag_node)
69
+
70
+ if tag.name == 'script' && !tag.closing?
71
+ add_error(
72
+ "No script tags allowed nested in lodash templates",
73
+ location: tag_node.loc
74
+ )
76
75
  end
77
76
  end
77
+
78
+ @parser.nodes_with_type(:cdata, :comment).each do |node|
79
+ validate_no_statements(node)
80
+ end
78
81
  end
79
82
 
80
- def validate_element(element)
81
- element.attributes.each do |attribute|
82
- attribute.name_parts.each do |token|
83
- add_no_statement_error(attribute, token) if token.type == :stmt
83
+ def lodash_nodes(node)
84
+ Enumerator.new do |yielder|
85
+ next if node.nil?
86
+ node.descendants(:lodash).each do |lodash_node|
87
+ indicator_node, code_node = *lodash_node
88
+ yielder.yield(lodash_node, indicator_node, code_node)
84
89
  end
90
+ end
91
+ end
92
+
93
+ def validate_tag_attributes(tag)
94
+ tag.attributes.each do |attribute|
95
+ lodash_nodes(attribute.value_node).each do |lodash_node, indicator_node, code_node|
96
+ next if indicator_node.nil?
85
97
 
86
- attribute.value_parts.each do |token|
87
- case token.type
88
- when :stmt
89
- add_no_statement_error(attribute, token)
90
- when :expr_literal
91
- validate_tag_expression(element, attribute.name, token)
92
- when :expr_escaped
98
+ if indicator_node.loc.source == '='
99
+ validate_tag_expression(attribute, lodash_node)
100
+ elsif indicator_node.loc.source == '!'
93
101
  add_error(
94
102
  "lodash interpolation with '[%!' inside html attribute is never safe",
95
- location: token.location
103
+ location: lodash_node.loc
96
104
  )
97
105
  end
98
106
  end
99
107
  end
100
108
  end
101
109
 
102
- def validate_tag_expression(node, attr_name, value_token)
103
- if javascript_attribute_name?(attr_name) && !lodash_safe_javascript_expression?(value_token.code.strip)
110
+ def validate_tag_expression(attribute, lodash_node)
111
+ _, code_node = *lodash_node
112
+ source = code_node.loc.source.strip
113
+ if @config.javascript_attribute_name?(attribute.name) && !@config.lodash_safe_javascript_expression?(source)
104
114
  add_error(
105
115
  "lodash interpolation in javascript attribute "\
106
- "`#{attr_name}` must call `JSON.stringify(#{value_token.code.strip})`",
107
- location: value_token.location
116
+ "`#{attribute.name}` must call `JSON.stringify(#{source})`",
117
+ location: lodash_node.loc
108
118
  )
109
119
  end
110
120
  end
111
121
 
112
- def javascript_attribute_name?(name)
113
- @config.javascript_attribute_names.any?{ |other| other === name }
114
- end
115
-
116
- def lodash_safe_javascript_expression?(code)
117
- @config.lodash_safe_javascript_expression.any?{ |other| other === code }
118
- end
119
-
120
122
  def validate_no_statements(node)
121
- node.content_parts.each do |token|
122
- add_no_statement_error(node, token) if token.type == :stmt
123
+ lodash_nodes(node).each do |lodash_node, indicator_node, code_node|
124
+ add_no_statement_error(lodash_node.loc) if indicator_node.nil?
123
125
  end
124
126
  end
125
127
 
126
- def add_no_statement_error(node, token)
128
+ def add_no_statement_error(loc)
127
129
  add_error(
128
130
  "javascript statement not allowed here; did you mean '[%=' ?",
129
- location: token.location
131
+ location: loc
130
132
  )
131
133
  end
132
134
  end
@@ -0,0 +1,79 @@
1
+ require 'erubi'
2
+ require_relative 'token'
3
+ require_relative 'location'
4
+
module BetterHtml
  module Tokenizer
    # Erubi engine subclass that, instead of generating Ruby source, records
    # a flat list of Token objects describing where each part of every ERB
    # tag sits in the document.
    #
    # This class only handles ERB tags (+add_code+ / +add_expression+);
    # subclasses supply +add_text+ to decide how the text between tags is
    # tokenized.
    class BaseErb < ::Erubi::Engine
      # Scanner pattern handed to Erubi. Captures: optional indicator
      # ("=", "==" or "%"), the tag body, an always-empty group, and any
      # trailing whitespace/newline after "%>".
      REGEXP_WITHOUT_TRIM = /<%(={1,2}|%)?(.*?)()?%>([ \t]*\r?\n)?/m
      # Splits a statement body into optional leading "-" or "#", the code
      # itself, and an optional trailing "-".
      STMT_TRIM_MATCHER = /\A(-|#)?(.*?)(-)?\z/m
      # Splits an expression body into the code and an optional trailing "-".
      EXPR_TRIM_MATCHER = /\A(.*?)(-)?\z/m

      # Tokens recorded so far, in document order.
      attr_reader :tokens
      # Offset into the document of the next character to be consumed.
      attr_reader :current_position

      # @param document [String] template source to tokenize
      def initialize(document)
        # State is set up before calling super; presumably Erubi's
        # constructor performs the scan and calls back into the hooks
        # below - confirm against the Erubi documentation.
        @document = document
        @tokens = []
        @current_position = 0
        super(document, regexp: REGEXP_WITHOUT_TRIM, trim: false)
      end

      private

      # Advances the position past +text+ without emitting a token.
      def append(text)
        @current_position += text.length
      end

      # Handles a statement tag body (<% ... %>, <%- ... -%>, <%# ... %>).
      def add_code(code)
        _, ltrim_or_comment, code, rtrim = *STMT_TRIM_MATCHER.match(code)
        # The first capture is either a left-trim marker or a comment
        # indicator; only one of the two locals below is ever set.
        ltrim = ltrim_or_comment if ltrim_or_comment == '-'
        indicator = ltrim_or_comment if ltrim_or_comment == '#'
        add_erb_tokens(ltrim, indicator, code, rtrim)
        # Tokens are recorded first because add_erb_tokens reads
        # current_position, which append then moves past the whole tag.
        append("<%#{ltrim}#{indicator}#{code}#{rtrim}%>")
      end

      # Handles an expression tag body (<%= ... %>, <%== ... %>).
      def add_expression(indicator, code)
        _, code, rtrim = *EXPR_TRIM_MATCHER.match(code)
        add_erb_tokens(nil, indicator, code, rtrim)
        append("<%#{indicator}#{code}#{rtrim}%>")
      end

      # Emits, in order: :erb_begin, optional :trim, optional :indicator,
      # :code, optional :trim, :erb_end - each covering consecutive
      # character ranges starting at the current position.
      #
      # @return [Token] the :erb_end token
      def add_erb_tokens(ltrim, indicator, code, rtrim)
        pos = current_position

        # "<%" is two characters wide.
        add_token(:erb_begin, pos, pos + 2)
        pos += 2

        if ltrim
          add_token(:trim, pos, pos + ltrim.length)
          pos += ltrim.length
        end

        if indicator
          add_token(:indicator, pos, pos + indicator.length)
          pos += indicator.length
        end

        add_token(:code, pos, pos + code.length)
        pos += code.length

        if rtrim
          add_token(:trim, pos, pos + rtrim.length)
          pos += rtrim.length
        end

        # "%>" is two characters wide.
        add_token(:erb_end, pos, pos + 2)
      end

      # Builds a Token, stores it in +tokens+ and returns it.
      #
      # @param type [Symbol] token type
      # @param start [Integer] offset of the first character
      # @param stop [Integer] offset one past the last character
      # @param line [Integer, nil] optional line, forwarded to Location
      # @param column [Integer, nil] optional column, forwarded to Location
      # @return [Token]
      def add_token(type, start, stop, line = nil, column = nil)
        token = Token.new(
          type: type,
          # stop is exclusive here; Location receives stop - 1.
          loc: Location.new(@document, start, stop - 1, line, column)
        )
        @tokens << token
        token
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'html_tokenizer'
2
+ require_relative 'base_erb'
3
+
module BetterHtml
  module Tokenizer
    # ERB tokenizer for HTML templates: text between ERB tags is fed to an
    # HtmlTokenizer::Parser and every token that parser reports is recorded
    # alongside the ERB tokens produced by BaseErb.
    class HtmlErb < BaseErb
      # The underlying HtmlTokenizer::Parser instance.
      attr_reader :parser

      # @param document [String] template source to tokenize
      def initialize(document)
        # The parser is created before super because add_text/append below
        # use it; presumably they are invoked while the parent constructor
        # runs - confirm against the Erubi documentation.
        @parser = HtmlTokenizer::Parser.new
        super(document)
      end

      # Position is taken from the HTML parser rather than from the counter
      # kept by the parent class.
      def current_position
        @parser.document_length
      end

      private

      # Hands the raw ERB tag text to the HTML parser as a placeholder.
      # NOTE(review): presumably this advances document_length without
      # parsing the tag as HTML - verify against html_tokenizer.
      def append(text)
        @parser.append_placeholder(text)
      end

      # Parses a run of template text as HTML and records each token the
      # parser yields.
      def add_text(text)
        @parser.parse(text) do |type, start, stop, line, column|
          add_token(type, start, stop, line, column)
        end
      end
    end
  end
end
@@ -1,8 +1,9 @@
1
+ require 'active_support'
1
2
  require_relative 'token'
2
3
  require_relative 'location'
3
4
 
4
5
  module BetterHtml
5
- class NodeIterator
6
+ module Tokenizer
6
7
  class HtmlLodash
7
8
  attr_reader :tokens
8
9
  attr_reader :parser
@@ -12,9 +13,9 @@ module BetterHtml
12
13
  self.lodash_evaluate = %r{(?:\[\%)(.+?)(?:\%\])}m
13
14
  self.lodash_interpolate = %r{(?:\[\%)!(.+?)(?:\%\])}m
14
15
 
15
- def initialize(source)
16
- @source = source
17
- @scanner = StringScanner.new(source)
16
+ def initialize(document)
17
+ @document = document
18
+ @scanner = StringScanner.new(document)
18
19
  @parser = HtmlTokenizer::Parser.new
19
20
  @tokens = []
20
21
  scan!
@@ -28,20 +29,21 @@ module BetterHtml
28
29
  if scanned.present?
29
30
  captures = scan_pattern.match(scanned).captures
30
31
  if pre_match = captures[0]
31
- add_text(pre_match) unless pre_match.blank?
32
+ add_text(pre_match) if pre_match.present?
32
33
  end
33
34
  match = captures[1]
34
35
  if code = lodash_escape.match(match)
35
- add_expr_escape(match, code.captures[0])
36
+ add_lodash_tokens("=", code.captures[0])
36
37
  elsif code = lodash_interpolate.match(match)
37
- add_expr_interpolate(match, code.captures[0])
38
+ add_lodash_tokens("!", code.captures[0])
38
39
  elsif code = lodash_evaluate.match(match)
39
- add_stmt(match, code.captures[0])
40
+ add_lodash_tokens(nil, code.captures[0])
40
41
  else
41
42
  raise RuntimeError, 'unexpected match'
42
43
  end
44
+ @parser.append_placeholder(match)
43
45
  else
44
- text = @source[(@scanner.pos)..(@source.size)]
46
+ text = @document[(@scanner.pos)..(@document.size)]
45
47
  add_text(text) unless text.blank?
46
48
  break
47
49
  end
@@ -61,40 +63,34 @@ module BetterHtml
61
63
 
62
64
  def add_text(text)
63
65
  @parser.parse(text) do |type, start, stop, line, column|
64
- add_token(type, @parser.extract(start, stop), start: start, stop: stop, line: line, column: column)
66
+ add_token(type, start: start, stop: stop, line: line, column: column)
65
67
  end
66
68
  end
67
69
 
68
- def add_stmt(text, code)
69
- add_token(:stmt, text, code: code)
70
- @parser.append_placeholder(text)
71
- end
70
+ def add_lodash_tokens(indicator, code)
71
+ pos = @parser.document_length
72
72
 
73
- def add_expr_interpolate(text, code)
74
- add_token(:expr_escaped, text, code: code)
75
- @parser.append_placeholder(text)
76
- end
73
+ add_token(:lodash_begin, start: pos, stop: pos + 2)
74
+ pos += 2
75
+
76
+ if indicator
77
+ add_token(:indicator, start: pos, stop: pos + indicator.length)
78
+ pos += indicator.length
79
+ end
80
+
81
+ add_token(:code, start: pos, stop: pos + code.length)
82
+ pos += code.length
77
83
 
78
- def add_expr_escape(text, code)
79
- add_token(:expr_literal, text, code: code)
80
- @parser.append_placeholder(text)
84
+ add_token(:lodash_end, start: pos, stop: pos + 2)
81
85
  end
82
86
 
83
- def add_token(type, text, code: nil, start: nil, stop: nil, line: nil, column: nil)
84
- start ||= @parser.document_length
85
- stop ||= start + text.size
86
- extra_attributes = if type == :tag_end
87
- {
88
- self_closing: @parser.self_closing_tag?
89
- }
90
- end
91
- @tokens << Token.new(
87
+ def add_token(type, start: nil, stop: nil, line: nil, column: nil)
88
+ token = Token.new(
92
89
  type: type,
93
- text: text,
94
- code: code,
95
- location: Location.new(@source, start, stop, line || @parser.line_number, column || @parser.column_number),
96
- **(extra_attributes || {})
90
+ loc: Location.new(@document, start, stop-1, line, column)
97
91
  )
92
+ @tokens << token
93
+ token
98
94
  end
99
95
  end
100
96
  end
@@ -0,0 +1,15 @@
1
+ require_relative 'base_erb'
2
+
module BetterHtml
  module Tokenizer
    # ERB tokenizer for JavaScript templates: each run of text between ERB
    # tags becomes a single :text token instead of being parsed further.
    class JavascriptErb < BaseErb
      private

      # Records one :text token spanning +text+ (when it is present) and
      # then advances the position past it in every case.
      #
      # NOTE(review): +present?+ is not core Ruby; it presumably comes from
      # ActiveSupport being loaded elsewhere - confirm it is required
      # before this class is used.
      def add_text(text)
        pos = current_position
        add_token(:text, pos, pos + text.size) if text.present?
        append(text)
      end
    end
  end
end
@@ -1,9 +1,13 @@
1
1
  module BetterHtml
2
- class NodeIterator
2
+ module Tokenizer
3
3
  class Location
4
4
  attr_accessor :start, :stop
5
5
 
6
6
  def initialize(document, start, stop, line = nil, column = nil)
7
+ raise ArgumentError, "start location #{start} is out of range for document of size #{document.size}" if start > document.size
8
+ raise ArgumentError, "stop location #{stop} is out of range for document of size #{document.size}" if stop > document.size
9
+ raise ArgumentError, "end of range must be greater than start of range (#{stop} < #{start})" if stop < start
10
+
7
11
  @document = document
8
12
  @start = start
9
13
  @stop = stop
@@ -12,7 +16,7 @@ module BetterHtml
12
16
  end
13
17
 
14
18
  def range
15
- Range.new(start, stop-1)
19
+ Range.new(start, stop)
16
20
  end
17
21
 
18
22
  def source
@@ -31,17 +35,19 @@ module BetterHtml
31
35
  line_content = extract_line(line: line)
32
36
  spaces = line_content.scan(/\A\s*/).first
33
37
  column_without_spaces = [column - spaces.length, 0].max
34
- underscore_length = [[stop - start, line_content.length - column_without_spaces].min, 1].max
38
+ underscore_length = [[stop - start + 1, line_content.length - column_without_spaces].min, 1].max
35
39
  "#{line_content.gsub(/\A\s*/, '')}\n#{' ' * column_without_spaces}#{'^' * underscore_length}"
36
40
  end
37
41
 
38
42
  private
39
43
 
40
44
  def calculate_line
45
+ return 1 if start == 0
41
46
  @document[0..start-1].scan("\n").count + 1
42
47
  end
43
48
 
44
49
  def calculate_column
50
+ return 0 if start == 0
45
51
  @document[0..start-1]&.split("\n", -1)&.last&.length || 0
46
52
  end
47
53
 
@@ -0,0 +1,16 @@
module BetterHtml
  module Tokenizer
    # A single token: a type tag plus the location it covers.
    class Token
      # type: the token type given at construction.
      # loc: the location object given at construction (may be nil).
      attr_reader :type, :loc

      # @param type token type; the tokenizers in this library pass symbols
      # @param loc location object responding to +source+, or nil
      def initialize(type:, loc:)
        @type = type
        @loc = loc
      end

      # Compact debugging form: t(<type>, <source text>).
      # With a nil loc the second field renders as "nil", because +&.+
      # only skips the +source+ call and +inspect+ still runs on nil.
      def inspect
        "t(#{type.inspect}, #{loc&.source.inspect})"
      end
    end
  end
end
@@ -0,0 +1,54 @@
module BetterHtml
  module Tokenizer
    # A consumable window over a list of tokens. Items can be taken from
    # the front (+shift+) or the back (+pop+); the underlying list is never
    # modified, only the two window indexes move.
    class TokenArray
      # @param list [Array] tokens to wrap; its size is captured once here
      def initialize(list)
        @list = list
        # Index of the next item to shift.
        @current = 0
        # One past the index of the next item to pop.
        @last = @list.size
      end

      # Removes and returns the first remaining token.
      # @raise [RuntimeError] when no tokens remain
      def shift
        raise RuntimeError, 'no tokens left to shift' if empty?
        item = @list[@current]
        @current += 1
        item
      end

      # Removes and returns the last remaining token.
      # @raise [RuntimeError] when no tokens remain
      def pop
        raise RuntimeError, 'no tokens left to pop' if empty?
        item = @list[@last - 1]
        @last -= 1
        item
      end

      # Drops every leading and then every trailing token whose +type+
      # equals the given type. Returns nil.
      def trim(type)
        shift while current&.type == type
        pop while last&.type == type
      end

      # True when the window holds no tokens.
      def empty?
        size <= 0
      end

      # True when at least one token remains.
      def any?
        !empty?
      end

      # First remaining token, or nil when empty.
      def current
        @list[@current] unless empty?
      end

      # Last remaining token, or nil when empty.
      def last
        @list[@last - 1] unless empty?
      end

      # Number of tokens still inside the window.
      def size
        @last - @current
      end
    end
  end
end