better_html 0.0.12 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/lib/better_html.rb +0 -2
  3. data/lib/better_html/ast/iterator.rb +32 -0
  4. data/lib/better_html/ast/node.rb +14 -0
  5. data/lib/better_html/better_erb/runtime_checks.rb +3 -3
  6. data/lib/better_html/config.rb +12 -0
  7. data/lib/better_html/parser.rb +286 -0
  8. data/lib/better_html/test_helper/ruby_expr.rb +8 -5
  9. data/lib/better_html/test_helper/safe_erb_tester.rb +121 -108
  10. data/lib/better_html/test_helper/safe_lodash_tester.rb +44 -42
  11. data/lib/better_html/tokenizer/base_erb.rb +79 -0
  12. data/lib/better_html/tokenizer/html_erb.rb +31 -0
  13. data/lib/better_html/{node_iterator → tokenizer}/html_lodash.rb +30 -34
  14. data/lib/better_html/tokenizer/javascript_erb.rb +15 -0
  15. data/lib/better_html/{node_iterator → tokenizer}/location.rb +9 -3
  16. data/lib/better_html/tokenizer/token.rb +16 -0
  17. data/lib/better_html/tokenizer/token_array.rb +54 -0
  18. data/lib/better_html/tree/attribute.rb +31 -0
  19. data/lib/better_html/tree/attributes_list.rb +25 -0
  20. data/lib/better_html/tree/tag.rb +39 -0
  21. data/lib/better_html/version.rb +1 -1
  22. data/test/better_html/parser_test.rb +279 -0
  23. data/test/better_html/test_helper/safe_erb_tester_test.rb +11 -0
  24. data/test/better_html/test_helper/safe_lodash_tester_test.rb +11 -1
  25. data/test/better_html/tokenizer/html_erb_test.rb +158 -0
  26. data/test/better_html/tokenizer/html_lodash_test.rb +98 -0
  27. data/test/better_html/tokenizer/location_test.rb +57 -0
  28. data/test/better_html/tokenizer/token_array_test.rb +144 -0
  29. data/test/better_html/tokenizer/token_test.rb +15 -0
  30. metadata +45 -30
  31. data/lib/better_html/node_iterator.rb +0 -144
  32. data/lib/better_html/node_iterator/attribute.rb +0 -34
  33. data/lib/better_html/node_iterator/base.rb +0 -27
  34. data/lib/better_html/node_iterator/cdata.rb +0 -8
  35. data/lib/better_html/node_iterator/comment.rb +0 -8
  36. data/lib/better_html/node_iterator/content_node.rb +0 -13
  37. data/lib/better_html/node_iterator/element.rb +0 -26
  38. data/lib/better_html/node_iterator/html_erb.rb +0 -70
  39. data/lib/better_html/node_iterator/javascript_erb.rb +0 -55
  40. data/lib/better_html/node_iterator/text.rb +0 -8
  41. data/lib/better_html/node_iterator/token.rb +0 -8
  42. data/lib/better_html/tree.rb +0 -113
  43. data/test/better_html/node_iterator/html_erb_test.rb +0 -116
  44. data/test/better_html/node_iterator/html_lodash_test.rb +0 -132
  45. data/test/better_html/node_iterator/location_test.rb +0 -36
  46. data/test/better_html/node_iterator_test.rb +0 -221
  47. data/test/better_html/tree_test.rb +0 -110
@@ -1,144 +0,0 @@
1
- require_relative 'node_iterator/javascript_erb'
2
- require_relative 'node_iterator/html_erb'
3
- require_relative 'node_iterator/html_lodash'
4
- require_relative 'node_iterator/cdata'
5
- require_relative 'node_iterator/comment'
6
- require_relative 'node_iterator/element'
7
- require_relative 'node_iterator/attribute'
8
- require_relative 'node_iterator/text'
9
-
10
module BetterHtml
  # Groups the flat token list exposed by a tokenizer (HtmlErb, HtmlLodash or
  # JavascriptErb, chosen through +template_language+) into CData, Comment,
  # Element and Text nodes.
  class NodeIterator
    # Token types emitted for template code; they are accepted inside tag
    # names, attribute names, attribute values and text.
    TEMPLATE_TYPES = [:stmt, :expr_literal, :expr_escaped].freeze
    TEXT_TYPES = [:text, *TEMPLATE_TYPES].freeze
    TAG_NAME_TYPES = [:tag_name, *TEMPLATE_TYPES].freeze
    ATTRIBUTE_NAME_TYPES = [:attribute_name, *TEMPLATE_TYPES].freeze
    ATTRIBUTE_VALUE_TYPES = [
      :attribute_quoted_value_start, :attribute_quoted_value,
      :attribute_quoted_value_end, :attribute_unquoted_value,
      *TEMPLATE_TYPES
    ].freeze
    private_constant :TEMPLATE_TYPES, :TEXT_TYPES, :TAG_NAME_TYPES,
      :ATTRIBUTE_NAME_TYPES, :ATTRIBUTE_VALUE_TYPES

    attr_reader :nodes, :template_language

    delegate :each, :each_with_index, :[], to: :nodes
    delegate :parser, to: :@erb, allow_nil: true
    delegate :errors, to: :parser, allow_nil: true, prefix: true

    # document          - the template source handed to the tokenizer.
    # template_language - :html, :lodash or :javascript.
    #
    # Raises ArgumentError for any other template_language.
    def initialize(document, template_language: :html)
      @document = document
      @template_language = template_language
      @erb = case template_language
      when :html
        HtmlErb.new(@document)
      when :lodash
        HtmlLodash.new(@document)
      when :javascript
        JavascriptErb.new(@document)
      else
        # The message now lists every accepted value; :lodash was missing.
        raise ArgumentError, "template_language can be :html, :lodash or :javascript"
      end
      @nodes = parse!
    end

    private

    # Walks a copy of the tokenizer's tokens and returns the list of nodes.
    # Raises RuntimeError when a token type cannot start a node.
    def parse!
      nodes = []
      tokens = @erb.tokens.dup
      while (token = tokens[0])
        case token.type
        when :cdata_start
          tokens.shift
          nodes << consume_cdata(tokens)
        when :comment_start
          tokens.shift
          nodes << consume_comment(tokens)
        when :tag_start
          tokens.shift
          nodes << consume_element(tokens)
        when *TEXT_TYPES
          nodes << consume_text(tokens)
        else
          raise RuntimeError, "Unhandled token #{token.type} line #{token.location.line} column #{token.location.column}"
        end
      end
      nodes
    end

    # Collects everything up to (and drops) the :cdata_end token.
    def consume_cdata(tokens)
      node = CData.new
      node.content_parts.concat(shift_until(tokens, :cdata_end))
      node
    end

    # Collects everything up to (and drops) the :comment_end token.
    def consume_comment(tokens)
      node = Comment.new
      node.content_parts.concat(shift_until(tokens, :comment_end))
      node
    end

    # Builds an Element from the tokens that follow a :tag_start token:
    # optional solidus, name parts, attributes, then the :tag_end token.
    def consume_element(tokens)
      node = Element.new
      if tokens.any? && tokens[0].type == :solidus
        tokens.shift
        node.closing = true
      end
      node.name_parts.concat(shift_while(tokens, TAG_NAME_TYPES))
      while tokens.any?
        token = tokens[0]
        case token.type
        when :attribute_name
          node.attributes << consume_attribute(tokens)
        when :attribute_quoted_value_start
          # A quoted value that is not preceded by an attribute name.
          node.attributes << consume_attribute_value(tokens)
        when :tag_end
          tokens.shift
          node.self_closing = token.self_closing
          break
        else
          # Any other token inside the tag is discarded.
          tokens.shift
        end
      end
      node
    end

    # Builds an Attribute from its name parts and, when an equal sign
    # follows, its value parts.
    def consume_attribute(tokens)
      node = Attribute.new
      node.name_parts.concat(shift_while(tokens, ATTRIBUTE_NAME_TYPES))
      return node unless consume_equal?(tokens)
      node.value_parts.concat(shift_while(tokens, ATTRIBUTE_VALUE_TYPES))
      node
    end

    # Builds an Attribute that only has value parts.
    def consume_attribute_value(tokens)
      node = Attribute.new
      node.value_parts.concat(shift_while(tokens, ATTRIBUTE_VALUE_TYPES))
      node
    end

    # Drops leading :whitespace tokens; returns true once an :equal token has
    # been consumed, false if something else comes first.
    def consume_equal?(tokens)
      while tokens.any? && [:whitespace, :equal].include?(tokens[0].type)
        return true if tokens.shift.type == :equal
      end
      false
    end

    # Collects the run of text and template tokens at the head of the list.
    def consume_text(tokens)
      node = Text.new
      node.content_parts.concat(shift_while(tokens, TEXT_TYPES))
      node
    end

    # Removes and returns the leading tokens whose type is listed in +types+.
    def shift_while(tokens, types)
      taken = []
      taken << tokens.shift while tokens.any? && types.include?(tokens[0].type)
      taken
    end

    # Removes and returns the leading tokens that precede the first token of
    # type +end_type+; that terminating token is removed as well when present.
    def shift_until(tokens, end_type)
      taken = []
      taken << tokens.shift while tokens.any? && tokens[0].type != end_type
      tokens.shift if tokens.any? && tokens[0].type == end_type
      taken
    end
  end
end
@@ -1,34 +0,0 @@
1
- require_relative 'base'
2
-
3
# CGI.unescapeHTML is used below; load it here instead of relying on another
# library having required it already.
require 'cgi'

module BetterHtml
  class NodeIterator
    # An attribute made of name tokens and value tokens.
    class Attribute < Base
      QUOTE_CHARACTERS = ["'", '"'].freeze
      # Token types that carry attribute value content (as opposed to the
      # quote delimiters or template code).
      VALUE_CONTENT_TYPES = [:attribute_quoted_value, :attribute_unquoted_value].freeze
      private_constant :QUOTE_CHARACTERS, :VALUE_CONTENT_TYPES

      tokenized_attribute :name
      tokenized_attribute :value

      def initialize
        @name_parts = []
        @value_parts = []
      end

      # Returns the text of every value part except the quote delimiters.
      # Value content is HTML-unescaped; other parts are returned verbatim.
      def unescaped_value_parts
        value_parts.each_with_object([]) do |part, texts|
          next if quote_delimiter?(part)
          text = value_content?(part) ? CGI.unescapeHTML(part.text) : part.text
          texts << text unless text.nil?
        end
      end

      # Returns the unescaped value parts joined into one string.
      def unescaped_value
        unescaped_value_parts.join
      end

      # Returns the raw text of the value with the quote delimiters removed.
      def value_without_quotes
        value_parts.reject { |part| quote_delimiter?(part) }.map(&:text).join
      end

      private

      def value_content?(part)
        VALUE_CONTENT_TYPES.include?(part.type)
      end

      # A part is a delimiter when its text is a lone quote character and it
      # is not value content. Checking the type keeps a value that consists of
      # a single quote character (e.g. title="'") from being discarded.
      def quote_delimiter?(part)
        QUOTE_CHARACTERS.include?(part.text) && !value_content?(part)
      end
    end
  end
end
@@ -1,27 +0,0 @@
1
module BetterHtml
  class NodeIterator
    # Shared behaviour for node classes: token-backed attributes and
    # node-type predicates.
    class Base
      # Defines a +<name>_parts+ reader and a +<name>+ method returning the
      # joined text of those parts.
      #
      # Uses attr_reader/define_method rather than evaluating generated
      # source, so +name+ is never interpreted as code.
      def self.tokenized_attribute(name)
        parts_reader = :"#{name}_parts"
        attr_reader parts_reader
        define_method(name) { send(parts_reader).map(&:text).join }
      end

      # Returns the downcased last segment of the class name as a symbol
      # (e.g. :element), or nil for an anonymous class, whose name is nil.
      def node_type
        class_name = self.class.name
        return nil if class_name.nil?
        class_name.split('::').last.downcase.to_sym
      end

      # Defines text?, cdata?, comment? and element?.
      %w(text cdata comment element).each do |type_name|
        expected = type_name.to_sym
        define_method(:"#{type_name}?") { node_type == expected }
      end
    end
  end
end
@@ -1,8 +0,0 @@
1
- require_relative 'content_node'
2
-
3
module BetterHtml
  class NodeIterator
    # CDATA node; adds nothing to what ContentNode provides.
    class CData < ContentNode; end
  end
end
@@ -1,8 +0,0 @@
1
- require_relative 'content_node'
2
-
3
module BetterHtml
  class NodeIterator
    # Comment node; adds nothing to what ContentNode provides.
    class Comment < ContentNode; end
  end
end
@@ -1,13 +0,0 @@
1
- require_relative 'base'
2
-
3
module BetterHtml
  class NodeIterator
    # Base class for nodes whose only payload is a list of content tokens.
    class ContentNode < Base
      # Provides +content_parts+ and +content+.
      tokenized_attribute :content

      # Starts with no content tokens.
      def initialize
        @content_parts = []
      end
    end
  end
end
@@ -1,26 +0,0 @@
1
- require_relative 'base'
2
-
3
module BetterHtml
  class NodeIterator
    # A tag: its name tokens, its attributes, and flags telling whether it is
    # a closing and/or self-closing tag.
    class Element < Base
      tokenized_attribute :name

      attr_reader :attributes
      attr_accessor :closing, :self_closing

      alias_method :closing?, :closing
      alias_method :self_closing?, :self_closing

      # Starts with no name tokens and no attributes.
      def initialize
        @name_parts = []
        @attributes = []
      end

      # Returns the first attribute whose name equals +wanted+, or nil.
      def find_attr(wanted)
        @attributes.find { |candidate| candidate.name == wanted }
      end
      alias_method :[], :find_attr
    end
  end
end
@@ -1,70 +0,0 @@
1
- require 'erubi'
2
- require 'html_tokenizer'
3
- require_relative 'token'
4
- require_relative 'location'
5
-
6
module BetterHtml
  class NodeIterator
    # Erubi engine that records tokens instead of building Ruby source:
    # literal text is fed to an HtmlTokenizer::Parser, ERB tags become
    # :stmt / :expr_literal / :expr_escaped tokens.
    class HtmlErb < ::Erubi::Engine
      REGEXP_WITHOUT_TRIM = /<%(={1,2}|-|%)?(.*?)(?:[-=])?()?%>([ \t]*\r?\n)?/m

      attr_reader :tokens
      attr_reader :parser

      # State is set up before calling super because the add_* hooks below
      # use it.
      def initialize(document)
        @parser = HtmlTokenizer::Parser.new
        @tokens = []
        @document = document
        super(document, regexp: REGEXP_WITHOUT_TRIM, trim: false)
      end

      # Feeds literal text to the HTML parser, recording each token it yields.
      def add_text(text)
        @parser.parse(text) { |*args| add_tokens(*args) }
      end

      # Records a statement tag as a :stmt token.
      def add_code(code)
        record_erb_tag(:stmt, '', code)
      end

      # Records an expression tag: :expr_literal when the indicator is '=',
      # :expr_escaped otherwise.
      def add_expression(indicator, code)
        record_erb_tag(indicator == '=' ? :expr_literal : :expr_escaped, indicator, code)
      end

      private

      # Appends a token for an ERB tag and hands the tag text to the parser
      # as a placeholder. +code_location+ skips the "<%" opener plus the
      # indicator and stops before the "%>" closer.
      def record_erb_tag(type, indicator, code)
        text = "<%#{indicator}#{code}%>"
        start = @parser.document_length
        stop = start + text.size
        opener_size = 2 + indicator.size
        @tokens << Token.new(
          type: type,
          code: code,
          text: text,
          location: Location.new(@document, start, stop, @parser.line_number, @parser.column_number),
          code_location: Location.new(@document, start + opener_size, stop - 2, @parser.line_number, @parser.column_number + opener_size)
        )
        @parser.append_placeholder(text)
      end

      # Records a token reported by the HTML parser; :tag_end tokens also
      # carry the parser's self_closing_tag? flag.
      def add_tokens(type, start, stop, line, column)
        extras = type == :tag_end ? { self_closing: @parser.self_closing_tag? } : {}
        @tokens << Token.new(
          type: type,
          text: @parser.extract(start, stop),
          location: Location.new(@document, start, stop, line, column),
          **extras
        )
      end
    end
  end
end
@@ -1,55 +0,0 @@
1
- require 'erubi'
2
- require_relative 'token'
3
- require_relative 'location'
4
-
5
module BetterHtml
  class NodeIterator
    # Erubi engine that records a javascript template as a flat list of
    # :text, :stmt, :expr_literal and :expr_escaped tokens.
    class JavascriptErb < ::Erubi::Engine
      attr_reader :tokens

      # State is set up before calling super because the add_* hooks below
      # use it.
      def initialize(source)
        @source = source
        @parsed_document = ""
        @tokens = []
        super(source, regexp: HtmlErb::REGEXP_WITHOUT_TRIM, trim: false)
      end

      # Records literal text as a :text token.
      def add_text(text)
        add_token(:text, text)
        append(text)
      end

      # Records a statement tag as a :stmt token.
      def add_code(code)
        text = "<%#{code}%>"
        add_token(:stmt, text, code)
        append(text)
      end

      # Records an expression tag: :expr_literal when the indicator is '=',
      # :expr_escaped otherwise.
      def add_expression(indicator, code)
        text = "<%#{indicator}#{code}%>"
        add_token(indicator == '=' ? :expr_literal : :expr_escaped, text, code)
        append(text)
      end

      private

      # Appends a token whose location starts at the end of the text
      # recorded so far.
      def add_token(type, text, code = nil)
        start = @parsed_document.size
        stop = start + text.size
        line, column = current_line_and_column
        @tokens << Token.new(
          type: type,
          text: text,
          code: code,
          location: Location.new(@source, start, stop, line, column)
        )
      end

      # Returns [line, column] for the end of the recorded text: line is one
      # plus the number of newlines seen, column is the number of characters
      # after the last newline. Computed with count/rindex so that the whole
      # document is not split into an array of lines for every token.
      def current_line_and_column
        last_newline = @parsed_document.rindex("\n")
        line = @parsed_document.count("\n") + 1
        column = @parsed_document.size
        column -= last_newline + 1 if last_newline
        [line, column]
      end

      def append(text)
        @parsed_document << text
      end
    end
  end
end
@@ -1,8 +0,0 @@
1
- require_relative 'content_node'
2
-
3
module BetterHtml
  class NodeIterator
    # Text node; adds nothing to what ContentNode provides.
    class Text < ContentNode; end
  end
end
@@ -1,8 +0,0 @@
1
- require 'ostruct'
2
-
3
module BetterHtml
  class NodeIterator
    # A token is a plain OpenStruct: whatever keys it is created with are
    # available as reader methods.
    class Token < OpenStruct; end
  end
end
@@ -1,113 +0,0 @@
1
- require 'better_html/node_iterator'
2
-
3
module BetterHtml
  # Arranges the nodes produced by NodeIterator into a tree of elements and
  # records an error for every closing tag that cannot be matched.
  class Tree
    attr_reader :errors
    attr_reader :root

    cattr_accessor :void_elements
    self.void_elements = %w(area base br col embed hr img
      input keygen link menuitem meta param source track wbr)

    # Error that remembers the token it was reported for.
    class TreeError < HtmlError
      attr_reader :token

      def initialize(token, message)
        @token = token
        super(message)
      end
    end

    # Something that holds child nodes; list operations go to content_nodes.
    class NodeContainer
      attr_accessor :content_nodes
      delegate :each, :[], :each_with_index, :<<, :push,
        :size, :empty?, :any?, to: :content_nodes

      def initialize
        @content_nodes = []
      end

      def root?
        false
      end
    end

    # The container at the top of the tree.
    class TreeRoot < NodeContainer
      def root?
        true
      end
    end

    # An element in the tree: its opening node, its children and, once a
    # matching closing tag was seen, its closing node.
    class Element < NodeContainer
      attr_reader :parent
      attr_accessor :start_node
      attr_accessor :end_node

      delegate :name, :attributes, :self_closing?, to: :start_node
      delegate :element?, :text?, :comment?, :cdata?, to: :start_node

      def initialize(parent:, start_node:)
        super()
        @parent = parent
        @start_node = start_node
      end

      # Truthy when the element cannot take further children.
      def closed?
        void? || end_node.present? || self_closing?
      end

      # True when the element name is listed in Tree.void_elements.
      def void?
        BetterHtml::Tree.void_elements.include?(name)
      end
    end

    # data    - the template source.
    # options - only :template_language is forwarded to NodeIterator.
    #
    # Tree errors are recorded first, then the parser's errors (if any).
    def initialize(data, **options)
      @data = data
      @errors = Errors.new
      @nodes = BetterHtml::NodeIterator.new(data, **options.slice(:template_language))
      @root = TreeRoot.new
      construct!
      @nodes.parser_errors&.each { |error| @errors.add(error) }
    end

    private

    def add_error(token, message)
      @errors.add(TreeError.new(token, message))
    end

    # Feeds every node into the tree, tracking the container currently open.
    # Nodes that are neither content nor elements are ignored.
    def construct!
      cursor = @root
      @nodes.each do |node|
        case node.node_type
        when :text, :comment, :cdata
          cursor << node
        when :element
          cursor = node.closing? ? close_element(cursor, node) : open_element(cursor, node)
        end
      end
    end

    # Handles a closing tag and returns the container to continue with: the
    # parent when the tag matches the open element, otherwise the same
    # container after recording an error.
    def close_element(cursor, node)
      problem =
        if void_elements.include?(node.name)
          "end of tag for void element: </#{node.name}>"
        elsif cursor.root?
          "mismatched </#{node.name}> at root of tree"
        elsif node.name != cursor.name
          "mismatched </#{node.name}> in <#{cursor.name}> element"
        end

      if problem
        add_error(node.name_parts.first, problem)
        return cursor
      end

      cursor.end_node = node
      cursor.parent
    end

    # Handles an opening tag: adds a new element to the current container and
    # returns the element itself unless it is already closed.
    def open_element(cursor, node)
      element = Element.new(parent: cursor, start_node: node)
      cursor << element
      element.closed? ? cursor : element
    end
  end
end