better_html 0.0.12 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/lib/better_html.rb +0 -2
  3. data/lib/better_html/ast/iterator.rb +32 -0
  4. data/lib/better_html/ast/node.rb +14 -0
  5. data/lib/better_html/better_erb/runtime_checks.rb +3 -3
  6. data/lib/better_html/config.rb +12 -0
  7. data/lib/better_html/parser.rb +286 -0
  8. data/lib/better_html/test_helper/ruby_expr.rb +8 -5
  9. data/lib/better_html/test_helper/safe_erb_tester.rb +121 -108
  10. data/lib/better_html/test_helper/safe_lodash_tester.rb +44 -42
  11. data/lib/better_html/tokenizer/base_erb.rb +79 -0
  12. data/lib/better_html/tokenizer/html_erb.rb +31 -0
  13. data/lib/better_html/{node_iterator → tokenizer}/html_lodash.rb +30 -34
  14. data/lib/better_html/tokenizer/javascript_erb.rb +15 -0
  15. data/lib/better_html/{node_iterator → tokenizer}/location.rb +9 -3
  16. data/lib/better_html/tokenizer/token.rb +16 -0
  17. data/lib/better_html/tokenizer/token_array.rb +54 -0
  18. data/lib/better_html/tree/attribute.rb +31 -0
  19. data/lib/better_html/tree/attributes_list.rb +25 -0
  20. data/lib/better_html/tree/tag.rb +39 -0
  21. data/lib/better_html/version.rb +1 -1
  22. data/test/better_html/parser_test.rb +279 -0
  23. data/test/better_html/test_helper/safe_erb_tester_test.rb +11 -0
  24. data/test/better_html/test_helper/safe_lodash_tester_test.rb +11 -1
  25. data/test/better_html/tokenizer/html_erb_test.rb +158 -0
  26. data/test/better_html/tokenizer/html_lodash_test.rb +98 -0
  27. data/test/better_html/tokenizer/location_test.rb +57 -0
  28. data/test/better_html/tokenizer/token_array_test.rb +144 -0
  29. data/test/better_html/tokenizer/token_test.rb +15 -0
  30. metadata +45 -30
  31. data/lib/better_html/node_iterator.rb +0 -144
  32. data/lib/better_html/node_iterator/attribute.rb +0 -34
  33. data/lib/better_html/node_iterator/base.rb +0 -27
  34. data/lib/better_html/node_iterator/cdata.rb +0 -8
  35. data/lib/better_html/node_iterator/comment.rb +0 -8
  36. data/lib/better_html/node_iterator/content_node.rb +0 -13
  37. data/lib/better_html/node_iterator/element.rb +0 -26
  38. data/lib/better_html/node_iterator/html_erb.rb +0 -70
  39. data/lib/better_html/node_iterator/javascript_erb.rb +0 -55
  40. data/lib/better_html/node_iterator/text.rb +0 -8
  41. data/lib/better_html/node_iterator/token.rb +0 -8
  42. data/lib/better_html/tree.rb +0 -113
  43. data/test/better_html/node_iterator/html_erb_test.rb +0 -116
  44. data/test/better_html/node_iterator/html_lodash_test.rb +0 -132
  45. data/test/better_html/node_iterator/location_test.rb +0 -36
  46. data/test/better_html/node_iterator_test.rb +0 -221
  47. data/test/better_html/tree_test.rb +0 -110
@@ -1,144 +0,0 @@
1
- require_relative 'node_iterator/javascript_erb'
2
- require_relative 'node_iterator/html_erb'
3
- require_relative 'node_iterator/html_lodash'
4
- require_relative 'node_iterator/cdata'
5
- require_relative 'node_iterator/comment'
6
- require_relative 'node_iterator/element'
7
- require_relative 'node_iterator/attribute'
8
- require_relative 'node_iterator/text'
9
-
10
- module BetterHtml
11
- class NodeIterator
12
- attr_reader :nodes, :template_language
13
-
14
- delegate :each, :each_with_index, :[], to: :nodes
15
- delegate :parser, to: :@erb, allow_nil: true
16
- delegate :errors, to: :parser, allow_nil: true, prefix: true
17
-
18
- def initialize(document, template_language: :html)
19
- @document = document
20
- @template_language = template_language
21
- @erb = case template_language
22
- when :html
23
- HtmlErb.new(@document)
24
- when :lodash
25
- HtmlLodash.new(@document)
26
- when :javascript
27
- JavascriptErb.new(@document)
28
- else
29
- raise ArgumentError, "template_language can be :html or :javascript"
30
- end
31
- @nodes = parse!
32
- end
33
-
34
- private
35
-
36
- def parse!
37
- nodes = []
38
- tokens = @erb.tokens.dup
39
- while token = tokens[0]
40
- case token.type
41
- when :cdata_start
42
- tokens.shift
43
- nodes << consume_cdata(tokens)
44
- when :comment_start
45
- tokens.shift
46
- nodes << consume_comment(tokens)
47
- when :tag_start
48
- tokens.shift
49
- nodes << consume_element(tokens)
50
- when :text, :stmt, :expr_literal, :expr_escaped
51
- nodes << consume_text(tokens)
52
- else
53
- raise RuntimeError, "Unhandled token #{token.type} line #{token.location.line} column #{token.location.column}"
54
- end
55
- end
56
- nodes
57
- end
58
-
59
- def consume_cdata(tokens)
60
- node = CData.new
61
- while tokens.any? && tokens[0].type != :cdata_end
62
- node.content_parts << tokens.shift
63
- end
64
- tokens.shift if tokens.any? && tokens[0].type == :cdata_end
65
- node
66
- end
67
-
68
- def consume_comment(tokens)
69
- node = Comment.new
70
- while tokens.any? && tokens[0].type != :comment_end
71
- node.content_parts << tokens.shift
72
- end
73
- tokens.shift if tokens.any? && tokens[0].type == :comment_end
74
- node
75
- end
76
-
77
- def consume_element(tokens)
78
- node = Element.new
79
- if tokens.any? && tokens[0].type == :solidus
80
- tokens.shift
81
- node.closing = true
82
- end
83
- while tokens.any? && [:tag_name, :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
84
- node.name_parts << tokens.shift
85
- end
86
- while tokens.any?
87
- token = tokens[0]
88
- if token.type == :attribute_name
89
- node.attributes << consume_attribute(tokens)
90
- elsif token.type == :attribute_quoted_value_start
91
- node.attributes << consume_attribute_value(tokens)
92
- elsif token.type == :tag_end
93
- tokens.shift
94
- node.self_closing = token.self_closing
95
- break
96
- else
97
- tokens.shift
98
- end
99
- end
100
- node
101
- end
102
-
103
- def consume_attribute(tokens)
104
- node = Attribute.new
105
- while tokens.any? && [:attribute_name, :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
106
- node.name_parts << tokens.shift
107
- end
108
- return node unless consume_equal?(tokens)
109
- while tokens.any? && [
110
- :attribute_quoted_value_start, :attribute_quoted_value,
111
- :attribute_quoted_value_end, :attribute_unquoted_value,
112
- :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
113
- node.value_parts << tokens.shift
114
- end
115
- node
116
- end
117
-
118
- def consume_attribute_value(tokens)
119
- node = Attribute.new
120
- while tokens.any? && [
121
- :attribute_quoted_value_start, :attribute_quoted_value,
122
- :attribute_quoted_value_end, :attribute_unquoted_value,
123
- :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
124
- node.value_parts << tokens.shift
125
- end
126
- node
127
- end
128
-
129
- def consume_equal?(tokens)
130
- while tokens.any? && [:whitespace, :equal].include?(tokens[0].type)
131
- return true if tokens.shift.type == :equal
132
- end
133
- false
134
- end
135
-
136
- def consume_text(tokens)
137
- node = Text.new
138
- while tokens.any? && [:text, :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
139
- node.content_parts << tokens.shift
140
- end
141
- node
142
- end
143
- end
144
- end
@@ -1,34 +0,0 @@
1
- require_relative 'base'
2
-
3
- module BetterHtml
4
- class NodeIterator
5
- class Attribute < Base
6
- tokenized_attribute :name
7
- tokenized_attribute :value
8
-
9
- def initialize
10
- @name_parts = []
11
- @value_parts = []
12
- end
13
-
14
- def unescaped_value_parts
15
- value_parts.map do |part|
16
- next if ["'", '"'].include?(part.text)
17
- if [:attribute_quoted_value, :attribute_unquoted_value].include?(part.type)
18
- CGI.unescapeHTML(part.text)
19
- else
20
- part.text
21
- end
22
- end.compact
23
- end
24
-
25
- def unescaped_value
26
- unescaped_value_parts.join
27
- end
28
-
29
- def value_without_quotes
30
- value_parts.map{ |s| ["'", '"'].include?(s.text) ? '' : s.text }.join
31
- end
32
- end
33
- end
34
- end
@@ -1,27 +0,0 @@
1
- module BetterHtml
2
- class NodeIterator
3
- class Base
4
- def self.tokenized_attribute(name)
5
- class_eval <<~RUBY
6
- attr_reader :#{name}_parts
7
-
8
- def #{name}
9
- #{name}_parts.map(&:text).join
10
- end
11
- RUBY
12
- end
13
-
14
- def node_type
15
- self.class.name.split('::').last.downcase.to_sym
16
- end
17
-
18
- %w(text cdata comment element).each do |name|
19
- class_eval <<~RUBY
20
- def #{name}?
21
- node_type == :#{name}
22
- end
23
- RUBY
24
- end
25
- end
26
- end
27
- end
@@ -1,8 +0,0 @@
1
- require_relative 'content_node'
2
-
3
- module BetterHtml
4
- class NodeIterator
5
- class CData < ContentNode
6
- end
7
- end
8
- end
@@ -1,8 +0,0 @@
1
- require_relative 'content_node'
2
-
3
- module BetterHtml
4
- class NodeIterator
5
- class Comment < ContentNode
6
- end
7
- end
8
- end
@@ -1,13 +0,0 @@
1
- require_relative 'base'
2
-
3
- module BetterHtml
4
- class NodeIterator
5
- class ContentNode < Base
6
- tokenized_attribute :content
7
-
8
- def initialize
9
- @content_parts = []
10
- end
11
- end
12
- end
13
- end
@@ -1,26 +0,0 @@
1
- require_relative 'base'
2
-
3
- module BetterHtml
4
- class NodeIterator
5
- class Element < Base
6
- tokenized_attribute :name
7
- attr_reader :attributes
8
- attr_accessor :closing, :self_closing
9
- alias_method :closing?, :closing
10
- alias_method :self_closing?, :self_closing
11
-
12
- def initialize
13
- @name_parts = []
14
- @attributes = []
15
- end
16
-
17
- def find_attr(wanted)
18
- @attributes.each do |attribute|
19
- return attribute if attribute.name == wanted
20
- end
21
- nil
22
- end
23
- alias_method :[], :find_attr
24
- end
25
- end
26
- end
@@ -1,70 +0,0 @@
1
- require 'erubi'
2
- require 'html_tokenizer'
3
- require_relative 'token'
4
- require_relative 'location'
5
-
6
- module BetterHtml
7
- class NodeIterator
8
- class HtmlErb < ::Erubi::Engine
9
- attr_reader :tokens
10
- attr_reader :parser
11
-
12
- REGEXP_WITHOUT_TRIM = /<%(={1,2}|-|%)?(.*?)(?:[-=])?()?%>([ \t]*\r?\n)?/m
13
-
14
- def initialize(document)
15
- @parser = HtmlTokenizer::Parser.new
16
- @tokens = []
17
- @document = document
18
- super(document, regexp: REGEXP_WITHOUT_TRIM, trim: false)
19
- end
20
-
21
- def add_text(text)
22
- @parser.parse(text) { |*args| add_tokens(*args) }
23
- end
24
-
25
- def add_code(code)
26
- text = "<%#{code}%>"
27
- start = @parser.document_length
28
- stop = start + text.size
29
- @tokens << Token.new(
30
- type: :stmt,
31
- code: code,
32
- text: text,
33
- location: Location.new(@document, start, stop, @parser.line_number, @parser.column_number),
34
- code_location: Location.new(@document, start+2, stop-2, @parser.line_number, @parser.column_number+2)
35
- )
36
- @parser.append_placeholder(text)
37
- end
38
-
39
- def add_expression(indicator, code)
40
- text = "<%#{indicator}#{code}%>"
41
- start = @parser.document_length
42
- stop = start + text.size
43
- @tokens << Token.new(
44
- type: indicator == '=' ? :expr_literal : :expr_escaped,
45
- code: code,
46
- text: text,
47
- location: Location.new(@document, start, stop, @parser.line_number, @parser.column_number),
48
- code_location: Location.new(@document, start+2+indicator.size, stop-2, @parser.line_number, @parser.column_number+2+indicator.size)
49
- )
50
- @parser.append_placeholder(text)
51
- end
52
-
53
- private
54
-
55
- def add_tokens(type, start, stop, line, column)
56
- extra_attributes = if type == :tag_end
57
- {
58
- self_closing: @parser.self_closing_tag?
59
- }
60
- end
61
- @tokens << Token.new(
62
- type: type,
63
- text: @parser.extract(start, stop),
64
- location: Location.new(@document, start, stop, line, column),
65
- **(extra_attributes || {})
66
- )
67
- end
68
- end
69
- end
70
- end
@@ -1,55 +0,0 @@
1
- require 'erubi'
2
- require_relative 'token'
3
- require_relative 'location'
4
-
5
- module BetterHtml
6
- class NodeIterator
7
- class JavascriptErb < ::Erubi::Engine
8
- attr_reader :tokens
9
-
10
- def initialize(source)
11
- @source = source
12
- @parsed_document = ""
13
- @tokens = []
14
- super(source, regexp: HtmlErb::REGEXP_WITHOUT_TRIM, trim: false)
15
- end
16
-
17
- def add_text(text)
18
- add_token(:text, text)
19
- append(text)
20
- end
21
-
22
- def add_code(code)
23
- text = "<%#{code}%>"
24
- add_token(:stmt, text, code)
25
- append(text)
26
- end
27
-
28
- def add_expression(indicator, code)
29
- text = "<%#{indicator}#{code}%>"
30
- add_token(indicator == '=' ? :expr_literal : :expr_escaped, text, code)
31
- append(text)
32
- end
33
-
34
- private
35
-
36
- def add_token(type, text, code = nil)
37
- start = @parsed_document.size
38
- stop = start + text.size
39
- lines = @parsed_document.split("\n", -1)
40
- line = lines.empty? ? 1 : lines.size
41
- column = lines.empty? ? 0 : lines.last.size
42
- @tokens << Token.new(
43
- type: type,
44
- text: text,
45
- code: code,
46
- location: Location.new(@source, start, stop, line, column)
47
- )
48
- end
49
-
50
- def append(text)
51
- @parsed_document << text
52
- end
53
- end
54
- end
55
- end
@@ -1,8 +0,0 @@
1
- require_relative 'content_node'
2
-
3
- module BetterHtml
4
- class NodeIterator
5
- class Text < ContentNode
6
- end
7
- end
8
- end
@@ -1,8 +0,0 @@
1
- require 'ostruct'
2
-
3
- module BetterHtml
4
- class NodeIterator
5
- class Token < OpenStruct
6
- end
7
- end
8
- end
@@ -1,113 +0,0 @@
1
- require 'better_html/node_iterator'
2
-
3
- module BetterHtml
4
- class Tree
5
- attr_reader :errors
6
- attr_reader :root
7
-
8
- cattr_accessor :void_elements
9
- self.void_elements = %w(area base br col embed hr img
10
- input keygen link menuitem meta param source track wbr)
11
-
12
- def initialize(data, **options)
13
- @data = data
14
- @errors = Errors.new
15
- @nodes = BetterHtml::NodeIterator.new(data, **options.slice(:template_language))
16
- @root = TreeRoot.new
17
- construct!
18
- @nodes.parser_errors&.each do |error|
19
- @errors.add(error)
20
- end
21
- end
22
-
23
- private
24
-
25
- class TreeError < HtmlError
26
- attr_reader :token
27
-
28
- def initialize(token, message)
29
- @token = token
30
- super(message)
31
- end
32
- end
33
-
34
- def add_error(token, message)
35
- @errors.add(TreeError.new(token, message))
36
- end
37
-
38
- def construct!
39
- current = @root
40
- @nodes.each do |node|
41
- case node.node_type
42
- when :text, :comment, :cdata
43
- current << node
44
- when :element
45
- if node.closing?
46
- if void_elements.include?(node.name)
47
- add_error(node.name_parts.first,
48
- "end of tag for void element: </#{node.name}>")
49
- elsif current.root?
50
- add_error(node.name_parts.first,
51
- "mismatched </#{node.name}> at root of tree")
52
- else
53
- if node.name == current.name
54
- current.end_node = node
55
- current = current.parent
56
- else
57
- add_error(node.name_parts.first,
58
- "mismatched </#{node.name}> in <#{current.name}> element")
59
- end
60
- end
61
- else
62
- element = Element.new(parent: current, start_node: node)
63
- current << element
64
- current = element unless element.closed?
65
- end
66
- end
67
- end
68
- end
69
-
70
- class NodeContainer
71
- attr_accessor :content_nodes
72
- delegate :each, :[], :each_with_index, :<<, :push,
73
- :size, :empty?, :any?, to: :content_nodes
74
-
75
- def root?
76
- false
77
- end
78
-
79
- def initialize
80
- @content_nodes = []
81
- end
82
- end
83
-
84
- class TreeRoot < NodeContainer
85
- def root?
86
- true
87
- end
88
- end
89
-
90
- class Element < NodeContainer
91
- attr_reader :parent
92
- attr_accessor :start_node
93
- attr_accessor :end_node
94
-
95
- delegate :name, :attributes, :self_closing?, to: :start_node
96
- delegate :element?, :text?, :comment?, :cdata?, to: :start_node
97
-
98
- def initialize(parent:, start_node:)
99
- super()
100
- @parent = parent
101
- @start_node = start_node
102
- end
103
-
104
- def closed?
105
- void? || end_node.present? || self_closing?
106
- end
107
-
108
- def void?
109
- BetterHtml::Tree.void_elements.include?(name)
110
- end
111
- end
112
- end
113
- end