better_html 0.0.12 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/better_html.rb +0 -2
- data/lib/better_html/ast/iterator.rb +32 -0
- data/lib/better_html/ast/node.rb +14 -0
- data/lib/better_html/better_erb/runtime_checks.rb +3 -3
- data/lib/better_html/config.rb +12 -0
- data/lib/better_html/parser.rb +286 -0
- data/lib/better_html/test_helper/ruby_expr.rb +8 -5
- data/lib/better_html/test_helper/safe_erb_tester.rb +121 -108
- data/lib/better_html/test_helper/safe_lodash_tester.rb +44 -42
- data/lib/better_html/tokenizer/base_erb.rb +79 -0
- data/lib/better_html/tokenizer/html_erb.rb +31 -0
- data/lib/better_html/{node_iterator → tokenizer}/html_lodash.rb +30 -34
- data/lib/better_html/tokenizer/javascript_erb.rb +15 -0
- data/lib/better_html/{node_iterator → tokenizer}/location.rb +9 -3
- data/lib/better_html/tokenizer/token.rb +16 -0
- data/lib/better_html/tokenizer/token_array.rb +54 -0
- data/lib/better_html/tree/attribute.rb +31 -0
- data/lib/better_html/tree/attributes_list.rb +25 -0
- data/lib/better_html/tree/tag.rb +39 -0
- data/lib/better_html/version.rb +1 -1
- data/test/better_html/parser_test.rb +279 -0
- data/test/better_html/test_helper/safe_erb_tester_test.rb +11 -0
- data/test/better_html/test_helper/safe_lodash_tester_test.rb +11 -1
- data/test/better_html/tokenizer/html_erb_test.rb +158 -0
- data/test/better_html/tokenizer/html_lodash_test.rb +98 -0
- data/test/better_html/tokenizer/location_test.rb +57 -0
- data/test/better_html/tokenizer/token_array_test.rb +144 -0
- data/test/better_html/tokenizer/token_test.rb +15 -0
- metadata +45 -30
- data/lib/better_html/node_iterator.rb +0 -144
- data/lib/better_html/node_iterator/attribute.rb +0 -34
- data/lib/better_html/node_iterator/base.rb +0 -27
- data/lib/better_html/node_iterator/cdata.rb +0 -8
- data/lib/better_html/node_iterator/comment.rb +0 -8
- data/lib/better_html/node_iterator/content_node.rb +0 -13
- data/lib/better_html/node_iterator/element.rb +0 -26
- data/lib/better_html/node_iterator/html_erb.rb +0 -70
- data/lib/better_html/node_iterator/javascript_erb.rb +0 -55
- data/lib/better_html/node_iterator/text.rb +0 -8
- data/lib/better_html/node_iterator/token.rb +0 -8
- data/lib/better_html/tree.rb +0 -113
- data/test/better_html/node_iterator/html_erb_test.rb +0 -116
- data/test/better_html/node_iterator/html_lodash_test.rb +0 -132
- data/test/better_html/node_iterator/location_test.rb +0 -36
- data/test/better_html/node_iterator_test.rb +0 -221
- data/test/better_html/tree_test.rb +0 -110
@@ -1,144 +0,0 @@
|
|
1
|
-
require_relative 'node_iterator/javascript_erb'
|
2
|
-
require_relative 'node_iterator/html_erb'
|
3
|
-
require_relative 'node_iterator/html_lodash'
|
4
|
-
require_relative 'node_iterator/cdata'
|
5
|
-
require_relative 'node_iterator/comment'
|
6
|
-
require_relative 'node_iterator/element'
|
7
|
-
require_relative 'node_iterator/attribute'
|
8
|
-
require_relative 'node_iterator/text'
|
9
|
-
|
10
|
-
module BetterHtml
  # Groups the flat token stream of a template into a list of nodes.
  #
  # The document is tokenized by HtmlErb, HtmlLodash or JavascriptErb
  # (selected through +template_language+). Consecutive tokens are then
  # collected into CData, Comment, Element and Text nodes, which are exposed
  # through #nodes and the delegated #each, #each_with_index and #[].
  class NodeIterator
    # Token types produced for template code. They are accepted wherever
    # literal markup is accepted: in text, tag names, attribute names and
    # attribute values.
    CODE_TYPES = %i[stmt expr_literal expr_escaped].freeze
    TEXT_TYPES = [:text, *CODE_TYPES].freeze
    TAG_NAME_TYPES = [:tag_name, *CODE_TYPES].freeze
    ATTRIBUTE_NAME_TYPES = [:attribute_name, *CODE_TYPES].freeze
    ATTRIBUTE_VALUE_TYPES = [
      :attribute_quoted_value_start, :attribute_quoted_value,
      :attribute_quoted_value_end, :attribute_unquoted_value,
      *CODE_TYPES
    ].freeze
    EQUAL_SEPARATOR_TYPES = %i[whitespace equal].freeze
    private_constant :CODE_TYPES, :TEXT_TYPES, :TAG_NAME_TYPES,
      :ATTRIBUTE_NAME_TYPES, :ATTRIBUTE_VALUE_TYPES, :EQUAL_SEPARATOR_TYPES

    attr_reader :nodes, :template_language

    delegate :each, :each_with_index, :[], to: :nodes
    delegate :errors, to: :parser, allow_nil: true, prefix: true

    # @param document [String] template source to tokenize
    # @param template_language [Symbol] :html, :lodash or :javascript
    # @raise [ArgumentError] for any other +template_language+
    def initialize(document, template_language: :html)
      @document = document
      @template_language = template_language
      @erb = case template_language
      when :html
        HtmlErb.new(@document)
      when :lodash
        HtmlLodash.new(@document)
      when :javascript
        JavascriptErb.new(@document)
      else
        raise ArgumentError, "template_language can be :html, :lodash or :javascript"
      end
      @nodes = parse!
    end

    # The underlying HTML parser, or nil when the selected tokenizer does not
    # expose one (JavascriptErb defines no #parser). Returning nil here lets
    # #parser_errors return nil for such tokenizers instead of raising
    # NoMethodError.
    def parser
      @erb.parser if @erb.respond_to?(:parser)
    end

    private

    # Consumes a copy of the tokenizer's tokens from the front, building one
    # node per iteration. Every branch removes at least one token, so the
    # loop always terminates.
    #
    # @raise [RuntimeError] when a token type cannot start a node
    def parse!
      nodes = []
      tokens = @erb.tokens.dup
      until tokens.empty?
        token = tokens.first
        nodes << case token.type
        when :cdata_start
          tokens.shift
          consume_cdata(tokens)
        when :comment_start
          tokens.shift
          consume_comment(tokens)
        when :tag_start
          tokens.shift
          consume_element(tokens)
        when *TEXT_TYPES
          consume_text(tokens)
        else
          raise RuntimeError, "Unhandled token #{token.type} line #{token.location.line} column #{token.location.column}"
        end
      end
      nodes
    end

    # Builds a CData node from everything up to the :cdata_end token.
    def consume_cdata(tokens)
      consume_content_until(CData.new, tokens, :cdata_end)
    end

    # Builds a Comment node from everything up to the :comment_end token.
    def consume_comment(tokens)
      consume_content_until(Comment.new, tokens, :comment_end)
    end

    # Moves tokens into node.content_parts until a token of +end_type+ is
    # found; the end token itself is discarded. A missing end token simply
    # exhausts the stream.
    def consume_content_until(node, tokens, end_type)
      node.content_parts << tokens.shift until tokens.empty? || tokens.first.type == end_type
      tokens.shift unless tokens.empty?
      node
    end

    # Builds an Element from the tokens following a :tag_start, up to and
    # including the matching :tag_end.
    def consume_element(tokens)
      node = Element.new
      if !tokens.empty? && tokens.first.type == :solidus
        tokens.shift
        node.closing = true
      end
      shift_while(tokens, TAG_NAME_TYPES, node.name_parts)
      until tokens.empty?
        token = tokens.first
        case token.type
        when :attribute_name
          node.attributes << consume_attribute(tokens)
        when :attribute_quoted_value_start
          # A quoted value with no preceding name becomes a nameless attribute.
          node.attributes << consume_attribute_value(tokens)
        when :tag_end
          tokens.shift
          node.self_closing = token.self_closing
          break
        else
          # Anything else inside the tag is skipped.
          tokens.shift
        end
      end
      node
    end

    # Builds an Attribute from its name tokens and, when an equal sign
    # follows, its value tokens.
    def consume_attribute(tokens)
      node = Attribute.new
      shift_while(tokens, ATTRIBUTE_NAME_TYPES, node.name_parts)
      return node unless consume_equal?(tokens)
      shift_while(tokens, ATTRIBUTE_VALUE_TYPES, node.value_parts)
      node
    end

    # Builds an Attribute that has value tokens only.
    def consume_attribute_value(tokens)
      node = Attribute.new
      shift_while(tokens, ATTRIBUTE_VALUE_TYPES, node.value_parts)
      node
    end

    # Discards leading whitespace tokens and reports whether an :equal token
    # was found (and consumed).
    def consume_equal?(tokens)
      while !tokens.empty? && EQUAL_SEPARATOR_TYPES.include?(tokens.first.type)
        return true if tokens.shift.type == :equal
      end
      false
    end

    # Builds a Text node from a run of text and template-code tokens.
    def consume_text(tokens)
      node = Text.new
      shift_while(tokens, TEXT_TYPES, node.content_parts)
      node
    end

    # Moves tokens from the front of +tokens+ into +target+ for as long as
    # their type is one of +types+.
    def shift_while(tokens, types, target)
      target << tokens.shift while !tokens.empty? && types.include?(tokens.first.type)
    end
  end
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module BetterHtml
  class NodeIterator
    # An HTML attribute, stored as the raw tokens that make up its name and
    # its value. The value tokens include the surrounding quote tokens when
    # the value is quoted.
    class Attribute < Base
      # Token types of the opening and closing quote of a quoted value.
      QUOTE_TYPES = %i[attribute_quoted_value_start attribute_quoted_value_end].freeze
      # Token types holding literal (HTML-escaped) value text.
      LITERAL_VALUE_TYPES = %i[attribute_quoted_value attribute_unquoted_value].freeze
      private_constant :QUOTE_TYPES, :LITERAL_VALUE_TYPES

      tokenized_attribute :name
      tokenized_attribute :value

      def initialize
        @name_parts = []
        @value_parts = []
      end

      # Text of each value token with the quote delimiters removed. Literal
      # value text is HTML-unescaped; template-code tokens are returned as
      # written.
      #
      # Delimiters are recognised by token type rather than by text, so a
      # value chunk consisting only of a quote character (for example the
      # +'+ between two ERB tags inside a double-quoted value) is kept.
      #
      # NOTE(review): CGI is not required by this file; presumably it is
      # loaded by the host application - confirm.
      #
      # @return [Array<String>]
      def unescaped_value_parts
        value_parts.map do |part|
          next if quote_delimiter?(part)
          if LITERAL_VALUE_TYPES.include?(part.type)
            CGI.unescapeHTML(part.text)
          else
            part.text
          end
        end.compact
      end

      # @return [String] the parts of #unescaped_value_parts joined together
      def unescaped_value
        unescaped_value_parts.join
      end

      # @return [String] raw value text without the quote delimiters and
      #   without HTML-unescaping
      def value_without_quotes
        value_parts.reject { |part| quote_delimiter?(part) }.map(&:text).join
      end

      private

      # True for the tokens that open or close a quoted attribute value.
      def quote_delimiter?(part)
        QUOTE_TYPES.include?(part.type)
      end
    end
  end
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
module BetterHtml
  class NodeIterator
    # Shared behaviour for node classes: the +tokenized_attribute+ macro and
    # node type predicates derived from the class name.
    class Base
      # Declares an attribute that is stored as a list of tokens.
      #
      # Defines a reader named "<name>_parts" for the token list and a reader
      # named <name> that returns the text of every token joined together.
      # The including class is expected to initialise @<name>_parts itself.
      #
      # Methods are generated with define_method instead of evaluating an
      # interpolated source string.
      #
      # @param name [Symbol, String] attribute name
      def self.tokenized_attribute(name)
        parts_reader = :"#{name}_parts"
        attr_reader parts_reader

        define_method(name) { send(parts_reader).map(&:text).join }
      end

      # @return [Symbol] last segment of the class name, downcased
      #   (for example :text for a class named ...::Text)
      def node_type
        self.class.name.split('::').last.downcase.to_sym
      end

      # text?, cdata?, comment? and element? compare against #node_type.
      %i[text cdata comment element].each do |kind|
        define_method(:"#{kind}?") { node_type == kind }
      end
    end
  end
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module BetterHtml
  class NodeIterator
    # A start or end tag: its name tokens, its attributes, and flags telling
    # whether it is a closing tag and whether it is self-closing.
    class Element < Base
      attr_accessor :closing, :self_closing
      attr_reader :attributes

      tokenized_attribute :name

      # Predicate spellings of the two flags above.
      alias_method :closing?, :closing
      alias_method :self_closing?, :self_closing

      def initialize
        @attributes = []
        @name_parts = []
      end

      # Looks an attribute up by its exact name.
      #
      # @param wanted [String] attribute name to match
      # @return [Attribute, nil] the first attribute whose name equals
      #   +wanted+, or nil when there is none
      def find_attr(wanted)
        @attributes.find { |candidate| candidate.name == wanted }
      end
      alias_method :[], :find_attr
    end
  end
end
|
@@ -1,70 +0,0 @@
|
|
1
|
-
require 'erubi'
|
2
|
-
require 'html_tokenizer'
|
3
|
-
require_relative 'token'
|
4
|
-
require_relative 'location'
|
5
|
-
|
6
|
-
module BetterHtml
  class NodeIterator
    # Erubi engine that records tokens instead of generating Ruby source.
    #
    # Literal template text is fed to an HtmlTokenizer::Parser, whose tokens
    # are collected in #tokens. ERB tags are recorded as :stmt,
    # :expr_literal or :expr_escaped tokens and handed to the parser as
    # placeholders.
    class HtmlErb < ::Erubi::Engine
      REGEXP_WITHOUT_TRIM = /<%(={1,2}|-|%)?(.*?)(?:[-=])?()?%>([ \t]*\r?\n)?/m

      attr_reader :tokens, :parser

      # @param document [String] template source
      def initialize(document)
        @parser = HtmlTokenizer::Parser.new
        @tokens = []
        @document = document
        super(document, regexp: REGEXP_WITHOUT_TRIM, trim: false)
      end

      # Tokenizes a chunk of literal text, recording every token the parser
      # yields.
      def add_text(text)
        @parser.parse(text) { |*token_args| add_tokens(*token_args) }
      end

      # Records a statement tag as a :stmt token.
      def add_code(code)
        add_erb_token(:stmt, '', code)
      end

      # Records an expression tag. An indicator of '=' gives :expr_literal;
      # any other indicator gives :expr_escaped.
      def add_expression(indicator, code)
        kind = indicator == '=' ? :expr_literal : :expr_escaped
        add_erb_token(kind, indicator, code)
      end

      private

      # Appends a token for the tag "<%" + indicator + code + "%>" positioned
      # at the parser's current offset, line and column, then registers the
      # tag text with the parser as a placeholder. The code location skips
      # the two-character opener plus the indicator and stops before the
      # two-character closer.
      def add_erb_token(kind, indicator, code)
        text = "<%#{indicator}#{code}%>"
        start = @parser.document_length
        stop = start + text.size
        code_offset = 2 + indicator.size
        tag_location = Location.new(@document, start, stop, @parser.line_number, @parser.column_number)
        code_location = Location.new(
          @document, start + code_offset, stop - 2,
          @parser.line_number, @parser.column_number + code_offset
        )
        @tokens << Token.new(
          type: kind,
          code: code,
          text: text,
          location: tag_location,
          code_location: code_location
        )
        @parser.append_placeholder(text)
      end

      # Callback for tokens yielded by the HTML parser. :tag_end tokens also
      # carry whether the tag was self-closing.
      def add_tokens(type, start, stop, line, column)
        extras = {}
        extras[:self_closing] = @parser.self_closing_tag? if type == :tag_end
        @tokens << Token.new(
          type: type,
          text: @parser.extract(start, stop),
          location: Location.new(@document, start, stop, line, column),
          **extras
        )
      end
    end
  end
end
|
@@ -1,55 +0,0 @@
|
|
1
|
-
require 'erubi'
|
2
|
-
require_relative 'token'
|
3
|
-
require_relative 'location'
|
4
|
-
|
5
|
-
module BetterHtml
  class NodeIterator
    # Erubi engine that records tokens for a JavaScript template.
    #
    # Literal text becomes :text tokens without further tokenizing; ERB tags
    # become :stmt, :expr_literal or :expr_escaped tokens.
    #
    # NOTE(review): relies on HtmlErb::REGEXP_WITHOUT_TRIM although this file
    # does not require html_erb - presumably it is loaded first; confirm.
    class JavascriptErb < ::Erubi::Engine
      attr_reader :tokens

      # @param source [String] template source
      def initialize(source)
        @source = source
        @parsed_document = ""
        @tokens = []
        super(source, regexp: HtmlErb::REGEXP_WITHOUT_TRIM, trim: false)
      end

      # Records a chunk of literal text as a :text token.
      def add_text(text)
        add_token(:text, text)
        append(text)
      end

      # Records a statement tag as a :stmt token.
      def add_code(code)
        text = "<%#{code}%>"
        add_token(:stmt, text, code)
        append(text)
      end

      # Records an expression tag. An indicator of '=' gives :expr_literal;
      # any other indicator gives :expr_escaped.
      def add_expression(indicator, code)
        text = "<%#{indicator}#{code}%>"
        add_token(indicator == '=' ? :expr_literal : :expr_escaped, text, code)
        append(text)
      end

      private

      # Appends a token located at the end of the text recorded so far.
      #
      # Line is 1-based and column is 0-based. Both are derived by counting
      # newlines and locating the last one, which gives the same result as
      # splitting the recorded text into lines without building an array of
      # every line for each token.
      def add_token(type, text, code = nil)
        start = @parsed_document.size
        stop = start + text.size
        line = @parsed_document.count("\n") + 1
        last_newline = @parsed_document.rindex("\n")
        column = last_newline ? start - last_newline - 1 : start
        @tokens << Token.new(
          type: type,
          text: text,
          code: code,
          location: Location.new(@source, start, stop, line, column)
        )
      end

      # Adds +text+ to the record of what has been processed so far.
      def append(text)
        @parsed_document << text
      end
    end
  end
end
|
data/lib/better_html/tree.rb
DELETED
@@ -1,113 +0,0 @@
|
|
1
|
-
require 'better_html/node_iterator'
|
2
|
-
|
3
|
-
module BetterHtml
  # Arranges the nodes of a NodeIterator into a tree of elements.
  #
  # Problems found while nesting (end tags for void elements, mismatched end
  # tags), followed by any errors reported by the underlying parser, are
  # collected in #errors.
  class Tree
    attr_reader :errors, :root

    # Element names that are treated as closed as soon as they are opened and
    # for which an end tag is reported as an error.
    cattr_accessor :void_elements
    self.void_elements = %w(area base br col embed hr img
      input keygen link menuitem meta param source track wbr)

    # @param data [String] template source
    # @param options [Hash] only :template_language is forwarded to
    #   NodeIterator
    def initialize(data, **options)
      @data = data
      @errors = Errors.new
      @nodes = BetterHtml::NodeIterator.new(data, **options.slice(:template_language))
      @root = TreeRoot.new
      construct!
      @nodes.parser_errors&.each { |parser_error| @errors.add(parser_error) }
    end

    private

    # Error raised against a specific token while building the tree.
    class TreeError < HtmlError
      attr_reader :token

      def initialize(token, message)
        @token = token
        super(message)
      end
    end

    def add_error(token, message)
      @errors.add(TreeError.new(token, message))
    end

    # Walks the nodes in document order, keeping track of the container that
    # is currently open.
    def construct!
      open_container = @root
      @nodes.each do |node|
        case node.node_type
        when :text, :comment, :cdata
          open_container << node
        when :element
          open_container =
            if node.closing?
              close_element(open_container, node)
            else
              open_element(open_container, node)
            end
        end
      end
    end

    # Adds a new element to +container+.
    #
    # @return the element when it stays open, otherwise +container+
    def open_element(container, node)
      element = Element.new(parent: container, start_node: node)
      container << element
      element.closed? ? container : element
    end

    # Handles an end tag seen while +container+ is open.
    #
    # @return the parent of +container+ when the end tag matches it;
    #   otherwise +container+, after recording an error
    def close_element(container, node)
      if void_elements.include?(node.name)
        add_error(node.name_parts.first,
          "end of tag for void element: </#{node.name}>")
        return container
      end

      if container.root?
        add_error(node.name_parts.first,
          "mismatched </#{node.name}> at root of tree")
        return container
      end

      unless node.name == container.name
        add_error(node.name_parts.first,
          "mismatched </#{node.name}> in <#{container.name}> element")
        return container
      end

      container.end_node = node
      container.parent
    end

    # Anything that can hold child nodes.
    class NodeContainer
      attr_accessor :content_nodes
      delegate :each, :[], :each_with_index, :<<, :push,
        :size, :empty?, :any?, to: :content_nodes

      def initialize
        @content_nodes = []
      end

      def root?
        false
      end
    end

    # The top of the tree.
    class TreeRoot < NodeContainer
      def root?
        true
      end
    end

    # An element of the tree: its start tag node, its end tag node once
    # matched, and the child nodes in between.
    class Element < NodeContainer
      attr_reader :parent
      attr_accessor :start_node, :end_node

      delegate :name, :attributes, :self_closing?, to: :start_node
      delegate :element?, :text?, :comment?, :cdata?, to: :start_node

      def initialize(parent:, start_node:)
        super()
        @parent = parent
        @start_node = start_node
      end

      # Truthy when the element cannot take further children: it is a void
      # element, its end tag has been seen, or its start tag was self-closing.
      def closed?
        void? || end_node.present? || self_closing?
      end

      def void?
        BetterHtml::Tree.void_elements.include?(name)
      end
    end
  end
end
|