better_html 0.0.12 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/better_html.rb +0 -2
- data/lib/better_html/ast/iterator.rb +32 -0
- data/lib/better_html/ast/node.rb +14 -0
- data/lib/better_html/better_erb/runtime_checks.rb +3 -3
- data/lib/better_html/config.rb +12 -0
- data/lib/better_html/parser.rb +286 -0
- data/lib/better_html/test_helper/ruby_expr.rb +8 -5
- data/lib/better_html/test_helper/safe_erb_tester.rb +121 -108
- data/lib/better_html/test_helper/safe_lodash_tester.rb +44 -42
- data/lib/better_html/tokenizer/base_erb.rb +79 -0
- data/lib/better_html/tokenizer/html_erb.rb +31 -0
- data/lib/better_html/{node_iterator → tokenizer}/html_lodash.rb +30 -34
- data/lib/better_html/tokenizer/javascript_erb.rb +15 -0
- data/lib/better_html/{node_iterator → tokenizer}/location.rb +9 -3
- data/lib/better_html/tokenizer/token.rb +16 -0
- data/lib/better_html/tokenizer/token_array.rb +54 -0
- data/lib/better_html/tree/attribute.rb +31 -0
- data/lib/better_html/tree/attributes_list.rb +25 -0
- data/lib/better_html/tree/tag.rb +39 -0
- data/lib/better_html/version.rb +1 -1
- data/test/better_html/parser_test.rb +279 -0
- data/test/better_html/test_helper/safe_erb_tester_test.rb +11 -0
- data/test/better_html/test_helper/safe_lodash_tester_test.rb +11 -1
- data/test/better_html/tokenizer/html_erb_test.rb +158 -0
- data/test/better_html/tokenizer/html_lodash_test.rb +98 -0
- data/test/better_html/tokenizer/location_test.rb +57 -0
- data/test/better_html/tokenizer/token_array_test.rb +144 -0
- data/test/better_html/tokenizer/token_test.rb +15 -0
- metadata +45 -30
- data/lib/better_html/node_iterator.rb +0 -144
- data/lib/better_html/node_iterator/attribute.rb +0 -34
- data/lib/better_html/node_iterator/base.rb +0 -27
- data/lib/better_html/node_iterator/cdata.rb +0 -8
- data/lib/better_html/node_iterator/comment.rb +0 -8
- data/lib/better_html/node_iterator/content_node.rb +0 -13
- data/lib/better_html/node_iterator/element.rb +0 -26
- data/lib/better_html/node_iterator/html_erb.rb +0 -70
- data/lib/better_html/node_iterator/javascript_erb.rb +0 -55
- data/lib/better_html/node_iterator/text.rb +0 -8
- data/lib/better_html/node_iterator/token.rb +0 -8
- data/lib/better_html/tree.rb +0 -113
- data/test/better_html/node_iterator/html_erb_test.rb +0 -116
- data/test/better_html/node_iterator/html_lodash_test.rb +0 -132
- data/test/better_html/node_iterator/location_test.rb +0 -36
- data/test/better_html/node_iterator_test.rb +0 -221
- data/test/better_html/tree_test.rb +0 -110
@@ -1,144 +0,0 @@
|
|
1
|
-
require_relative 'node_iterator/javascript_erb'
|
2
|
-
require_relative 'node_iterator/html_erb'
|
3
|
-
require_relative 'node_iterator/html_lodash'
|
4
|
-
require_relative 'node_iterator/cdata'
|
5
|
-
require_relative 'node_iterator/comment'
|
6
|
-
require_relative 'node_iterator/element'
|
7
|
-
require_relative 'node_iterator/attribute'
|
8
|
-
require_relative 'node_iterator/text'
|
9
|
-
|
10
|
-
module BetterHtml
|
11
|
-
class NodeIterator
|
12
|
-
attr_reader :nodes, :template_language
|
13
|
-
|
14
|
-
delegate :each, :each_with_index, :[], to: :nodes
|
15
|
-
delegate :parser, to: :@erb, allow_nil: true
|
16
|
-
delegate :errors, to: :parser, allow_nil: true, prefix: true
|
17
|
-
|
18
|
-
def initialize(document, template_language: :html)
|
19
|
-
@document = document
|
20
|
-
@template_language = template_language
|
21
|
-
@erb = case template_language
|
22
|
-
when :html
|
23
|
-
HtmlErb.new(@document)
|
24
|
-
when :lodash
|
25
|
-
HtmlLodash.new(@document)
|
26
|
-
when :javascript
|
27
|
-
JavascriptErb.new(@document)
|
28
|
-
else
|
29
|
-
raise ArgumentError, "template_language can be :html or :javascript"
|
30
|
-
end
|
31
|
-
@nodes = parse!
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def parse!
|
37
|
-
nodes = []
|
38
|
-
tokens = @erb.tokens.dup
|
39
|
-
while token = tokens[0]
|
40
|
-
case token.type
|
41
|
-
when :cdata_start
|
42
|
-
tokens.shift
|
43
|
-
nodes << consume_cdata(tokens)
|
44
|
-
when :comment_start
|
45
|
-
tokens.shift
|
46
|
-
nodes << consume_comment(tokens)
|
47
|
-
when :tag_start
|
48
|
-
tokens.shift
|
49
|
-
nodes << consume_element(tokens)
|
50
|
-
when :text, :stmt, :expr_literal, :expr_escaped
|
51
|
-
nodes << consume_text(tokens)
|
52
|
-
else
|
53
|
-
raise RuntimeError, "Unhandled token #{token.type} line #{token.location.line} column #{token.location.column}"
|
54
|
-
end
|
55
|
-
end
|
56
|
-
nodes
|
57
|
-
end
|
58
|
-
|
59
|
-
def consume_cdata(tokens)
|
60
|
-
node = CData.new
|
61
|
-
while tokens.any? && tokens[0].type != :cdata_end
|
62
|
-
node.content_parts << tokens.shift
|
63
|
-
end
|
64
|
-
tokens.shift if tokens.any? && tokens[0].type == :cdata_end
|
65
|
-
node
|
66
|
-
end
|
67
|
-
|
68
|
-
def consume_comment(tokens)
|
69
|
-
node = Comment.new
|
70
|
-
while tokens.any? && tokens[0].type != :comment_end
|
71
|
-
node.content_parts << tokens.shift
|
72
|
-
end
|
73
|
-
tokens.shift if tokens.any? && tokens[0].type == :comment_end
|
74
|
-
node
|
75
|
-
end
|
76
|
-
|
77
|
-
def consume_element(tokens)
|
78
|
-
node = Element.new
|
79
|
-
if tokens.any? && tokens[0].type == :solidus
|
80
|
-
tokens.shift
|
81
|
-
node.closing = true
|
82
|
-
end
|
83
|
-
while tokens.any? && [:tag_name, :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
|
84
|
-
node.name_parts << tokens.shift
|
85
|
-
end
|
86
|
-
while tokens.any?
|
87
|
-
token = tokens[0]
|
88
|
-
if token.type == :attribute_name
|
89
|
-
node.attributes << consume_attribute(tokens)
|
90
|
-
elsif token.type == :attribute_quoted_value_start
|
91
|
-
node.attributes << consume_attribute_value(tokens)
|
92
|
-
elsif token.type == :tag_end
|
93
|
-
tokens.shift
|
94
|
-
node.self_closing = token.self_closing
|
95
|
-
break
|
96
|
-
else
|
97
|
-
tokens.shift
|
98
|
-
end
|
99
|
-
end
|
100
|
-
node
|
101
|
-
end
|
102
|
-
|
103
|
-
def consume_attribute(tokens)
|
104
|
-
node = Attribute.new
|
105
|
-
while tokens.any? && [:attribute_name, :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
|
106
|
-
node.name_parts << tokens.shift
|
107
|
-
end
|
108
|
-
return node unless consume_equal?(tokens)
|
109
|
-
while tokens.any? && [
|
110
|
-
:attribute_quoted_value_start, :attribute_quoted_value,
|
111
|
-
:attribute_quoted_value_end, :attribute_unquoted_value,
|
112
|
-
:stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
|
113
|
-
node.value_parts << tokens.shift
|
114
|
-
end
|
115
|
-
node
|
116
|
-
end
|
117
|
-
|
118
|
-
def consume_attribute_value(tokens)
|
119
|
-
node = Attribute.new
|
120
|
-
while tokens.any? && [
|
121
|
-
:attribute_quoted_value_start, :attribute_quoted_value,
|
122
|
-
:attribute_quoted_value_end, :attribute_unquoted_value,
|
123
|
-
:stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
|
124
|
-
node.value_parts << tokens.shift
|
125
|
-
end
|
126
|
-
node
|
127
|
-
end
|
128
|
-
|
129
|
-
def consume_equal?(tokens)
|
130
|
-
while tokens.any? && [:whitespace, :equal].include?(tokens[0].type)
|
131
|
-
return true if tokens.shift.type == :equal
|
132
|
-
end
|
133
|
-
false
|
134
|
-
end
|
135
|
-
|
136
|
-
def consume_text(tokens)
|
137
|
-
node = Text.new
|
138
|
-
while tokens.any? && [:text, :stmt, :expr_literal, :expr_escaped].include?(tokens[0].type)
|
139
|
-
node.content_parts << tokens.shift
|
140
|
-
end
|
141
|
-
node
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module BetterHtml
|
4
|
-
class NodeIterator
|
5
|
-
class Attribute < Base
|
6
|
-
tokenized_attribute :name
|
7
|
-
tokenized_attribute :value
|
8
|
-
|
9
|
-
def initialize
|
10
|
-
@name_parts = []
|
11
|
-
@value_parts = []
|
12
|
-
end
|
13
|
-
|
14
|
-
def unescaped_value_parts
|
15
|
-
value_parts.map do |part|
|
16
|
-
next if ["'", '"'].include?(part.text)
|
17
|
-
if [:attribute_quoted_value, :attribute_unquoted_value].include?(part.type)
|
18
|
-
CGI.unescapeHTML(part.text)
|
19
|
-
else
|
20
|
-
part.text
|
21
|
-
end
|
22
|
-
end.compact
|
23
|
-
end
|
24
|
-
|
25
|
-
def unescaped_value
|
26
|
-
unescaped_value_parts.join
|
27
|
-
end
|
28
|
-
|
29
|
-
def value_without_quotes
|
30
|
-
value_parts.map{ |s| ["'", '"'].include?(s.text) ? '' : s.text }.join
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
module BetterHtml
|
2
|
-
class NodeIterator
|
3
|
-
class Base
|
4
|
-
def self.tokenized_attribute(name)
|
5
|
-
class_eval <<~RUBY
|
6
|
-
attr_reader :#{name}_parts
|
7
|
-
|
8
|
-
def #{name}
|
9
|
-
#{name}_parts.map(&:text).join
|
10
|
-
end
|
11
|
-
RUBY
|
12
|
-
end
|
13
|
-
|
14
|
-
def node_type
|
15
|
-
self.class.name.split('::').last.downcase.to_sym
|
16
|
-
end
|
17
|
-
|
18
|
-
%w(text cdata comment element).each do |name|
|
19
|
-
class_eval <<~RUBY
|
20
|
-
def #{name}?
|
21
|
-
node_type == :#{name}
|
22
|
-
end
|
23
|
-
RUBY
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require_relative 'base'
|
2
|
-
|
3
|
-
module BetterHtml
|
4
|
-
class NodeIterator
|
5
|
-
class Element < Base
|
6
|
-
tokenized_attribute :name
|
7
|
-
attr_reader :attributes
|
8
|
-
attr_accessor :closing, :self_closing
|
9
|
-
alias_method :closing?, :closing
|
10
|
-
alias_method :self_closing?, :self_closing
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@name_parts = []
|
14
|
-
@attributes = []
|
15
|
-
end
|
16
|
-
|
17
|
-
def find_attr(wanted)
|
18
|
-
@attributes.each do |attribute|
|
19
|
-
return attribute if attribute.name == wanted
|
20
|
-
end
|
21
|
-
nil
|
22
|
-
end
|
23
|
-
alias_method :[], :find_attr
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
@@ -1,70 +0,0 @@
|
|
1
|
-
require 'erubi'
|
2
|
-
require 'html_tokenizer'
|
3
|
-
require_relative 'token'
|
4
|
-
require_relative 'location'
|
5
|
-
|
6
|
-
module BetterHtml
|
7
|
-
class NodeIterator
|
8
|
-
class HtmlErb < ::Erubi::Engine
|
9
|
-
attr_reader :tokens
|
10
|
-
attr_reader :parser
|
11
|
-
|
12
|
-
REGEXP_WITHOUT_TRIM = /<%(={1,2}|-|%)?(.*?)(?:[-=])?()?%>([ \t]*\r?\n)?/m
|
13
|
-
|
14
|
-
def initialize(document)
|
15
|
-
@parser = HtmlTokenizer::Parser.new
|
16
|
-
@tokens = []
|
17
|
-
@document = document
|
18
|
-
super(document, regexp: REGEXP_WITHOUT_TRIM, trim: false)
|
19
|
-
end
|
20
|
-
|
21
|
-
def add_text(text)
|
22
|
-
@parser.parse(text) { |*args| add_tokens(*args) }
|
23
|
-
end
|
24
|
-
|
25
|
-
def add_code(code)
|
26
|
-
text = "<%#{code}%>"
|
27
|
-
start = @parser.document_length
|
28
|
-
stop = start + text.size
|
29
|
-
@tokens << Token.new(
|
30
|
-
type: :stmt,
|
31
|
-
code: code,
|
32
|
-
text: text,
|
33
|
-
location: Location.new(@document, start, stop, @parser.line_number, @parser.column_number),
|
34
|
-
code_location: Location.new(@document, start+2, stop-2, @parser.line_number, @parser.column_number+2)
|
35
|
-
)
|
36
|
-
@parser.append_placeholder(text)
|
37
|
-
end
|
38
|
-
|
39
|
-
def add_expression(indicator, code)
|
40
|
-
text = "<%#{indicator}#{code}%>"
|
41
|
-
start = @parser.document_length
|
42
|
-
stop = start + text.size
|
43
|
-
@tokens << Token.new(
|
44
|
-
type: indicator == '=' ? :expr_literal : :expr_escaped,
|
45
|
-
code: code,
|
46
|
-
text: text,
|
47
|
-
location: Location.new(@document, start, stop, @parser.line_number, @parser.column_number),
|
48
|
-
code_location: Location.new(@document, start+2+indicator.size, stop-2, @parser.line_number, @parser.column_number+2+indicator.size)
|
49
|
-
)
|
50
|
-
@parser.append_placeholder(text)
|
51
|
-
end
|
52
|
-
|
53
|
-
private
|
54
|
-
|
55
|
-
def add_tokens(type, start, stop, line, column)
|
56
|
-
extra_attributes = if type == :tag_end
|
57
|
-
{
|
58
|
-
self_closing: @parser.self_closing_tag?
|
59
|
-
}
|
60
|
-
end
|
61
|
-
@tokens << Token.new(
|
62
|
-
type: type,
|
63
|
-
text: @parser.extract(start, stop),
|
64
|
-
location: Location.new(@document, start, stop, line, column),
|
65
|
-
**(extra_attributes || {})
|
66
|
-
)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
@@ -1,55 +0,0 @@
|
|
1
|
-
require 'erubi'
|
2
|
-
require_relative 'token'
|
3
|
-
require_relative 'location'
|
4
|
-
|
5
|
-
module BetterHtml
|
6
|
-
class NodeIterator
|
7
|
-
class JavascriptErb < ::Erubi::Engine
|
8
|
-
attr_reader :tokens
|
9
|
-
|
10
|
-
def initialize(source)
|
11
|
-
@source = source
|
12
|
-
@parsed_document = ""
|
13
|
-
@tokens = []
|
14
|
-
super(source, regexp: HtmlErb::REGEXP_WITHOUT_TRIM, trim: false)
|
15
|
-
end
|
16
|
-
|
17
|
-
def add_text(text)
|
18
|
-
add_token(:text, text)
|
19
|
-
append(text)
|
20
|
-
end
|
21
|
-
|
22
|
-
def add_code(code)
|
23
|
-
text = "<%#{code}%>"
|
24
|
-
add_token(:stmt, text, code)
|
25
|
-
append(text)
|
26
|
-
end
|
27
|
-
|
28
|
-
def add_expression(indicator, code)
|
29
|
-
text = "<%#{indicator}#{code}%>"
|
30
|
-
add_token(indicator == '=' ? :expr_literal : :expr_escaped, text, code)
|
31
|
-
append(text)
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def add_token(type, text, code = nil)
|
37
|
-
start = @parsed_document.size
|
38
|
-
stop = start + text.size
|
39
|
-
lines = @parsed_document.split("\n", -1)
|
40
|
-
line = lines.empty? ? 1 : lines.size
|
41
|
-
column = lines.empty? ? 0 : lines.last.size
|
42
|
-
@tokens << Token.new(
|
43
|
-
type: type,
|
44
|
-
text: text,
|
45
|
-
code: code,
|
46
|
-
location: Location.new(@source, start, stop, line, column)
|
47
|
-
)
|
48
|
-
end
|
49
|
-
|
50
|
-
def append(text)
|
51
|
-
@parsed_document << text
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
data/lib/better_html/tree.rb
DELETED
@@ -1,113 +0,0 @@
|
|
1
|
-
require 'better_html/node_iterator'
|
2
|
-
|
3
|
-
module BetterHtml
|
4
|
-
class Tree
|
5
|
-
attr_reader :errors
|
6
|
-
attr_reader :root
|
7
|
-
|
8
|
-
cattr_accessor :void_elements
|
9
|
-
self.void_elements = %w(area base br col embed hr img
|
10
|
-
input keygen link menuitem meta param source track wbr)
|
11
|
-
|
12
|
-
def initialize(data, **options)
|
13
|
-
@data = data
|
14
|
-
@errors = Errors.new
|
15
|
-
@nodes = BetterHtml::NodeIterator.new(data, **options.slice(:template_language))
|
16
|
-
@root = TreeRoot.new
|
17
|
-
construct!
|
18
|
-
@nodes.parser_errors&.each do |error|
|
19
|
-
@errors.add(error)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
class TreeError < HtmlError
|
26
|
-
attr_reader :token
|
27
|
-
|
28
|
-
def initialize(token, message)
|
29
|
-
@token = token
|
30
|
-
super(message)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def add_error(token, message)
|
35
|
-
@errors.add(TreeError.new(token, message))
|
36
|
-
end
|
37
|
-
|
38
|
-
def construct!
|
39
|
-
current = @root
|
40
|
-
@nodes.each do |node|
|
41
|
-
case node.node_type
|
42
|
-
when :text, :comment, :cdata
|
43
|
-
current << node
|
44
|
-
when :element
|
45
|
-
if node.closing?
|
46
|
-
if void_elements.include?(node.name)
|
47
|
-
add_error(node.name_parts.first,
|
48
|
-
"end of tag for void element: </#{node.name}>")
|
49
|
-
elsif current.root?
|
50
|
-
add_error(node.name_parts.first,
|
51
|
-
"mismatched </#{node.name}> at root of tree")
|
52
|
-
else
|
53
|
-
if node.name == current.name
|
54
|
-
current.end_node = node
|
55
|
-
current = current.parent
|
56
|
-
else
|
57
|
-
add_error(node.name_parts.first,
|
58
|
-
"mismatched </#{node.name}> in <#{current.name}> element")
|
59
|
-
end
|
60
|
-
end
|
61
|
-
else
|
62
|
-
element = Element.new(parent: current, start_node: node)
|
63
|
-
current << element
|
64
|
-
current = element unless element.closed?
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
class NodeContainer
|
71
|
-
attr_accessor :content_nodes
|
72
|
-
delegate :each, :[], :each_with_index, :<<, :push,
|
73
|
-
:size, :empty?, :any?, to: :content_nodes
|
74
|
-
|
75
|
-
def root?
|
76
|
-
false
|
77
|
-
end
|
78
|
-
|
79
|
-
def initialize
|
80
|
-
@content_nodes = []
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
class TreeRoot < NodeContainer
|
85
|
-
def root?
|
86
|
-
true
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
class Element < NodeContainer
|
91
|
-
attr_reader :parent
|
92
|
-
attr_accessor :start_node
|
93
|
-
attr_accessor :end_node
|
94
|
-
|
95
|
-
delegate :name, :attributes, :self_closing?, to: :start_node
|
96
|
-
delegate :element?, :text?, :comment?, :cdata?, to: :start_node
|
97
|
-
|
98
|
-
def initialize(parent:, start_node:)
|
99
|
-
super()
|
100
|
-
@parent = parent
|
101
|
-
@start_node = start_node
|
102
|
-
end
|
103
|
-
|
104
|
-
def closed?
|
105
|
-
void? || end_node.present? || self_closing?
|
106
|
-
end
|
107
|
-
|
108
|
-
def void?
|
109
|
-
BetterHtml::Tree.void_elements.include?(name)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|