html_tokenizer 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.autotest +3 -0
- data/.gitignore +35 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +24 -0
- data/LICENSE +21 -0
- data/Manifest.txt +8 -0
- data/README.md +2 -0
- data/Rakefile +20 -0
- data/bin/html_tokenizer +3 -0
- data/ext/html_tokenizer_ext/extconf.rb +6 -0
- data/ext/html_tokenizer_ext/html_tokenizer.c +12 -0
- data/ext/html_tokenizer_ext/html_tokenizer.h +7 -0
- data/ext/html_tokenizer_ext/parser.c +767 -0
- data/ext/html_tokenizer_ext/parser.h +87 -0
- data/ext/html_tokenizer_ext/tokenizer.c +682 -0
- data/ext/html_tokenizer_ext/tokenizer.h +74 -0
- data/html_tokenizer.gemspec +19 -0
- data/lib/html_tokenizer.rb +12 -0
- data/test/unit/parser_test.rb +575 -0
- data/test/unit/tokenizer_test.rb +337 -0
- metadata +109 -0
@@ -0,0 +1,337 @@
# Test dependencies: minitest runner plus the gem under test.
require "minitest/autorun"
require "html_tokenizer"
# Unit tests for HtmlTokenizer::Tokenizer, the low-level streaming HTML
# tokenizer. Each test feeds one or more string fragments through the
# tokenizer and asserts the exact stream of [token_name, token_text] pairs
# it emits. Calls to tokenize() with several arguments exercise the
# tokenizer's ability to resume mid-token across buffer boundaries.
class HtmlTokenizer::TokenizerTest < Minitest::Test
  # A stray ">" outside of any open tag is emitted as plain text.
  def test_closing_tag_without_start_is_text
    assert_equal [
      [:text, ">"],
    ], tokenize(">")
    assert_equal [
      [:tag_start, "<"], [:tag_name, "foo"], [:tag_end, ">"], [:text, ">"],
    ], tokenize("<foo>>")
  end

  def test_tokenize_text
    assert_equal [[:text, "\n hello world\n "]], tokenize("\n hello world\n ")
  end

  # A namespaced tag name split across two buffers yields one :tag_name
  # token per fragment, not a single merged token.
  def test_namespace_tag_name_multipart
    assert_equal [
      [:tag_start, "<"], [:tag_name, "foo:"], [:tag_name, "bar"],
    ], tokenize("<foo:", "bar")
  end

  def test_tokenize_doctype
    assert_equal [
      [:tag_start, "<"], [:tag_name, "!DOCTYPE"], [:whitespace, " "],
      [:attribute_name, "html"], [:tag_end, ">"]
    ], tokenize("<!DOCTYPE html>")
  end

  def test_tokenize_multiple_elements
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:tag_end, ">"],
      [:text, " bla "],
      [:tag_start, "<"], [:tag_name, "strong"], [:tag_end, ">"]
    ], tokenize("<div> bla <strong>")
  end

  # DOCTYPE public/system identifiers come through as quoted attribute
  # values without an attribute name.
  def test_tokenize_complex_doctype
    text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    assert_equal [
      [:tag_start, "<"], [:tag_name, "!DOCTYPE"], [:whitespace, " "],
      [:attribute_name, "html"], [:whitespace, " "],
      [:attribute_name, "PUBLIC"], [:whitespace, " "],
      [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "-//W3C//DTD XHTML 1.0 Transitional//EN"], [:attribute_quoted_value_end, "\""],
      [:whitespace, " "],
      [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"]
    ], tokenize(text)
  end

  def test_tokenize_html_comment
    assert_equal [
      [:comment_start, "<!--"], [:text, " COMMENT "], [:comment_end, "-->"]
    ], tokenize("<!-- COMMENT -->")
  end

  # ERB inside an HTML comment stays plain text; surrounding whitespace is
  # emitted as :text tokens.
  # NOTE(review): leading whitespace in this heredoc and its expected
  # :text token may have been collapsed when this file was extracted;
  # the literal below is reconstructed to be self-consistent — verify
  # against the original gem source.
  def test_tokenize_comment_with_newlines
    result = tokenize(<<-EOF)
 <!-- debug: <%== @unsafe %> -->
    EOF

    assert_equal [
      [:text, " "], [:comment_start, "<!--"],
      [:text, " debug: <%== @unsafe %> "],
      [:comment_end, "-->"], [:text, "\n"]
    ], result
  end

  def test_tokenize_cdata_section
    assert_equal [
      [:cdata_start, "<![CDATA["], [:text, " bla bla <!&@#> foo "], [:cdata_end, "]]>"]
    ], tokenize("<![CDATA[ bla bla <!&@#> foo ]]>")
  end

  # Regression: a CDATA section split across buffers must keep emitting
  # :text tokens until the closing "]]>" arrives.
  def test_tokenizer_cdata_regression
    assert_equal [
      [:cdata_start, "<![CDATA["],
      [:text, " foo "], [:text, " baz "], [:cdata_end, "]]>"]
    ], tokenize("<![CDATA[ foo ", " baz ]]>")
  end

  # Regression: same buffer-boundary behavior for comments.
  def test_tokenizer_comment_regression
    assert_equal [
      [:comment_start, "<!--"],
      [:text, " foo "], [:text, " baz "], [:comment_end, "-->"]
    ], tokenize("<!-- foo ", " baz -->")
  end

  # Regression: the tokenizer must leave comment mode cleanly and parse a
  # following tag.
  def test_tokenizer_parse_tag_after_comment_regression
    assert_equal [
      [:comment_start, "<!--"], [:text, " foo "], [:comment_end, "-->"],
      [:text, " "], [:tag_start, "<"], [:tag_name, "li"], [:tag_end, ">"]
    ], tokenize("<!-- foo --> <li>")
  end

  def test_tokenize_basic_tag
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:tag_end, ">"]
    ], tokenize("<div>")
  end

  def test_tokenize_namespaced_tag
    assert_equal [
      [:tag_start, "<"], [:tag_name, "ns:foo"], [:tag_end, ">"]
    ], tokenize("<ns:foo>")
  end

  # A "<" inside a tag name is kept as part of the name.
  def test_tokenize_tag_with_lt
    assert_equal [
      [:tag_start, "<"], [:tag_name, "a<b"], [:tag_end, ">"]
    ], tokenize("<a<b>")
  end

  def test_tokenize_tag_multipart_name
    assert_equal [
      [:tag_start, "<"], [:tag_name, "d"], [:tag_name, "iv"], [:tag_end, ">"]
    ], tokenize("<d", "iv", ">")
  end

  # A "/" after the tag name ends the name; what follows is parsed as an
  # attribute.
  def test_tokenize_tag_name_ending_with_slash
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:solidus, "/"], [:attribute_name, "1"], [:tag_end, ">"]
    ], tokenize("<div/1>")
  end

  def test_tokenize_empty_tag
    assert_equal [[:tag_start, "<"], [:tag_end, ">"]], tokenize("<>")
  end

  def test_tokenize_tag_with_solidus
    assert_equal [
      [:tag_start, "<"], [:solidus, "/"], [:tag_end, ">"]
    ], tokenize("</>")
  end

  def test_tokenize_end_tag
    assert_equal [
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "div"], [:tag_end, ">"]
    ], tokenize("</div>")
  end

  def test_tokenize_tag_attribute_with_double_quote
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"]
    ], tokenize('<div foo="bar">')
  end

  # An unquoted value swallows the "/" and everything after it.
  def test_tokenize_unquoted_attributes_separated_with_solidus
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "1/bar=2"],
      [:tag_end, ">"]
    ], tokenize('<div foo=1/bar=2>')
  end

  # After a quoted value, a "/" separates the next attribute.
  def test_tokenize_quoted_attributes_separated_with_solidus
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "1"], [:attribute_quoted_value_end, "\""],
      [:solidus, "/"],
      [:attribute_name, "bar"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "2"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"]
    ], tokenize('<div foo="1"/bar="2">')
  end

  def test_tokenize_tag_attribute_without_space
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "\""],
      [:attribute_name, "baz"],
      [:tag_end, ">"]
    ], tokenize('<div foo="bar"baz>')
  end

  def test_tokenize_multipart_unquoted_attribute
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "bar"],
      [:attribute_unquoted_value, "baz"], [:tag_end, ">"]
    ], tokenize('<div foo=', 'bar', 'baz>')
  end

  def test_tokenize_quoted_attribute_separately
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "'"], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "'"],
      [:tag_end, ">"]
    ], tokenize('<div foo=', "'bar'", '>')
  end

  def test_tokenize_quoted_attribute_in_multiple_parts
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "'"], [:attribute_quoted_value, "bar"], [:attribute_quoted_value, "baz"], [:attribute_quoted_value_end, "'"],
      [:tag_end, ">"]
    ], tokenize('<div foo=', "'bar", "baz'", '>')
  end

  def test_tokenize_tag_attribute_with_single_quote
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "'"], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "'"],
      [:tag_end, ">"]
    ], tokenize("<div foo='bar'>")
  end

  def test_tokenize_tag_attribute_with_no_quotes
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "bla"], [:whitespace, " "],
      [:attribute_name, "bar"], [:equal, "="], [:attribute_unquoted_value, "blo"],
      [:tag_end, ">"]
    ], tokenize("<div foo=bla bar=blo>")
  end

  # Without a separating space, "=" stays inside the unquoted value.
  def test_tokenize_double_equals
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "blabar=blo"],
      [:tag_end, ">"]
    ], tokenize("<div foo=blabar=blo>")
  end

  def test_tokenize_closing_tag
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "\""], [:whitespace, " "],
      [:solidus, "/"], [:tag_end, ">"]
    ], tokenize('<div foo="bar" />')
  end

  # Inside <script>, markup is raw text until the matching close tag.
  def test_tokenize_script_tag
    assert_equal [
      [:tag_start, "<"], [:tag_name, "script"], [:tag_end, ">"],
      [:text, "foo "], [:text, "<b"], [:text, "> bar"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "script"], [:tag_end, ">"],
    ], tokenize('<script>foo <b> bar</script>')
  end

  def test_tokenize_textarea_tag
    assert_equal [
      [:tag_start, "<"], [:tag_name, "textarea"], [:tag_end, ">"],
      [:text, "hello"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "textarea"], [:tag_end, ">"],
    ], tokenize('<textarea>hello</textarea>')
  end

  # Inside <style>, even "</div>" is raw text.
  def test_tokenize_style_tag
    assert_equal [
      [:tag_start, "<"], [:tag_name, "style"], [:tag_end, ">"],
      [:text, "</div"], [:text, ">"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "style"], [:tag_end, ">"],
    ], tokenize('<style></div></style>')
  end

  # The raw-text rule applies regardless of the script's type attribute.
  def test_tokenize_script_containing_html
    assert_equal [
      [:tag_start, "<"], [:tag_name, "script"], [:whitespace, " "],
      [:attribute_name, "type"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "text/html"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"],
      [:text, "foo "], [:text, "<b"], [:text, "> bar"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "script"], [:tag_end, ">"],
    ], tokenize('<script type="text/html">foo <b> bar</script>')
  end

  # A tag whose ">" arrives in a later buffer, with a newline before it,
  # must still close correctly.
  # NOTE(review): leading whitespace in these literals may have been
  # collapsed when this file was extracted; reconstructed to be
  # self-consistent — verify against the original gem source.
  def test_end_of_tag_on_newline
    data = [
      " <div define=\"{credential_96_credential1: new Shopify.ProviderCredentials()}\"\n ",
      "",
      ">",
    ]
    assert_equal [
      [:text, " "],
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "], [:attribute_name, "define"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "{credential_96_credential1: new Shopify.ProviderCredentials()}"], [:attribute_quoted_value_end, "\""],
      [:whitespace, "\n "], [:tag_end, ">"]
    ], tokenize(*data)
  end

  def test_tokenize_multi_part_attribute_name
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "data-"], [:attribute_name, "shipping"], [:attribute_name, "-type"],
      [:tag_end, ">"],
    ], tokenize('<div data-', 'shipping', '-type>')
  end

  def test_tokenize_attribute_name_with_space_before_equal
    assert_equal [
      [:tag_start, "<"], [:tag_name, "a"], [:whitespace, " "],
      [:attribute_name, "href"], [:whitespace, " "], [:equal, "="],
      [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "http://www.cra-arc.gc.ca/tx/bsnss/tpcs/gst-tps/menu-eng.html"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"], [:text, "GST/HST"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "a"], [:tag_end, ">"]
    ], tokenize('<a href ="http://www.cra-arc.gc.ca/tx/bsnss/tpcs/gst-tps/menu-eng.html">GST/HST</a>')
  end

  # An exception raised inside the tokenize block must propagate out and
  # leave the tokenizer usable for subsequent calls.
  def test_raise_in_block
    @tokenizer = HtmlTokenizer::Tokenizer.new
    10.times do
      error = assert_raises(RuntimeError) do
        @tokenizer.tokenize("<>") do |part|
          raise RuntimeError, "something went wrong"
        end
      end
      assert_equal "something went wrong", error.message
    end
  end

  # Regression: a lone "<" inside <script> must not be mistaken for the
  # start of the closing tag.
  def test_tokenize_end_of_script_regression
    assert_equal [
      [:tag_start, "<"], [:tag_name, "script"], [:tag_end, ">"],
      [:text, "<"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "script"], [:tag_end, ">"]
    ], tokenize("<script><</script>")
  end

  private

  # Feeds each string in +parts+ through a single Tokenizer instance and
  # collects the emitted tokens as [name, text] pairs.  The tokenizer
  # reports (start, stop) byte offsets into the current part; the token
  # text is sliced out with an exclusive range.
  def tokenize(*parts)
    @tokenizer = HtmlTokenizer::Tokenizer.new
    emitted = []
    parts.each do |part|
      @tokenizer.tokenize(part) do |name, start, stop|
        emitted << [name, part[start...stop]]
      end
    end
    emitted
  end
end
|
metadata
ADDED
@@ -0,0 +1,109 @@
--- !ruby/object:Gem::Specification
name: html_tokenizer
version: !ruby/object:Gem::Version
  version: 0.0.1
platform: ruby
authors:
- Francois Chagnon
autorequire:
bindir: bin
cert_chain: []
date: 2017-10-26 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: rake-compiler
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: minitest
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
description:
email:
executables:
- html_tokenizer
extensions:
- ext/html_tokenizer_ext/extconf.rb
extra_rdoc_files: []
files:
- .autotest
- .gitignore
- Gemfile
- Gemfile.lock
- LICENSE
- Manifest.txt
- README.md
- Rakefile
- bin/html_tokenizer
- ext/html_tokenizer_ext/extconf.rb
- ext/html_tokenizer_ext/html_tokenizer.c
- ext/html_tokenizer_ext/html_tokenizer.h
- ext/html_tokenizer_ext/parser.c
- ext/html_tokenizer_ext/parser.h
- ext/html_tokenizer_ext/tokenizer.c
- ext/html_tokenizer_ext/tokenizer.h
- html_tokenizer.gemspec
- lib/html_tokenizer.rb
- test/unit/parser_test.rb
- test/unit/tokenizer_test.rb
homepage:
licenses: []
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
- ext
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.0.14.1
signing_key:
specification_version: 4
summary: HTML Tokenizer
test_files:
- test/unit/parser_test.rb
- test/unit/tokenizer_test.rb