html_tokenizer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.autotest +3 -0
- data/.gitignore +35 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +24 -0
- data/LICENSE +21 -0
- data/Manifest.txt +8 -0
- data/README.md +2 -0
- data/Rakefile +20 -0
- data/bin/html_tokenizer +3 -0
- data/ext/html_tokenizer_ext/extconf.rb +6 -0
- data/ext/html_tokenizer_ext/html_tokenizer.c +12 -0
- data/ext/html_tokenizer_ext/html_tokenizer.h +7 -0
- data/ext/html_tokenizer_ext/parser.c +767 -0
- data/ext/html_tokenizer_ext/parser.h +87 -0
- data/ext/html_tokenizer_ext/tokenizer.c +682 -0
- data/ext/html_tokenizer_ext/tokenizer.h +74 -0
- data/html_tokenizer.gemspec +19 -0
- data/lib/html_tokenizer.rb +12 -0
- data/test/unit/parser_test.rb +575 -0
- data/test/unit/tokenizer_test.rb +337 -0
- metadata +109 -0
@@ -0,0 +1,337 @@
|
|
1
|
+
require "minitest/autorun"
require "html_tokenizer"

# Unit tests for HtmlTokenizer::Tokenizer.
#
# Each test feeds one or more string fragments to the tokenizer and asserts
# the exact stream of [token_name, token_text] pairs it emits. Fragments are
# tokenized one at a time against the same tokenizer instance (streaming),
# which is why several "regression" tests deliberately split their input
# across multiple parts.
class HtmlTokenizer::TokenizerTest < Minitest::Test
  def test_closing_tag_without_start_is_text
    assert_equal [
      [:text, ">"],
    ], tokenize(">")
    assert_equal [
      [:tag_start, "<"], [:tag_name, "foo"], [:tag_end, ">"], [:text, ">"],
    ], tokenize("<foo>>")
  end

  def test_tokenize_text
    result = tokenize("\n hello world\n ")
    assert_equal [[:text, "\n hello world\n "]], result
  end

  def test_namespace_tag_name_multipart
    assert_equal [
      [:tag_start, "<"], [:tag_name, "foo:"], [:tag_name, "bar"],
    ], tokenize("<foo:", "bar")
  end

  def test_tokenize_doctype
    assert_equal [
      [:tag_start, "<"], [:tag_name, "!DOCTYPE"], [:whitespace, " "],
      [:attribute_name, "html"], [:tag_end, ">"]
    ], tokenize("<!DOCTYPE html>")
  end

  def test_tokenize_multiple_elements
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:tag_end, ">"],
      [:text, " bla "],
      [:tag_start, "<"], [:tag_name, "strong"], [:tag_end, ">"]
    ], tokenize("<div> bla <strong>")
  end

  def test_tokenize_complex_doctype
    text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    assert_equal [
      [:tag_start, "<"], [:tag_name, "!DOCTYPE"], [:whitespace, " "],
      [:attribute_name, "html"], [:whitespace, " "],
      [:attribute_name, "PUBLIC"], [:whitespace, " "],
      [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "-//W3C//DTD XHTML 1.0 Transitional//EN"], [:attribute_quoted_value_end, "\""],
      [:whitespace, " "],
      [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"]
    ], tokenize(text)
  end

  def test_tokenize_html_comment
    result = tokenize("<!-- COMMENT -->")
    assert_equal [[:comment_start, "<!--"], [:text, " COMMENT "], [:comment_end, "-->"]], result
  end

  def test_tokenize_comment_with_newlines
    # NOTE(review): leading whitespace in this heredoc was collapsed by the
    # diff extraction; the single-space indent below is reconstructed to be
    # consistent with the [:text, " "] / [:text, "\n"] assertions. Confirm
    # against the gem's upstream source.
    result = tokenize <<-EOF
 <!-- debug: <%== @unsafe %> -->
    EOF

    assert_equal [
      [:text, " "], [:comment_start, "<!--"],
      [:text, " debug: <%== @unsafe %> "],
      [:comment_end, "-->"], [:text, "\n"]
    ], result
  end

  def test_tokenize_cdata_section
    result = tokenize("<![CDATA[ bla bla <!&@#> foo ]]>")
    assert_equal [[:cdata_start, "<![CDATA["], [:text, " bla bla <!&@#> foo "], [:cdata_end, "]]>"]], result
  end

  def test_tokenizer_cdata_regression
    result = tokenize("<![CDATA[ foo ", " baz ]]>")
    assert_equal [[:cdata_start, "<![CDATA["],
      [:text, " foo "], [:text, " baz "], [:cdata_end, "]]>"]], result
  end

  def test_tokenizer_comment_regression
    result = tokenize("<!-- foo ", " baz -->")
    assert_equal [[:comment_start, "<!--"],
      [:text, " foo "], [:text, " baz "], [:comment_end, "-->"]], result
  end

  def test_tokenizer_parse_tag_after_comment_regression
    result = tokenize("<!-- foo --> <li>")
    assert_equal [[:comment_start, "<!--"], [:text, " foo "], [:comment_end, "-->"],
      [:text, " "], [:tag_start, "<"], [:tag_name, "li"], [:tag_end, ">"]], result
  end

  def test_tokenize_basic_tag
    result = tokenize("<div>")
    assert_equal [[:tag_start, "<"], [:tag_name, "div"], [:tag_end, ">"]], result
  end

  def test_tokenize_namespaced_tag
    result = tokenize("<ns:foo>")
    assert_equal [[:tag_start, "<"], [:tag_name, "ns:foo"], [:tag_end, ">"]], result
  end

  def test_tokenize_tag_with_lt
    result = tokenize("<a<b>")
    assert_equal [[:tag_start, "<"], [:tag_name, "a<b"], [:tag_end, ">"]], result
  end

  def test_tokenize_tag_multipart_name
    result = tokenize("<d", "iv", ">")
    assert_equal [[:tag_start, "<"], [:tag_name, "d"], [:tag_name, "iv"], [:tag_end, ">"]], result
  end

  def test_tokenize_tag_name_ending_with_slash
    result = tokenize("<div/1>")
    assert_equal [[:tag_start, "<"], [:tag_name, "div"], [:solidus, "/"], [:attribute_name, "1"], [:tag_end, ">"]], result
  end

  def test_tokenize_empty_tag
    result = tokenize("<>")
    assert_equal [[:tag_start, "<"], [:tag_end, ">"]], result
  end

  def test_tokenize_tag_with_solidus
    result = tokenize("</>")
    assert_equal [[:tag_start, "<"], [:solidus, "/"], [:tag_end, ">"]], result
  end

  def test_tokenize_end_tag
    result = tokenize("</div>")
    assert_equal [[:tag_start, "<"], [:solidus, "/"], [:tag_name, "div"], [:tag_end, ">"]], result
  end

  def test_tokenize_tag_attribute_with_double_quote
    result = tokenize('<div foo="bar">')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_unquoted_attributes_separated_with_solidus
    result = tokenize('<div foo=1/bar=2>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "1/bar=2"],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_quoted_attributes_separated_with_solidus
    result = tokenize('<div foo="1"/bar="2">')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "1"], [:attribute_quoted_value_end, "\""],
      [:solidus, "/"],
      [:attribute_name, "bar"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "2"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_tag_attribute_without_space
    result = tokenize('<div foo="bar"baz>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "\""],
      [:attribute_name, "baz"],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_multipart_unquoted_attribute
    result = tokenize('<div foo=', 'bar', 'baz>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "bar"],
      [:attribute_unquoted_value, "baz"], [:tag_end, ">"]
    ], result
  end

  def test_tokenize_quoted_attribute_separately
    result = tokenize('<div foo=', "'bar'", '>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "'"], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "'"],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_quoted_attribute_in_multiple_parts
    result = tokenize('<div foo=', "'bar", "baz'", '>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "'"], [:attribute_quoted_value, "bar"], [:attribute_quoted_value, "baz"], [:attribute_quoted_value_end, "'"],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_tag_attribute_with_single_quote
    result = tokenize("<div foo='bar'>")
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "'"], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "'"],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_tag_attribute_with_no_quotes
    result = tokenize("<div foo=bla bar=blo>")
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "bla"], [:whitespace, " "],
      [:attribute_name, "bar"], [:equal, "="], [:attribute_unquoted_value, "blo"],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_double_equals
    result = tokenize("<div foo=blabar=blo>")
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_unquoted_value, "blabar=blo"],
      [:tag_end, ">"]
    ], result
  end

  def test_tokenize_closing_tag
    result = tokenize('<div foo="bar" />')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "foo"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "bar"], [:attribute_quoted_value_end, "\""], [:whitespace, " "],
      [:solidus, "/"], [:tag_end, ">"]
    ], result
  end

  def test_tokenize_script_tag
    result = tokenize('<script>foo <b> bar</script>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "script"], [:tag_end, ">"],
      [:text, "foo "], [:text, "<b"], [:text, "> bar"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "script"], [:tag_end, ">"],
    ], result
  end

  def test_tokenize_textarea_tag
    result = tokenize('<textarea>hello</textarea>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "textarea"], [:tag_end, ">"],
      [:text, "hello"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "textarea"], [:tag_end, ">"],
    ], result
  end

  def test_tokenize_style_tag
    result = tokenize('<style></div></style>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "style"], [:tag_end, ">"],
      [:text, "</div"], [:text, ">"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "style"], [:tag_end, ">"],
    ], result
  end

  def test_tokenize_script_containing_html
    result = tokenize('<script type="text/html">foo <b> bar</script>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "script"], [:whitespace, " "],
      [:attribute_name, "type"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "text/html"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"],
      [:text, "foo "], [:text, "<b"], [:text, "> bar"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "script"], [:tag_end, ">"],
    ], result
  end

  def test_end_of_tag_on_newline
    # NOTE(review): the original multi-line string literal's whitespace was
    # collapsed by the diff extraction; this reconstruction is the minimal
    # literal consistent with the assertions below. Confirm against upstream.
    data = [" <div define=\"{credential_96_credential1: new Shopify.ProviderCredentials()}\"\n ", "", ">"]
    result = tokenize(*data)
    assert_equal [
      [:text, " "],
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "], [:attribute_name, "define"], [:equal, "="], [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "{credential_96_credential1: new Shopify.ProviderCredentials()}"], [:attribute_quoted_value_end, "\""],
      [:whitespace, "\n "], [:tag_end, ">"]
    ], result
  end

  def test_tokenize_multi_part_attribute_name
    result = tokenize('<div data-', 'shipping', '-type>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "div"], [:whitespace, " "],
      [:attribute_name, "data-"], [:attribute_name, "shipping"], [:attribute_name, "-type"],
      [:tag_end, ">"],
    ], result
  end

  def test_tokenize_attribute_name_with_space_before_equal
    result = tokenize('<a href ="http://www.cra-arc.gc.ca/tx/bsnss/tpcs/gst-tps/menu-eng.html">GST/HST</a>')
    assert_equal [
      [:tag_start, "<"], [:tag_name, "a"], [:whitespace, " "],
      [:attribute_name, "href"], [:whitespace, " "], [:equal, "="],
      [:attribute_quoted_value_start, "\""], [:attribute_quoted_value, "http://www.cra-arc.gc.ca/tx/bsnss/tpcs/gst-tps/menu-eng.html"], [:attribute_quoted_value_end, "\""],
      [:tag_end, ">"], [:text, "GST/HST"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "a"], [:tag_end, ">"]
    ], result
  end

  def test_raise_in_block
    # An exception raised inside the tokenize block must propagate out and
    # leave the tokenizer reusable for subsequent calls (hence 10.times).
    @tokenizer = HtmlTokenizer::Tokenizer.new
    10.times do
      e = assert_raises(RuntimeError) do
        @tokenizer.tokenize("<>") do |part|
          raise RuntimeError, "something went wrong"
        end
      end
      assert_equal "something went wrong", e.message
    end
  end

  def test_tokenize_end_of_script_regression
    result = tokenize("<script><</script>")
    assert_equal [
      [:tag_start, "<"], [:tag_name, "script"], [:tag_end, ">"],
      [:text, "<"],
      [:tag_start, "<"], [:solidus, "/"], [:tag_name, "script"], [:tag_end, ">"]
    ], result
  end

  private

  # Streams each part through a single fresh tokenizer and collects
  # [token_name, token_text] pairs. The block receives byte offsets
  # (start inclusive, stop exclusive) into the part currently being fed.
  def tokenize(*parts)
    tokens = []
    @tokenizer = HtmlTokenizer::Tokenizer.new
    parts.each do |part|
      @tokenizer.tokenize(part) { |name, start, stop| tokens << [name, part[start..(stop - 1)]] }
    end
    tokens
  end
end
|
metadata
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
name: html_tokenizer
version: !ruby/object:Gem::Version
  version: 0.0.1
platform: ruby
authors:
- Francois Chagnon
autorequire:
bindir: bin
cert_chain: []
date: 2017-10-26 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: rake-compiler
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: minitest
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '0'
description:
email:
executables:
- html_tokenizer
extensions:
- ext/html_tokenizer_ext/extconf.rb
extra_rdoc_files: []
files:
- .autotest
- .gitignore
- Gemfile
- Gemfile.lock
- LICENSE
- Manifest.txt
- README.md
- Rakefile
- bin/html_tokenizer
- ext/html_tokenizer_ext/extconf.rb
- ext/html_tokenizer_ext/html_tokenizer.c
- ext/html_tokenizer_ext/html_tokenizer.h
- ext/html_tokenizer_ext/parser.c
- ext/html_tokenizer_ext/parser.h
- ext/html_tokenizer_ext/tokenizer.c
- ext/html_tokenizer_ext/tokenizer.h
- html_tokenizer.gemspec
- lib/html_tokenizer.rb
- test/unit/parser_test.rb
- test/unit/tokenizer_test.rb
homepage:
licenses: []
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
- ext
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.0.14.1
signing_key:
specification_version: 4
summary: HTML Tokenizer
test_files:
- test/unit/parser_test.rb
- test/unit/tokenizer_test.rb