chupa-text-decomposer-html 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +4 -2
- data/chupa-text-decomposer-html.gemspec +4 -4
- data/doc/text/news.md +6 -0
- data/lib/chupa-text/decomposers/html.rb +121 -8
- data/test/test-html.rb +251 -3
- metadata +25 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '068db7f654ca70a8f65209c3d2f0d2fa1edf45ff'
|
4
|
+
data.tar.gz: 715d8630a7e69c100ed38a8bb090c5282b62fe2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab46f697f57427a940bd7968391648f54e2edbfab77974bc651ebe155f8dbfb6bbb9f8922d7153ee306ccb0402d30957a05fefe839437ba3ff28037f9a9e6ab0
|
7
|
+
data.tar.gz: 932251f709b54256f6478d4e7103f1b483c9c83166a277c6ee4b67b24c1528389ef1602ec960536de9b727cfcfa771e323d3467e9573911486f752e715ca5fcb
|
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- mode: ruby; coding: utf-8 -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -34,7 +34,9 @@ end
|
|
34
34
|
helper.install
|
35
35
|
spec = helper.gemspec
|
36
36
|
|
37
|
-
Packnga::DocumentTask.new(spec) do
|
37
|
+
Packnga::DocumentTask.new(spec) do |task|
|
38
|
+
task.original_language = "en"
|
39
|
+
task.translate_language = "ja"
|
38
40
|
end
|
39
41
|
|
40
42
|
Packnga::ReleaseTask.new(spec) do
|
data/chupa-text-decomposer-html.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -22,7 +22,7 @@ end
|
|
22
22
|
|
23
23
|
Gem::Specification.new do |spec|
|
24
24
|
spec.name = "chupa-text-decomposer-html"
|
25
|
-
spec.version = "1.0.1"
|
25
|
+
spec.version = "1.0.2"
|
26
26
|
spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-html"
|
27
27
|
spec.authors = ["Kouhei Sutou"]
|
28
28
|
spec.email = ["kou@clear-code.com"]
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
32
32
|
spec.summary = description.split(/\n\n+/, 2).first
|
33
33
|
spec.description = description
|
34
|
-
spec.license = "
|
34
|
+
spec.license = "LGPL-2.1+"
|
35
35
|
spec.files = ["#{spec.name}.gemspec"]
|
36
36
|
spec.files += ["README.md", "LICENSE.txt", "Rakefile", "Gemfile"]
|
37
37
|
spec.files += [".yardopts"]
|
data/doc/text/news.md
CHANGED
data/lib/chupa-text/decomposers/html.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -28,8 +28,16 @@ module ChupaText
|
|
28
28
|
"application/xhtml+xml",
|
29
29
|
]
|
30
30
|
def target?(data)
|
31
|
-
TARGET_EXTENSIONS.include?(data.extension)
|
32
|
-
|
31
|
+
return true if TARGET_EXTENSIONS.include?(data.extension)
|
32
|
+
return true if TARGET_MIME_TYPES.include?(data.mime_type)
|
33
|
+
|
34
|
+
body = data.body
|
35
|
+
return false if body.nil?
|
36
|
+
|
37
|
+
return true if body.start_with?("<!DOCTYPE html ")
|
38
|
+
return true if body.start_with?("<html")
|
39
|
+
|
40
|
+
false
|
33
41
|
end
|
34
42
|
|
35
43
|
def decompose(data)
|
@@ -37,13 +45,11 @@ module ChupaText
|
|
37
45
|
doc = Nokogiri::HTML.parse(html, nil, guess_encoding(html))
|
38
46
|
body_element = (doc % "body")
|
39
47
|
if body_element
|
40
|
-
body = body_element.
|
48
|
+
body = extract_text(body_element, "").scrub.gsub(/^\s+|\s+$/, '')
|
41
49
|
else
|
42
50
|
body = ""
|
43
51
|
end
|
44
|
-
decomposed_data = TextData.new(body)
|
45
|
-
decomposed_data.uri = data.uri
|
46
|
-
|
52
|
+
decomposed_data = TextData.new(body, :source_data => data)
|
47
53
|
attributes = decomposed_data.attributes
|
48
54
|
title_element = (doc % "head/title")
|
49
55
|
attributes.title = title_element.text if title_element
|
@@ -55,6 +61,10 @@ module ChupaText
|
|
55
61
|
|
56
62
|
private
|
57
63
|
def guess_encoding(text)
|
64
|
+
unless text.encoding.ascii_compatible?
|
65
|
+
return text.encoding.name
|
66
|
+
end
|
67
|
+
|
58
68
|
case text
|
59
69
|
when /\A<\?xml.+?encoding=(['"])([a-zA-Z0-9_-]+)\1/
|
60
70
|
$2
|
@@ -72,7 +82,11 @@ module ChupaText
|
|
72
82
|
charset = $2
|
73
83
|
normalize_charset(charset)
|
74
84
|
else
|
75
|
-
|
85
|
+
if text.encoding != Encoding::ASCII_8BIT and text.valid_encoding?
|
86
|
+
text.encoding.to_s
|
87
|
+
else
|
88
|
+
guess_encoding_nkf(text)
|
89
|
+
end
|
76
90
|
end
|
77
91
|
end
|
78
92
|
|
@@ -90,6 +104,105 @@ module ChupaText
|
|
90
104
|
def guess_encoding_nkf(text)
|
91
105
|
NKF.guess(text).name
|
92
106
|
end
|
107
|
+
|
108
|
+
def extract_text(element, text)
|
109
|
+
name = element.name.downcase
|
110
|
+
classes = (element["class"] || "").split
|
111
|
+
return text if noindex_element?(element, name, classes)
|
112
|
+
return text if header_element?(element, name, classes)
|
113
|
+
return text if footer_element?(element, name, classes)
|
114
|
+
return text if navigation_element?(element, name, classes)
|
115
|
+
|
116
|
+
element.children.each do |child|
|
117
|
+
case child
|
118
|
+
when Nokogiri::XML::Text
|
119
|
+
text << child.text
|
120
|
+
when Nokogiri::XML::Element
|
121
|
+
extract_text(child, text)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
text
|
126
|
+
end
|
127
|
+
|
128
|
+
def noindex_element?(element, name, classes)
|
129
|
+
case name
|
130
|
+
when "script", "noscript", "link", "style"
|
131
|
+
return true
|
132
|
+
end
|
133
|
+
|
134
|
+
classes.each do |klass|
|
135
|
+
case klass
|
136
|
+
when "noindex", "robots-noindex"
|
137
|
+
return true
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
def header_element?(element, name, classes)
|
145
|
+
case name
|
146
|
+
when "header", "nav"
|
147
|
+
return true
|
148
|
+
end
|
149
|
+
|
150
|
+
classes.each do |klass|
|
151
|
+
case klass
|
152
|
+
when "header"
|
153
|
+
return true
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
case element["id"]
|
158
|
+
when "header"
|
159
|
+
return true
|
160
|
+
end
|
161
|
+
|
162
|
+
false
|
163
|
+
end
|
164
|
+
|
165
|
+
def footer_element?(element, name, classes)
|
166
|
+
case name
|
167
|
+
when "footer"
|
168
|
+
return true
|
169
|
+
end
|
170
|
+
|
171
|
+
classes.each do |klass|
|
172
|
+
case klass
|
173
|
+
when "footer"
|
174
|
+
return true
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
case element["id"]
|
179
|
+
when "footer"
|
180
|
+
return true
|
181
|
+
end
|
182
|
+
|
183
|
+
false
|
184
|
+
end
|
185
|
+
|
186
|
+
def navigation_element?(element, name, classes)
|
187
|
+
case name
|
188
|
+
when "nav"
|
189
|
+
return true
|
190
|
+
end
|
191
|
+
|
192
|
+
classes.each do |klass|
|
193
|
+
case klass
|
194
|
+
when "nav", "menu"
|
195
|
+
return true
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
case element["id"]
|
200
|
+
when "nav", "menu"
|
201
|
+
return true
|
202
|
+
end
|
203
|
+
|
204
|
+
false
|
205
|
+
end
|
93
206
|
end
|
94
207
|
end
|
95
208
|
end
|
data/test/test-html.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013-
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -51,6 +51,12 @@ class TestHTML < Test::Unit::TestCase
|
|
51
51
|
def test_txt
|
52
52
|
assert_false(@decomposer.target?(create_data("index.txt")))
|
53
53
|
end
|
54
|
+
|
55
|
+
def test_php
|
56
|
+
assert do
|
57
|
+
not @decomposer.target?(create_data("index.php"))
|
58
|
+
end
|
59
|
+
end
|
54
60
|
end
|
55
61
|
|
56
62
|
sub_test_case("mime-type") do
|
@@ -72,6 +78,26 @@ class TestHTML < Test::Unit::TestCase
|
|
72
78
|
assert_false(@decomposer.target?(create_data("text/plain")))
|
73
79
|
end
|
74
80
|
end
|
81
|
+
|
82
|
+
sub_test_case("content") do
|
83
|
+
def create_data(body)
|
84
|
+
data = ChupaText::Data.new
|
85
|
+
data.body = body
|
86
|
+
data
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_doctype_html
|
90
|
+
assert do
|
91
|
+
@decomposer.target?(create_data("<!DOCTYPE html "))
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_html
|
96
|
+
assert do
|
97
|
+
@decomposer.target?(create_data("<html"))
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
75
101
|
end
|
76
102
|
|
77
103
|
sub_test_case("decompose") do
|
@@ -138,12 +164,12 @@ class TestHTML < Test::Unit::TestCase
|
|
138
164
|
|
139
165
|
sub_test_case("detect") do
|
140
166
|
def test_nothing
|
141
|
-
@data.body = <<-HTML
|
167
|
+
@data.body = <<-HTML.force_encoding("UTF-8")
|
142
168
|
<html>
|
143
169
|
<body>Hello</body>
|
144
170
|
</html>
|
145
171
|
HTML
|
146
|
-
assert_equal([Encoding::
|
172
|
+
assert_equal([Encoding::UTF_8], decompose(@data))
|
147
173
|
end
|
148
174
|
|
149
175
|
def test_xml_declaration
|
@@ -181,6 +207,44 @@ class TestHTML < Test::Unit::TestCase
|
|
181
207
|
HTML5
|
182
208
|
assert_equal([Encoding::EUC_JP], decompose(@data))
|
183
209
|
end
|
210
|
+
|
211
|
+
sub_test_case("not ascii_compatible?") do
|
212
|
+
def test_iso_2022_jp
|
213
|
+
@data.body = <<-ISO_2022_JP_HTML.encode("ISO-2022-JP")
|
214
|
+
<html>
|
215
|
+
<head>
|
216
|
+
<title>タイトル</title>
|
217
|
+
</head>
|
218
|
+
<body>Hello</body>
|
219
|
+
</html>
|
220
|
+
ISO_2022_JP_HTML
|
221
|
+
assert_equal([Encoding::ISO_2022_JP], decompose(@data))
|
222
|
+
end
|
223
|
+
|
224
|
+
def test_utf_32
|
225
|
+
@data.body = <<-UTF_32_HTML.encode("UTF-32")
|
226
|
+
<html>
|
227
|
+
<head>
|
228
|
+
<title>タイトル</title>
|
229
|
+
</head>
|
230
|
+
<body>Hello</body>
|
231
|
+
</html>
|
232
|
+
UTF_32_HTML
|
233
|
+
assert_equal([Encoding::UTF_32], decompose(@data))
|
234
|
+
end
|
235
|
+
|
236
|
+
def test_koi8_r
|
237
|
+
@data.body = <<-KOI8_R_HTML.encode("KOI8-R")
|
238
|
+
<html>
|
239
|
+
<head>
|
240
|
+
<title>название</title>
|
241
|
+
</head>
|
242
|
+
<body>Hello</body>
|
243
|
+
</html>
|
244
|
+
KOI8_R_HTML
|
245
|
+
assert_equal([Encoding::KOI8_R], decompose(@data))
|
246
|
+
end
|
247
|
+
end
|
184
248
|
end
|
185
249
|
|
186
250
|
sub_test_case("normalize") do
|
@@ -209,5 +273,189 @@ class TestHTML < Test::Unit::TestCase
|
|
209
273
|
end
|
210
274
|
end
|
211
275
|
end
|
276
|
+
|
277
|
+
sub_test_case("body") do
|
278
|
+
def normalize_decomposed_data(decomposed_data)
|
279
|
+
decomposed_data.body
|
280
|
+
end
|
281
|
+
|
282
|
+
sub_test_case("noindex") do
|
283
|
+
def test_script
|
284
|
+
@data.body = <<-HTML
|
285
|
+
<html>
|
286
|
+
<body>Before<script>var x;</script>After</body>
|
287
|
+
</html>
|
288
|
+
HTML
|
289
|
+
assert_equal(["BeforeAfter"],
|
290
|
+
decompose(@data))
|
291
|
+
end
|
292
|
+
|
293
|
+
def test_noscript
|
294
|
+
@data.body = <<-HTML
|
295
|
+
<html>
|
296
|
+
<body>Before<noscript>Enable JavaScript!</noscript>After</body>
|
297
|
+
</html>
|
298
|
+
HTML
|
299
|
+
assert_equal(["BeforeAfter"],
|
300
|
+
decompose(@data))
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_link
|
304
|
+
@data.body = <<-HTML
|
305
|
+
<html>
|
306
|
+
<body>Before<link rel="stylehseet">After</body>
|
307
|
+
</html>
|
308
|
+
HTML
|
309
|
+
assert_equal(["BeforeAfter"],
|
310
|
+
decompose(@data))
|
311
|
+
end
|
312
|
+
|
313
|
+
def test_style
|
314
|
+
@data.body = <<-HTML
|
315
|
+
<html>
|
316
|
+
<body>Before<style>a {color: "red";}</style>After</body>
|
317
|
+
</html>
|
318
|
+
HTML
|
319
|
+
assert_equal(["BeforeAfter"],
|
320
|
+
decompose(@data))
|
321
|
+
end
|
322
|
+
|
323
|
+
def test_noindex
|
324
|
+
@data.body = <<-HTML
|
325
|
+
<html>
|
326
|
+
<body>Before<div class="noindex">header</div>After</body>
|
327
|
+
</html>
|
328
|
+
HTML
|
329
|
+
assert_equal(["BeforeAfter"],
|
330
|
+
decompose(@data))
|
331
|
+
end
|
332
|
+
|
333
|
+
def test_robots_noindex
|
334
|
+
@data.body = <<-HTML
|
335
|
+
<html>
|
336
|
+
<body>Before<div class="robots-noindex">header</div>After</body>
|
337
|
+
</html>
|
338
|
+
HTML
|
339
|
+
assert_equal(["BeforeAfter"],
|
340
|
+
decompose(@data))
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
sub_test_case("header") do
|
345
|
+
def test_tag
|
346
|
+
@data.body = <<-HTML
|
347
|
+
<html>
|
348
|
+
<body>Before<header>header</header>After</body>
|
349
|
+
</html>
|
350
|
+
HTML
|
351
|
+
assert_equal(["BeforeAfter"],
|
352
|
+
decompose(@data))
|
353
|
+
end
|
354
|
+
|
355
|
+
def test_class
|
356
|
+
@data.body = <<-HTML
|
357
|
+
<html>
|
358
|
+
<body>Before<div class="header">header</div>After</body>
|
359
|
+
</html>
|
360
|
+
HTML
|
361
|
+
assert_equal(["BeforeAfter"],
|
362
|
+
decompose(@data))
|
363
|
+
end
|
364
|
+
|
365
|
+
def test_id
|
366
|
+
@data.body = <<-HTML
|
367
|
+
<html>
|
368
|
+
<body>Before<div id="header">header</div>After</body>
|
369
|
+
</html>
|
370
|
+
HTML
|
371
|
+
assert_equal(["BeforeAfter"],
|
372
|
+
decompose(@data))
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
sub_test_case("footer") do
|
377
|
+
def test_tag
|
378
|
+
@data.body = <<-HTML
|
379
|
+
<html>
|
380
|
+
<body>Before<footer>footer</footer>After</body>
|
381
|
+
</html>
|
382
|
+
HTML
|
383
|
+
assert_equal(["BeforeAfter"],
|
384
|
+
decompose(@data))
|
385
|
+
end
|
386
|
+
|
387
|
+
def test_class
|
388
|
+
@data.body = <<-HTML
|
389
|
+
<html>
|
390
|
+
<body>Before<div class="footer">footer</div>After</body>
|
391
|
+
</html>
|
392
|
+
HTML
|
393
|
+
assert_equal(["BeforeAfter"],
|
394
|
+
decompose(@data))
|
395
|
+
end
|
396
|
+
|
397
|
+
def test_id
|
398
|
+
@data.body = <<-HTML
|
399
|
+
<html>
|
400
|
+
<body>Before<div id="footer">footer</div>After</body>
|
401
|
+
</html>
|
402
|
+
HTML
|
403
|
+
assert_equal(["BeforeAfter"],
|
404
|
+
decompose(@data))
|
405
|
+
end
|
406
|
+
end
|
407
|
+
|
408
|
+
sub_test_case("navigation") do
|
409
|
+
def test_nav_tag
|
410
|
+
@data.body = <<-HTML
|
411
|
+
<html>
|
412
|
+
<body>Before<nav>nav</nav>After</body>
|
413
|
+
</html>
|
414
|
+
HTML
|
415
|
+
assert_equal(["BeforeAfter"],
|
416
|
+
decompose(@data))
|
417
|
+
end
|
418
|
+
|
419
|
+
def test_nav_class
|
420
|
+
@data.body = <<-HTML
|
421
|
+
<html>
|
422
|
+
<body>Before<div class="nav">nav</div>After</body>
|
423
|
+
</html>
|
424
|
+
HTML
|
425
|
+
assert_equal(["BeforeAfter"],
|
426
|
+
decompose(@data))
|
427
|
+
end
|
428
|
+
|
429
|
+
def test_menu_class
|
430
|
+
@data.body = <<-HTML
|
431
|
+
<html>
|
432
|
+
<body>Before<div class="menu">nav</div>After</body>
|
433
|
+
</html>
|
434
|
+
HTML
|
435
|
+
assert_equal(["BeforeAfter"],
|
436
|
+
decompose(@data))
|
437
|
+
end
|
438
|
+
|
439
|
+
def test_nav_id
|
440
|
+
@data.body = <<-HTML
|
441
|
+
<html>
|
442
|
+
<body>Before<div id="nav">nav</div>After</body>
|
443
|
+
</html>
|
444
|
+
HTML
|
445
|
+
assert_equal(["BeforeAfter"],
|
446
|
+
decompose(@data))
|
447
|
+
end
|
448
|
+
|
449
|
+
def test_menu_id
|
450
|
+
@data.body = <<-HTML
|
451
|
+
<html>
|
452
|
+
<body>Before<div id="menu">nav</div>After</body>
|
453
|
+
</html>
|
454
|
+
HTML
|
455
|
+
assert_equal(["BeforeAfter"],
|
456
|
+
decompose(@data))
|
457
|
+
end
|
458
|
+
end
|
459
|
+
end
|
212
460
|
end
|
213
461
|
end
|
metadata
CHANGED
@@ -1,111 +1,111 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-html
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chupa-text
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: test-unit
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: packnga
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: redcarpet
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
description: |
|
@@ -119,19 +119,19 @@ executables: []
|
|
119
119
|
extensions: []
|
120
120
|
extra_rdoc_files: []
|
121
121
|
files:
|
122
|
-
-
|
123
|
-
-
|
122
|
+
- ".yardopts"
|
123
|
+
- Gemfile
|
124
124
|
- LICENSE.txt
|
125
|
+
- README.md
|
125
126
|
- Rakefile
|
126
|
-
-
|
127
|
-
- .yardopts
|
128
|
-
- lib/chupa-text/decomposers/html.rb
|
127
|
+
- chupa-text-decomposer-html.gemspec
|
129
128
|
- doc/text/news.md
|
129
|
+
- lib/chupa-text/decomposers/html.rb
|
130
130
|
- test/run-test.rb
|
131
131
|
- test/test-html.rb
|
132
132
|
homepage: https://github.com/ranguba/chupa-text-decomposer-html
|
133
133
|
licenses:
|
134
|
-
-
|
134
|
+
- LGPL-2.1+
|
135
135
|
metadata: {}
|
136
136
|
post_install_message:
|
137
137
|
rdoc_options: []
|
@@ -139,20 +139,19 @@ require_paths:
|
|
139
139
|
- lib
|
140
140
|
required_ruby_version: !ruby/object:Gem::Requirement
|
141
141
|
requirements:
|
142
|
-
- -
|
142
|
+
- - ">="
|
143
143
|
- !ruby/object:Gem::Version
|
144
144
|
version: '0'
|
145
145
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
146
|
requirements:
|
147
|
-
- -
|
147
|
+
- - ">="
|
148
148
|
- !ruby/object:Gem::Version
|
149
149
|
version: '0'
|
150
150
|
requirements: []
|
151
151
|
rubyforge_project:
|
152
|
-
rubygems_version: 2.
|
152
|
+
rubygems_version: 2.5.2
|
153
153
|
signing_key:
|
154
154
|
specification_version: 4
|
155
155
|
summary: This is a ChupaText decomposer plugin for to extract text and meta-data from
|
156
156
|
HTML.
|
157
157
|
test_files: []
|
158
|
-
has_rdoc:
|