chupa-text-decomposer-html 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +4 -2
- data/chupa-text-decomposer-html.gemspec +4 -4
- data/doc/text/news.md +6 -0
- data/lib/chupa-text/decomposers/html.rb +121 -8
- data/test/test-html.rb +251 -3
- metadata +25 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '068db7f654ca70a8f65209c3d2f0d2fa1edf45ff'
|
4
|
+
data.tar.gz: 715d8630a7e69c100ed38a8bb090c5282b62fe2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab46f697f57427a940bd7968391648f54e2edbfab77974bc651ebe155f8dbfb6bbb9f8922d7153ee306ccb0402d30957a05fefe839437ba3ff28037f9a9e6ab0
|
7
|
+
data.tar.gz: 932251f709b54256f6478d4e7103f1b483c9c83166a277c6ee4b67b24c1528389ef1602ec960536de9b727cfcfa771e323d3467e9573911486f752e715ca5fcb
|
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- mode: ruby; coding: utf-8 -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -34,7 +34,9 @@ end
|
|
34
34
|
helper.install
|
35
35
|
spec = helper.gemspec
|
36
36
|
|
37
|
-
Packnga::DocumentTask.new(spec) do
|
37
|
+
Packnga::DocumentTask.new(spec) do |task|
|
38
|
+
task.original_language = "en"
|
39
|
+
task.translate_language = "ja"
|
38
40
|
end
|
39
41
|
|
40
42
|
Packnga::ReleaseTask.new(spec) do
|
@@ -1,6 +1,6 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -22,7 +22,7 @@ end
|
|
22
22
|
|
23
23
|
Gem::Specification.new do |spec|
|
24
24
|
spec.name = "chupa-text-decomposer-html"
|
25
|
-
spec.version = "1.0.1"
|
25
|
+
spec.version = "1.0.2"
|
26
26
|
spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-html"
|
27
27
|
spec.authors = ["Kouhei Sutou"]
|
28
28
|
spec.email = ["kou@clear-code.com"]
|
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
32
32
|
spec.summary = description.split(/\n\n+/, 2).first
|
33
33
|
spec.description = description
|
34
|
-
spec.license = "
|
34
|
+
spec.license = "LGPL-2.1+"
|
35
35
|
spec.files = ["#{spec.name}.gemspec"]
|
36
36
|
spec.files += ["README.md", "LICENSE.txt", "Rakefile", "Gemfile"]
|
37
37
|
spec.files += [".yardopts"]
|
data/doc/text/news.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -28,8 +28,16 @@ module ChupaText
|
|
28
28
|
"application/xhtml+xml",
|
29
29
|
]
|
30
30
|
def target?(data)
|
31
|
-
TARGET_EXTENSIONS.include?(data.extension)
|
32
|
-
|
31
|
+
return true if TARGET_EXTENSIONS.include?(data.extension)
|
32
|
+
return true if TARGET_MIME_TYPES.include?(data.mime_type)
|
33
|
+
|
34
|
+
body = data.body
|
35
|
+
return false if body.nil?
|
36
|
+
|
37
|
+
return true if body.start_with?("<!DOCTYPE html ")
|
38
|
+
return true if body.start_with?("<html")
|
39
|
+
|
40
|
+
false
|
33
41
|
end
|
34
42
|
|
35
43
|
def decompose(data)
|
@@ -37,13 +45,11 @@ module ChupaText
|
|
37
45
|
doc = Nokogiri::HTML.parse(html, nil, guess_encoding(html))
|
38
46
|
body_element = (doc % "body")
|
39
47
|
if body_element
|
40
|
-
body = body_element.
|
48
|
+
body = extract_text(body_element, "").scrub.gsub(/^\s+|\s+$/, '')
|
41
49
|
else
|
42
50
|
body = ""
|
43
51
|
end
|
44
|
-
decomposed_data = TextData.new(body)
|
45
|
-
decomposed_data.uri = data.uri
|
46
|
-
|
52
|
+
decomposed_data = TextData.new(body, :source_data => data)
|
47
53
|
attributes = decomposed_data.attributes
|
48
54
|
title_element = (doc % "head/title")
|
49
55
|
attributes.title = title_element.text if title_element
|
@@ -55,6 +61,10 @@ module ChupaText
|
|
55
61
|
|
56
62
|
private
|
57
63
|
def guess_encoding(text)
|
64
|
+
unless text.encoding.ascii_compatible?
|
65
|
+
return text.encoding.name
|
66
|
+
end
|
67
|
+
|
58
68
|
case text
|
59
69
|
when /\A<\?xml.+?encoding=(['"])([a-zA-Z0-9_-]+)\1/
|
60
70
|
$2
|
@@ -72,7 +82,11 @@ module ChupaText
|
|
72
82
|
charset = $2
|
73
83
|
normalize_charset(charset)
|
74
84
|
else
|
75
|
-
|
85
|
+
if text.encoding != Encoding::ASCII_8BIT and text.valid_encoding?
|
86
|
+
text.encoding.to_s
|
87
|
+
else
|
88
|
+
guess_encoding_nkf(text)
|
89
|
+
end
|
76
90
|
end
|
77
91
|
end
|
78
92
|
|
@@ -90,6 +104,105 @@ module ChupaText
|
|
90
104
|
def guess_encoding_nkf(text)
|
91
105
|
NKF.guess(text).name
|
92
106
|
end
|
107
|
+
|
108
|
+
def extract_text(element, text)
|
109
|
+
name = element.name.downcase
|
110
|
+
classes = (element["class"] || "").split
|
111
|
+
return text if noindex_element?(element, name, classes)
|
112
|
+
return text if header_element?(element, name, classes)
|
113
|
+
return text if footer_element?(element, name, classes)
|
114
|
+
return text if navigation_element?(element, name, classes)
|
115
|
+
|
116
|
+
element.children.each do |child|
|
117
|
+
case child
|
118
|
+
when Nokogiri::XML::Text
|
119
|
+
text << child.text
|
120
|
+
when Nokogiri::XML::Element
|
121
|
+
extract_text(child, text)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
text
|
126
|
+
end
|
127
|
+
|
128
|
+
def noindex_element?(element, name, classes)
|
129
|
+
case name
|
130
|
+
when "script", "noscript", "link", "style"
|
131
|
+
return true
|
132
|
+
end
|
133
|
+
|
134
|
+
classes.each do |klass|
|
135
|
+
case klass
|
136
|
+
when "noindex", "robots-noindex"
|
137
|
+
return true
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
def header_element?(element, name, classes)
|
145
|
+
case name
|
146
|
+
when "header", "nav"
|
147
|
+
return true
|
148
|
+
end
|
149
|
+
|
150
|
+
classes.each do |klass|
|
151
|
+
case klass
|
152
|
+
when "header"
|
153
|
+
return true
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
case element["id"]
|
158
|
+
when "header"
|
159
|
+
return true
|
160
|
+
end
|
161
|
+
|
162
|
+
false
|
163
|
+
end
|
164
|
+
|
165
|
+
def footer_element?(element, name, classes)
|
166
|
+
case name
|
167
|
+
when "footer"
|
168
|
+
return true
|
169
|
+
end
|
170
|
+
|
171
|
+
classes.each do |klass|
|
172
|
+
case klass
|
173
|
+
when "footer"
|
174
|
+
return true
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
case element["id"]
|
179
|
+
when "footer"
|
180
|
+
return true
|
181
|
+
end
|
182
|
+
|
183
|
+
false
|
184
|
+
end
|
185
|
+
|
186
|
+
def navigation_element?(element, name, classes)
|
187
|
+
case name
|
188
|
+
when "nav"
|
189
|
+
return true
|
190
|
+
end
|
191
|
+
|
192
|
+
classes.each do |klass|
|
193
|
+
case klass
|
194
|
+
when "nav", "menu"
|
195
|
+
return true
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
case element["id"]
|
200
|
+
when "nav", "menu"
|
201
|
+
return true
|
202
|
+
end
|
203
|
+
|
204
|
+
false
|
205
|
+
end
|
93
206
|
end
|
94
207
|
end
|
95
208
|
end
|
data/test/test-html.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013-
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -51,6 +51,12 @@ class TestHTML < Test::Unit::TestCase
|
|
51
51
|
def test_txt
|
52
52
|
assert_false(@decomposer.target?(create_data("index.txt")))
|
53
53
|
end
|
54
|
+
|
55
|
+
def test_php
|
56
|
+
assert do
|
57
|
+
not @decomposer.target?(create_data("index.php"))
|
58
|
+
end
|
59
|
+
end
|
54
60
|
end
|
55
61
|
|
56
62
|
sub_test_case("mime-type") do
|
@@ -72,6 +78,26 @@ class TestHTML < Test::Unit::TestCase
|
|
72
78
|
assert_false(@decomposer.target?(create_data("text/plain")))
|
73
79
|
end
|
74
80
|
end
|
81
|
+
|
82
|
+
sub_test_case("content") do
|
83
|
+
def create_data(body)
|
84
|
+
data = ChupaText::Data.new
|
85
|
+
data.body = body
|
86
|
+
data
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_doctype_html
|
90
|
+
assert do
|
91
|
+
@decomposer.target?(create_data("<!DOCTYPE html "))
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_html
|
96
|
+
assert do
|
97
|
+
@decomposer.target?(create_data("<html"))
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
75
101
|
end
|
76
102
|
|
77
103
|
sub_test_case("decompose") do
|
@@ -138,12 +164,12 @@ class TestHTML < Test::Unit::TestCase
|
|
138
164
|
|
139
165
|
sub_test_case("detect") do
|
140
166
|
def test_nothing
|
141
|
-
@data.body = <<-HTML
|
167
|
+
@data.body = <<-HTML.force_encoding("UTF-8")
|
142
168
|
<html>
|
143
169
|
<body>Hello</body>
|
144
170
|
</html>
|
145
171
|
HTML
|
146
|
-
assert_equal([Encoding::
|
172
|
+
assert_equal([Encoding::UTF_8], decompose(@data))
|
147
173
|
end
|
148
174
|
|
149
175
|
def test_xml_declaration
|
@@ -181,6 +207,44 @@ class TestHTML < Test::Unit::TestCase
|
|
181
207
|
HTML5
|
182
208
|
assert_equal([Encoding::EUC_JP], decompose(@data))
|
183
209
|
end
|
210
|
+
|
211
|
+
sub_test_case("not ascii_compatible?") do
|
212
|
+
def test_iso_2022_jp
|
213
|
+
@data.body = <<-ISO_2022_JP_HTML.encode("ISO-2022-JP")
|
214
|
+
<html>
|
215
|
+
<head>
|
216
|
+
<title>タイトル</title>
|
217
|
+
</head>
|
218
|
+
<body>Hello</body>
|
219
|
+
</html>
|
220
|
+
ISO_2022_JP_HTML
|
221
|
+
assert_equal([Encoding::ISO_2022_JP], decompose(@data))
|
222
|
+
end
|
223
|
+
|
224
|
+
def test_utf_32
|
225
|
+
@data.body = <<-UTF_32_HTML.encode("UTF-32")
|
226
|
+
<html>
|
227
|
+
<head>
|
228
|
+
<title>タイトル</title>
|
229
|
+
</head>
|
230
|
+
<body>Hello</body>
|
231
|
+
</html>
|
232
|
+
UTF_32_HTML
|
233
|
+
assert_equal([Encoding::UTF_32], decompose(@data))
|
234
|
+
end
|
235
|
+
|
236
|
+
def test_koi8_r
|
237
|
+
@data.body = <<-KOI8_R_HTML.encode("KOI8-R")
|
238
|
+
<html>
|
239
|
+
<head>
|
240
|
+
<title>название</title>
|
241
|
+
</head>
|
242
|
+
<body>Hello</body>
|
243
|
+
</html>
|
244
|
+
KOI8_R_HTML
|
245
|
+
assert_equal([Encoding::KOI8_R], decompose(@data))
|
246
|
+
end
|
247
|
+
end
|
184
248
|
end
|
185
249
|
|
186
250
|
sub_test_case("normalize") do
|
@@ -209,5 +273,189 @@ class TestHTML < Test::Unit::TestCase
|
|
209
273
|
end
|
210
274
|
end
|
211
275
|
end
|
276
|
+
|
277
|
+
sub_test_case("body") do
|
278
|
+
def normalize_decomposed_data(decomposed_data)
|
279
|
+
decomposed_data.body
|
280
|
+
end
|
281
|
+
|
282
|
+
sub_test_case("noindex") do
|
283
|
+
def test_script
|
284
|
+
@data.body = <<-HTML
|
285
|
+
<html>
|
286
|
+
<body>Before<script>var x;</script>After</body>
|
287
|
+
</html>
|
288
|
+
HTML
|
289
|
+
assert_equal(["BeforeAfter"],
|
290
|
+
decompose(@data))
|
291
|
+
end
|
292
|
+
|
293
|
+
def test_noscript
|
294
|
+
@data.body = <<-HTML
|
295
|
+
<html>
|
296
|
+
<body>Before<noscript>Enable JavaScript!</noscript>After</body>
|
297
|
+
</html>
|
298
|
+
HTML
|
299
|
+
assert_equal(["BeforeAfter"],
|
300
|
+
decompose(@data))
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_link
|
304
|
+
@data.body = <<-HTML
|
305
|
+
<html>
|
306
|
+
<body>Before<link rel="stylehseet">After</body>
|
307
|
+
</html>
|
308
|
+
HTML
|
309
|
+
assert_equal(["BeforeAfter"],
|
310
|
+
decompose(@data))
|
311
|
+
end
|
312
|
+
|
313
|
+
def test_style
|
314
|
+
@data.body = <<-HTML
|
315
|
+
<html>
|
316
|
+
<body>Before<style>a {color: "red";}</style>After</body>
|
317
|
+
</html>
|
318
|
+
HTML
|
319
|
+
assert_equal(["BeforeAfter"],
|
320
|
+
decompose(@data))
|
321
|
+
end
|
322
|
+
|
323
|
+
def test_noindex
|
324
|
+
@data.body = <<-HTML
|
325
|
+
<html>
|
326
|
+
<body>Before<div class="noindex">header</div>After</body>
|
327
|
+
</html>
|
328
|
+
HTML
|
329
|
+
assert_equal(["BeforeAfter"],
|
330
|
+
decompose(@data))
|
331
|
+
end
|
332
|
+
|
333
|
+
def test_robots_noindex
|
334
|
+
@data.body = <<-HTML
|
335
|
+
<html>
|
336
|
+
<body>Before<div class="robots-noindex">header</div>After</body>
|
337
|
+
</html>
|
338
|
+
HTML
|
339
|
+
assert_equal(["BeforeAfter"],
|
340
|
+
decompose(@data))
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
sub_test_case("header") do
|
345
|
+
def test_tag
|
346
|
+
@data.body = <<-HTML
|
347
|
+
<html>
|
348
|
+
<body>Before<header>header</header>After</body>
|
349
|
+
</html>
|
350
|
+
HTML
|
351
|
+
assert_equal(["BeforeAfter"],
|
352
|
+
decompose(@data))
|
353
|
+
end
|
354
|
+
|
355
|
+
def test_class
|
356
|
+
@data.body = <<-HTML
|
357
|
+
<html>
|
358
|
+
<body>Before<div class="header">header</div>After</body>
|
359
|
+
</html>
|
360
|
+
HTML
|
361
|
+
assert_equal(["BeforeAfter"],
|
362
|
+
decompose(@data))
|
363
|
+
end
|
364
|
+
|
365
|
+
def test_id
|
366
|
+
@data.body = <<-HTML
|
367
|
+
<html>
|
368
|
+
<body>Before<div id="header">header</div>After</body>
|
369
|
+
</html>
|
370
|
+
HTML
|
371
|
+
assert_equal(["BeforeAfter"],
|
372
|
+
decompose(@data))
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
sub_test_case("footer") do
|
377
|
+
def test_tag
|
378
|
+
@data.body = <<-HTML
|
379
|
+
<html>
|
380
|
+
<body>Before<footer>footer</footer>After</body>
|
381
|
+
</html>
|
382
|
+
HTML
|
383
|
+
assert_equal(["BeforeAfter"],
|
384
|
+
decompose(@data))
|
385
|
+
end
|
386
|
+
|
387
|
+
def test_class
|
388
|
+
@data.body = <<-HTML
|
389
|
+
<html>
|
390
|
+
<body>Before<div class="footer">footer</div>After</body>
|
391
|
+
</html>
|
392
|
+
HTML
|
393
|
+
assert_equal(["BeforeAfter"],
|
394
|
+
decompose(@data))
|
395
|
+
end
|
396
|
+
|
397
|
+
def test_id
|
398
|
+
@data.body = <<-HTML
|
399
|
+
<html>
|
400
|
+
<body>Before<div id="footer">footer</div>After</body>
|
401
|
+
</html>
|
402
|
+
HTML
|
403
|
+
assert_equal(["BeforeAfter"],
|
404
|
+
decompose(@data))
|
405
|
+
end
|
406
|
+
end
|
407
|
+
|
408
|
+
sub_test_case("navigation") do
|
409
|
+
def test_nav_tag
|
410
|
+
@data.body = <<-HTML
|
411
|
+
<html>
|
412
|
+
<body>Before<nav>nav</nav>After</body>
|
413
|
+
</html>
|
414
|
+
HTML
|
415
|
+
assert_equal(["BeforeAfter"],
|
416
|
+
decompose(@data))
|
417
|
+
end
|
418
|
+
|
419
|
+
def test_nav_class
|
420
|
+
@data.body = <<-HTML
|
421
|
+
<html>
|
422
|
+
<body>Before<div class="nav">nav</div>After</body>
|
423
|
+
</html>
|
424
|
+
HTML
|
425
|
+
assert_equal(["BeforeAfter"],
|
426
|
+
decompose(@data))
|
427
|
+
end
|
428
|
+
|
429
|
+
def test_menu_class
|
430
|
+
@data.body = <<-HTML
|
431
|
+
<html>
|
432
|
+
<body>Before<div class="menu">nav</div>After</body>
|
433
|
+
</html>
|
434
|
+
HTML
|
435
|
+
assert_equal(["BeforeAfter"],
|
436
|
+
decompose(@data))
|
437
|
+
end
|
438
|
+
|
439
|
+
def test_nav_id
|
440
|
+
@data.body = <<-HTML
|
441
|
+
<html>
|
442
|
+
<body>Before<div id="nav">nav</div>After</body>
|
443
|
+
</html>
|
444
|
+
HTML
|
445
|
+
assert_equal(["BeforeAfter"],
|
446
|
+
decompose(@data))
|
447
|
+
end
|
448
|
+
|
449
|
+
def test_menu_id
|
450
|
+
@data.body = <<-HTML
|
451
|
+
<html>
|
452
|
+
<body>Before<div id="menu">nav</div>After</body>
|
453
|
+
</html>
|
454
|
+
HTML
|
455
|
+
assert_equal(["BeforeAfter"],
|
456
|
+
decompose(@data))
|
457
|
+
end
|
458
|
+
end
|
459
|
+
end
|
212
460
|
end
|
213
461
|
end
|
metadata
CHANGED
@@ -1,111 +1,111 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-html
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chupa-text
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: test-unit
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: packnga
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: redcarpet
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
description: |
|
@@ -119,19 +119,19 @@ executables: []
|
|
119
119
|
extensions: []
|
120
120
|
extra_rdoc_files: []
|
121
121
|
files:
|
122
|
-
-
|
123
|
-
-
|
122
|
+
- ".yardopts"
|
123
|
+
- Gemfile
|
124
124
|
- LICENSE.txt
|
125
|
+
- README.md
|
125
126
|
- Rakefile
|
126
|
-
-
|
127
|
-
- .yardopts
|
128
|
-
- lib/chupa-text/decomposers/html.rb
|
127
|
+
- chupa-text-decomposer-html.gemspec
|
129
128
|
- doc/text/news.md
|
129
|
+
- lib/chupa-text/decomposers/html.rb
|
130
130
|
- test/run-test.rb
|
131
131
|
- test/test-html.rb
|
132
132
|
homepage: https://github.com/ranguba/chupa-text-decomposer-html
|
133
133
|
licenses:
|
134
|
-
-
|
134
|
+
- LGPL-2.1+
|
135
135
|
metadata: {}
|
136
136
|
post_install_message:
|
137
137
|
rdoc_options: []
|
@@ -139,20 +139,19 @@ require_paths:
|
|
139
139
|
- lib
|
140
140
|
required_ruby_version: !ruby/object:Gem::Requirement
|
141
141
|
requirements:
|
142
|
-
- -
|
142
|
+
- - ">="
|
143
143
|
- !ruby/object:Gem::Version
|
144
144
|
version: '0'
|
145
145
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
146
|
requirements:
|
147
|
-
- -
|
147
|
+
- - ">="
|
148
148
|
- !ruby/object:Gem::Version
|
149
149
|
version: '0'
|
150
150
|
requirements: []
|
151
151
|
rubyforge_project:
|
152
|
-
rubygems_version: 2.
|
152
|
+
rubygems_version: 2.5.2
|
153
153
|
signing_key:
|
154
154
|
specification_version: 4
|
155
155
|
summary: This is a ChupaText decomposer plugin for to extract text and meta-data from
|
156
156
|
HTML.
|
157
157
|
test_files: []
|
158
|
-
has_rdoc:
|