chupa-text-decomposer-html 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: '068db7f654ca70a8f65209c3d2f0d2fa1edf45ff'
4
- data.tar.gz: 715d8630a7e69c100ed38a8bb090c5282b62fe2d
3
+ metadata.gz: 5b340a505f73aa5bcc6613a55b6fcec5e7988bc4
4
+ data.tar.gz: 3f4e1ef2b30decee069f76521753f6533a51024b
5
5
  SHA512:
6
- metadata.gz: ab46f697f57427a940bd7968391648f54e2edbfab77974bc651ebe155f8dbfb6bbb9f8922d7153ee306ccb0402d30957a05fefe839437ba3ff28037f9a9e6ab0
7
- data.tar.gz: 932251f709b54256f6478d4e7103f1b483c9c83166a277c6ee4b67b24c1528389ef1602ec960536de9b727cfcfa771e323d3467e9573911486f752e715ca5fcb
6
+ metadata.gz: 51f94a9bd3eb45765aa4518f8415ec82ec235f53e99e75e4ec94afb45721869587e9ad558fc7bdfeb05fca39ed76f24d08d770428b0a775c613f8b0768e60b7c
7
+ data.tar.gz: 409267fc2e80bc9cbc119443b6dee928ab72a7e1eb747c69cb8c781654fcd6977cf2d16663622c29f46e6c86923da0a9e8f911b82f73c8cb63e0722e9a130cda
@@ -22,7 +22,7 @@ end
22
22
 
23
23
  Gem::Specification.new do |spec|
24
24
  spec.name = "chupa-text-decomposer-html"
25
- spec.version = "1.0.2"
25
+ spec.version = "1.0.3"
26
26
  spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-html"
27
27
  spec.authors = ["Kouhei Sutou"]
28
28
  spec.email = ["kou@clear-code.com"]
@@ -1,14 +1,26 @@
1
1
  # News
2
2
 
3
+ ## 1.0.3: 2017-07-10
4
+
5
+ ### Improvements
6
+
7
+ * Supported ignoring topic path content.
8
+
9
+ * Supported ignoring aside content.
10
+
11
+ ### Fixes
12
+
13
+ * Fixed an infinite loop bug.
14
+
3
15
  ## 1.0.2: 2017-07-05
4
16
 
5
- * Support content based HTML detection.
17
+ * Supported content based HTML detection.
6
18
 
7
- * Ignore common contents.
19
+ * Supported ignoring common contents.
8
20
 
9
21
  ## 1.0.1: 2014-02-18
10
22
 
11
- * Support chupa-text 1.0.4.
23
+ * Supported chupa-text 1.0.4.
12
24
 
13
25
  ## 1.0.0: 2014-01-05
14
26
 
@@ -28,6 +28,10 @@ module ChupaText
28
28
  "application/xhtml+xml",
29
29
  ]
30
30
  def target?(data)
31
+ (data["source-mime-types"] || []).each do |source_mime_type|
32
+ return false if TARGET_MIME_TYPES.include?(source_mime_type)
33
+ end
34
+
31
35
  return true if TARGET_EXTENSIONS.include?(data.extension)
32
36
  return true if TARGET_MIME_TYPES.include?(data.mime_type)
33
37
 
@@ -112,6 +116,7 @@ module ChupaText
112
116
  return text if header_element?(element, name, classes)
113
117
  return text if footer_element?(element, name, classes)
114
118
  return text if navigation_element?(element, name, classes)
119
+ return text if aside_element?(element, name, classes)
115
120
 
116
121
  element.children.each do |child|
117
122
  case child
@@ -143,7 +148,7 @@ module ChupaText
143
148
 
144
149
  def header_element?(element, name, classes)
145
150
  case name
146
- when "header", "nav"
151
+ when "header"
147
152
  return true
148
153
  end
149
154
 
@@ -191,13 +196,34 @@ module ChupaText
191
196
 
192
197
  classes.each do |klass|
193
198
  case klass
194
- when "nav", "menu"
199
+ when "nav", "menu", /\Atopic[-_]?path\z/i
200
+ return true
201
+ end
202
+ end
203
+
204
+ case element["id"]
205
+ when "nav", "menu", /\Atopic[-_]?path\z/i
206
+ return true
207
+ end
208
+
209
+ false
210
+ end
211
+
212
+ def aside_element?(element, name, classes)
213
+ case name
214
+ when "aside"
215
+ return true
216
+ end
217
+
218
+ classes.each do |klass|
219
+ case klass
220
+ when "aside"
195
221
  return true
196
222
  end
197
223
  end
198
224
 
199
225
  case element["id"]
200
- when "nav", "menu"
226
+ when "aside"
201
227
  return true
202
228
  end
203
229
 
@@ -28,6 +28,30 @@ class TestHTML < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  sub_test_case("target?") do
31
+ sub_test_case("source-mime-type") do
32
+ def create_data(uri, mime_type)
33
+ data = ChupaText::Data.new
34
+ data.body = ""
35
+ data.uri = uri
36
+ data["source-mime-types"] = [mime_type]
37
+ data
38
+ end
39
+
40
+ def test_text_html
41
+ data = create_data("index.html", "text/html")
42
+ assert do
43
+ not @decomposer.target?(data)
44
+ end
45
+ end
46
+
47
+ def test_application_xhtml_xml
48
+ data = create_data("index.html", "application/xhtml+xml")
49
+ assert do
50
+ not @decomposer.target?(data)
51
+ end
52
+ end
53
+ end
54
+
31
55
  sub_test_case("extension") do
32
56
  def create_data(uri)
33
57
  data = ChupaText::Data.new
@@ -436,6 +460,23 @@ class TestHTML < Test::Unit::TestCase
436
460
  decompose(@data))
437
461
  end
438
462
 
463
+ def test_topic_path_class
464
+ @data.body = <<-HTML
465
+ <html>
466
+ <body>
467
+ Before
468
+ <div class="topic-path">topic-path</div>
469
+ <div class="topic_path">topic_path</div>
470
+ <div class="topicpath">topicpath</div>
471
+ <div class="TopicPath">TopicPath</div>
472
+ After
473
+ </body>
474
+ </html>
475
+ HTML
476
+ assert_equal(["Before\nAfter"],
477
+ decompose(@data))
478
+ end
479
+
439
480
  def test_nav_id
440
481
  @data.body = <<-HTML
441
482
  <html>
@@ -450,6 +491,55 @@ class TestHTML < Test::Unit::TestCase
450
491
  @data.body = <<-HTML
451
492
  <html>
452
493
  <body>Before<div id="menu">nav</div>After</body>
494
+ </html>
495
+ HTML
496
+ assert_equal(["BeforeAfter"],
497
+ decompose(@data))
498
+ end
499
+
500
+ def test_topic_path_id
501
+ @data.body = <<-HTML
502
+ <html>
503
+ <body>
504
+ Before
505
+ <div id="topic-path">topic-path</div>
506
+ <div id="topic_path">topic_path</div>
507
+ <div id="topicpath">topicpath</div>
508
+ <div id="TopicPath">TopicPath</div>
509
+ After
510
+ </body>
511
+ </html>
512
+ HTML
513
+ assert_equal(["Before\nAfter"],
514
+ decompose(@data))
515
+ end
516
+ end
517
+
518
+ sub_test_case("aside") do
519
+ def test_aside_tag
520
+ @data.body = <<-HTML
521
+ <html>
522
+ <body>Before<aside>aside</aside>After</body>
523
+ </html>
524
+ HTML
525
+ assert_equal(["BeforeAfter"],
526
+ decompose(@data))
527
+ end
528
+
529
+ def test_aside_class
530
+ @data.body = <<-HTML
531
+ <html>
532
+ <body>Before<div class="aside">aside</div>After</body>
533
+ </html>
534
+ HTML
535
+ assert_equal(["BeforeAfter"],
536
+ decompose(@data))
537
+ end
538
+
539
+ def test_aside_id
540
+ @data.body = <<-HTML
541
+ <html>
542
+ <body>Before<div id="aside">aside</div>After</body>
453
543
  </html>
454
544
  HTML
455
545
  assert_equal(["BeforeAfter"],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text-decomposer-html
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-05 00:00:00.000000000 Z
11
+ date: 2017-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chupa-text