chupa-text-decomposer-html 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: '068db7f654ca70a8f65209c3d2f0d2fa1edf45ff'
4
- data.tar.gz: 715d8630a7e69c100ed38a8bb090c5282b62fe2d
3
+ metadata.gz: 5b340a505f73aa5bcc6613a55b6fcec5e7988bc4
4
+ data.tar.gz: 3f4e1ef2b30decee069f76521753f6533a51024b
5
5
  SHA512:
6
- metadata.gz: ab46f697f57427a940bd7968391648f54e2edbfab77974bc651ebe155f8dbfb6bbb9f8922d7153ee306ccb0402d30957a05fefe839437ba3ff28037f9a9e6ab0
7
- data.tar.gz: 932251f709b54256f6478d4e7103f1b483c9c83166a277c6ee4b67b24c1528389ef1602ec960536de9b727cfcfa771e323d3467e9573911486f752e715ca5fcb
6
+ metadata.gz: 51f94a9bd3eb45765aa4518f8415ec82ec235f53e99e75e4ec94afb45721869587e9ad558fc7bdfeb05fca39ed76f24d08d770428b0a775c613f8b0768e60b7c
7
+ data.tar.gz: 409267fc2e80bc9cbc119443b6dee928ab72a7e1eb747c69cb8c781654fcd6977cf2d16663622c29f46e6c86923da0a9e8f911b82f73c8cb63e0722e9a130cda
@@ -22,7 +22,7 @@ end
22
22
 
23
23
  Gem::Specification.new do |spec|
24
24
  spec.name = "chupa-text-decomposer-html"
25
- spec.version = "1.0.2"
25
+ spec.version = "1.0.3"
26
26
  spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-html"
27
27
  spec.authors = ["Kouhei Sutou"]
28
28
  spec.email = ["kou@clear-code.com"]
@@ -1,14 +1,26 @@
1
1
  # News
2
2
 
3
+ ## 1.0.3: 2017-07-10
4
+
5
+ ### Improvements
6
+
7
+ * Supported ignoring topic path content.
8
+
9
+ * Supported ignoring aside content.
10
+
11
+ ### Fixes
12
+
13
+ * Fixed an infinite loop bug.
14
+
3
15
  ## 1.0.2: 2017-07-05
4
16
 
5
- * Support content based HTML detection.
17
+ * Supported content-based HTML detection.
6
18
 
7
- * Ignore common contents.
19
+ * Supported ignoring common contents.
8
20
 
9
21
  ## 1.0.1: 2014-02-18
10
22
 
11
- * Support chupa-text 1.0.4.
23
+ * Supported chupa-text 1.0.4.
12
24
 
13
25
  ## 1.0.0: 2014-01-05
14
26
 
@@ -28,6 +28,10 @@ module ChupaText
28
28
  "application/xhtml+xml",
29
29
  ]
30
30
  def target?(data)
31
+ (data["source-mime-types"] || []).each do |source_mime_type|
32
+ return false if TARGET_MIME_TYPES.include?(source_mime_type)
33
+ end
34
+
31
35
  return true if TARGET_EXTENSIONS.include?(data.extension)
32
36
  return true if TARGET_MIME_TYPES.include?(data.mime_type)
33
37
 
@@ -112,6 +116,7 @@ module ChupaText
112
116
  return text if header_element?(element, name, classes)
113
117
  return text if footer_element?(element, name, classes)
114
118
  return text if navigation_element?(element, name, classes)
119
+ return text if aside_element?(element, name, classes)
115
120
 
116
121
  element.children.each do |child|
117
122
  case child
@@ -143,7 +148,7 @@ module ChupaText
143
148
 
144
149
  def header_element?(element, name, classes)
145
150
  case name
146
- when "header", "nav"
151
+ when "header"
147
152
  return true
148
153
  end
149
154
 
@@ -191,13 +196,34 @@ module ChupaText
191
196
 
192
197
  classes.each do |klass|
193
198
  case klass
194
- when "nav", "menu"
199
+ when "nav", "menu", /\Atopic[-_]?path\z/i
200
+ return true
201
+ end
202
+ end
203
+
204
+ case element["id"]
205
+ when "nav", "menu", /\Atopic[-_]?path\z/i
206
+ return true
207
+ end
208
+
209
+ false
210
+ end
211
+
212
+ def aside_element?(element, name, classes)
213
+ case name
214
+ when "aside"
215
+ return true
216
+ end
217
+
218
+ classes.each do |klass|
219
+ case klass
220
+ when "aside"
195
221
  return true
196
222
  end
197
223
  end
198
224
 
199
225
  case element["id"]
200
- when "nav", "menu"
226
+ when "aside"
201
227
  return true
202
228
  end
203
229
 
@@ -28,6 +28,30 @@ class TestHTML < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  sub_test_case("target?") do
31
+ sub_test_case("source-mime-type") do
32
+ def create_data(uri, mime_type)
33
+ data = ChupaText::Data.new
34
+ data.body = ""
35
+ data.uri = uri
36
+ data["source-mime-types"] = [mime_type]
37
+ data
38
+ end
39
+
40
+ def test_text_html
41
+ data = create_data("index.html", "text/html")
42
+ assert do
43
+ not @decomposer.target?(data)
44
+ end
45
+ end
46
+
47
+ def test_application_xhtml_xml
48
+ data = create_data("index.html", "application/xhtml+xml")
49
+ assert do
50
+ not @decomposer.target?(data)
51
+ end
52
+ end
53
+ end
54
+
31
55
  sub_test_case("extension") do
32
56
  def create_data(uri)
33
57
  data = ChupaText::Data.new
@@ -436,6 +460,23 @@ class TestHTML < Test::Unit::TestCase
436
460
  decompose(@data))
437
461
  end
438
462
 
463
+ def test_topic_path_class
464
+ @data.body = <<-HTML
465
+ <html>
466
+ <body>
467
+ Before
468
+ <div class="topic-path">topic-path</div>
469
+ <div class="topic_path">topic_path</div>
470
+ <div class="topicpath">topicpath</div>
471
+ <div class="TopicPath">TopicPath</div>
472
+ After
473
+ </body>
474
+ </html>
475
+ HTML
476
+ assert_equal(["Before\nAfter"],
477
+ decompose(@data))
478
+ end
479
+
439
480
  def test_nav_id
440
481
  @data.body = <<-HTML
441
482
  <html>
@@ -450,6 +491,55 @@ class TestHTML < Test::Unit::TestCase
450
491
  @data.body = <<-HTML
451
492
  <html>
452
493
  <body>Before<div id="menu">nav</div>After</body>
494
+ </html>
495
+ HTML
496
+ assert_equal(["BeforeAfter"],
497
+ decompose(@data))
498
+ end
499
+
500
+ def test_topic_path_id
501
+ @data.body = <<-HTML
502
+ <html>
503
+ <body>
504
+ Before
505
+ <div id="topic-path">topic-path</div>
506
+ <div id="topic_path">topic_path</div>
507
+ <div id="topicpath">topicpath</div>
508
+ <div id="TopicPath">TopicPath</div>
509
+ After
510
+ </body>
511
+ </html>
512
+ HTML
513
+ assert_equal(["Before\nAfter"],
514
+ decompose(@data))
515
+ end
516
+ end
517
+
518
+ sub_test_case("aside") do
519
+ def test_aside_tag
520
+ @data.body = <<-HTML
521
+ <html>
522
+ <body>Before<aside>aside</aside>After</body>
523
+ </html>
524
+ HTML
525
+ assert_equal(["BeforeAfter"],
526
+ decompose(@data))
527
+ end
528
+
529
+ def test_aside_class
530
+ @data.body = <<-HTML
531
+ <html>
532
+ <body>Before<div class="aside">aside</div>After</body>
533
+ </html>
534
+ HTML
535
+ assert_equal(["BeforeAfter"],
536
+ decompose(@data))
537
+ end
538
+
539
+ def test_aside_id
540
+ @data.body = <<-HTML
541
+ <html>
542
+ <body>Before<div id="aside">aside</div>After</body>
453
543
  </html>
454
544
  HTML
455
545
  assert_equal(["BeforeAfter"],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text-decomposer-html
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-05 00:00:00.000000000 Z
11
+ date: 2017-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chupa-text