slasher 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b8fd01b12d6c5944f17ea1c2b6ee3a6c0cf59fee
4
+ data.tar.gz: a91be77c10b7467986f0bedefb09ea96c5fef00a
5
+ SHA512:
6
+ metadata.gz: 439798d0ff97d07ed81519e7db4e9680b4edc9412cd8ab36b3b6340c143d09bbbdaeebd01c4f477be5c9a12e91b6ae8898b7c61fcf6b1f666f8cca4bfe2a7e8e
7
+ data.tar.gz: 1bc395d88baf44337bdfcbe767a961f8122acd1cceb877ac699650c95b4dd2fe871c7b5888400bcce382a893f76326b625b905b64215c6893343b06a7200d986
data/.gitignore ADDED
@@ -0,0 +1,38 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /rdoc/
21
+
22
+ ## Environment normalisation:
23
+ /.bundle/
24
+ /vendor/bundle
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
35
+ *.gemspec
36
+
37
+ /spec/cases/
38
+ /spec/cases_spec.rb
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --require spec_helper
3
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'rspec'
4
+ gem 'rspec-collection_matchers'
5
+ gem 'capybara'
6
+ gem 'pry'
7
+ gem 'faker'
8
+ gem 'nokogiri'
data/Gemfile.lock ADDED
@@ -0,0 +1,55 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ capybara (2.4.4)
5
+ mime-types (>= 1.16)
6
+ nokogiri (>= 1.3.3)
7
+ rack (>= 1.0.0)
8
+ rack-test (>= 0.5.4)
9
+ xpath (~> 2.0)
10
+ coderay (1.1.0)
11
+ diff-lcs (1.2.5)
12
+ faker (1.4.3)
13
+ i18n (~> 0.5)
14
+ i18n (0.7.0)
15
+ method_source (0.8.2)
16
+ mime-types (2.6.1)
17
+ mini_portile (0.6.0)
18
+ nokogiri (1.6.5)
19
+ mini_portile (~> 0.6.0)
20
+ pry (0.10.1)
21
+ coderay (~> 1.1.0)
22
+ method_source (~> 0.8.1)
23
+ slop (~> 3.4)
24
+ rack (1.5.3)
25
+ rack-test (0.6.3)
26
+ rack (>= 1.0)
27
+ rspec (3.2.0)
28
+ rspec-core (~> 3.2.0)
29
+ rspec-expectations (~> 3.2.0)
30
+ rspec-mocks (~> 3.2.0)
31
+ rspec-collection_matchers (1.1.2)
32
+ rspec-expectations (>= 2.99.0.beta1)
33
+ rspec-core (3.2.3)
34
+ rspec-support (~> 3.2.0)
35
+ rspec-expectations (3.2.1)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.2.0)
38
+ rspec-mocks (3.2.1)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.2.0)
41
+ rspec-support (3.2.2)
42
+ slop (3.6.0)
43
+ xpath (2.0.0)
44
+ nokogiri (~> 1.3)
45
+
46
+ PLATFORMS
47
+ ruby
48
+
49
+ DEPENDENCIES
50
+ capybara
51
+ faker
52
+ nokogiri
53
+ pry
54
+ rspec
55
+ rspec-collection_matchers
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # slasherrb
2
+ [![Build Status](https://semaphoreci.com/api/v1/projects/58c6aef2-91c2-428e-a803-37a8e6ffac2d/445101/badge.svg)](https://semaphoreci.com/hafizbadrie/slasherrb)
3
+
4
+ This project is the Ruby version of [slasherjs](https://github.com/hafizbadrie/slasherjs). Slasher is a library that extracts the main content of an HTML article document.
5
+ The result of the extraction depends on assumptions about the structure of the HTML document. Therefore, the result may be flawed if the document doesn't match a structure that the library recognises.
6
+ For this reason, the library will be improved from time to time.
7
+
8
+ ## How To Use
9
+
10
+ To use the library, you need to have an HTML document first.
11
+ ```ruby
12
+ require 'net/http'
13
+ require 'slasher'
14
+
15
+ uri = URI("http://sea-games-2015.liputan6.com/read/2252937/all-indonesia-finals-ganda-putra-sumbang-emas")
16
+ html = Net::HTTP.get(uri)
17
+
18
+ slasher = Slasher.new(html)
19
+ content = slasher.slash
20
+
21
+ # The content variable now holds the main content of the HTML document (article).
22
+ ```
23
+
24
+ ## Website Coverage
25
+
26
+ This library has been tested against some websites and you can see the complete list in this [document](https://github.com/hafizbadrie/slasherrb/blob/master/doc/website_coverage.txt)
27
+
28
+ ## TODO
29
+ 1. Add more test cases: international websites
30
+ 2. Allow slashing a new site without having to re-initialize the object.
@@ -0,0 +1,21 @@
1
+ 1. liputan6.com
2
+ 2. kompas.com
3
+ 3. detik.com
4
+ 4. thejakartapost.com
5
+ 5. thejakartaglobe.beritasatu.com
6
+ 6. tribunnews.com
7
+ 7. merdeka.com
8
+ 8. okezone.com
9
+ 9. suara.com
10
+ 10. viva.co.id
11
+ 11. tempo.co
12
+ 12. republika.co.id
13
+ 13. metrotvnews.com
14
+ 14. bola.net
15
+ 15. bisnis.com
16
+ 16. cnnindonesia.com
17
+ 17. sindonews.com
18
+ 18. ttwigo.com
19
+ 19. jakpost.travel
20
+ 20. dailysocial.net
21
+ 21. teknojurnal.com
@@ -0,0 +1,23 @@
1
class Slasher
  # Collects candidate text fragments and remembers, for each one, how many
  # non-whitespace characters it contains so the longest fragment can be
  # selected afterwards.
  class Content
    # @return [Array<Hash>] stored entries, each shaped as
    #   `{ length: Integer, content: String }`
    attr_accessor :collection

    def initialize
      @collection = []
    end

    # Records a text fragment together with its whitespace-free length.
    #
    # @param content [String] the fragment to store
    # @return [Array<Hash>] the collection, including the new entry
    def push_content(content)
      @collection << {
        # Whitespace is ignored so that indentation and line breaks in the
        # markup do not inflate a fragment's score.
        length: content.gsub(/\s/, '').size,
        content: content
      }
    end

    # Finds the entry with the greatest :length.
    #
    # Uses a single pass (`max_by`) instead of sorting the whole collection.
    # When several entries share the maximum length, which one is returned is
    # not guaranteed.
    #
    # @return [Hash, nil] the longest entry, or nil when nothing was pushed
    def get_longest_length
      collection.max_by { |entry| entry[:length] }
    end
  end
end
@@ -0,0 +1,43 @@
1
class Slasher
  # Wraps a Nokogiri-parsed HTML document and offers the clean-up and text
  # extraction helpers used while searching for the main article content.
  #
  # NOTE: this class references Nokogiri but does not load it itself; Nokogiri
  # has to be required before a DOM is instantiated.
  class DOM
    # Elements that are deleted from the document together with their content.
    REMOVED_ELEMENTS = %w[iframe script style noscript header footer br img].freeze
    # Elements that are replaced by a plain text node holding their text.
    STRIPPED_ELEMENTS = %w[blockquote strong a em b].freeze

    # @return [Nokogiri::HTML::Document] the parsed document
    attr_accessor :document

    # @param document [String, IO] HTML source handed to Nokogiri::HTML
    def initialize(document)
      @document = Nokogiri::HTML(document)
    end

    # Deletes every element listed in REMOVED_ELEMENTS from the document.
    def remove_elements
      REMOVED_ELEMENTS.each do |element|
        @document.xpath("//#{element}").remove
      end
    end

    # Replaces every element listed in STRIPPED_ELEMENTS with a text node
    # containing the element's text, i.e. drops the tag but keeps the words.
    def strip_elements
      STRIPPED_ELEMENTS.each do |element|
        @document.search("//#{element}").each do |node|
          node.replace(Nokogiri::XML::Text.new(node.text, node.document))
        end
      end
    end

    # Concatenates the text of the direct `p` children of +node+ and removes
    # those paragraphs from the tree so they are not collected a second time.
    #
    # @param node [#>] a node or node set answering `> "p"`
    # @return [String] the joined paragraph text ("" when there is none)
    def get_paragraphs_content(node)
      (node > 'p').map do |paragraph|
        text = paragraph.text
        paragraph.remove
        text
      end.join
    end

    # Concatenates the direct text children of +node+, with newlines deleted
    # and surrounding whitespace stripped from each piece.
    #
    # @param node [#children] a node or node set
    # @return [String] the joined text ("" when there are no text children)
    def get_texts(node)
      node.children
          .select(&:text?)
          .map { |child| child.text.delete("\n").strip }
          .join
    end
  end
end
data/lib/slasher.rb ADDED
@@ -0,0 +1,37 @@
1
require 'nokogiri'

require 'slasher/content'
require 'slasher/dom'
3
+
4
# Extracts the main content of an HTML article by collecting text fragments
# from the whole document and returning the longest one.
class Slasher
  # @return [Slasher::DOM] wrapper around the parsed document
  # @return [Slasher::Content] collected text fragments
  attr_accessor :dom, :content

  # @param html [String, IO] HTML source passed on to Slasher::DOM
  def initialize(html)
    @dom = Slasher::DOM.new(html)
    @content = Slasher::Content.new
  end

  # Walks +doc+ depth-first and pushes every candidate fragment into
  # #content: the node's own direct text, the joined text of each child's
  # direct paragraphs, and the text of every non-empty leaf node.
  #
  # @param doc [#children] node (or document) to start from
  def recursive_slash(doc)
    content.push_content(dom.get_texts(doc))

    doc.children.each do |child|
      # Must run before the children check below: collecting the paragraphs
      # also removes them from +child+.
      content.push_content(dom.get_paragraphs_content(child)) unless (child > 'p').empty?

      if child.children.empty?
        text = child.text
        content.push_content(text) unless text.to_s.empty?
      else
        recursive_slash(child)
      end
    end
  end

  # Cleans the document, collects all fragments and returns the longest.
  #
  # @return [String] the text of the longest collected fragment
  def slash
    dom.remove_elements
    dom.strip_elements
    recursive_slash(dom.document)
    content.get_longest_length[:content]
  end
end
@@ -0,0 +1,22 @@
1
+ <html>
2
+ <head>
3
+ <title>Slasher.rb Test</title>
4
+ </head>
5
+ <body>
6
+ <style>h1 { font-size: 36px; }</style>
7
+ <script type="text/javascript">console.log("Hello World");</script>
8
+ <iframe src="http://facebook.com"></iframe>
9
+ <iframe src="http://twitter.com"></iframe>
10
+ <noscript>Hello</noscript>
11
+ <header>This is header</header>
12
+ <br>
13
+ <img src="https://avatars0.githubusercontent.com/u/494642?v=3&s=460">
14
+ <footer>This is footer</footer>
15
+ <div class="content">
16
+ <blockquote><h2>This is quote</h2></blockquote>
17
+ <strong>This is strong</strong>
18
+ <a href='#'>This is a link</a>
19
+ <em>This is italic sentence</em>
20
+ </div>
21
+ </body>
22
+ </html>
@@ -0,0 +1,21 @@
1
+ <html>
2
+ <head>
3
+ <title>Slasher.rb Test</title>
4
+ </head>
5
+ <body>
6
+ <div class="content">
7
+ <div class="content-header">
8
+ This is just a content header
9
+ </div>
10
+ <div class="content-body">
11
+ <p>This is first paragraph.</p>
12
+ <p>This is second paragraph.</p>
13
+ <p>This is third paragraph.</p>
14
+ </div>
15
+ </div>
16
+ <div class="sidebar">
17
+ <p>This is paragraph</p>
18
+ </div>
19
+ </body>
20
+ </html>
21
+
@@ -0,0 +1,16 @@
1
+ <html>
2
+ <head>
3
+ <title>Slasher.rb Test</title>
4
+ </head>
5
+ <body>
6
+ <div class="content">
7
+ <p>This is first paragraph.</p>
8
+ <p>This is second paragraph.</p>
9
+ <p>This is third paragraph.</p>
10
+ </div>
11
+ <div class="sidebar">
12
+ <p>This is paragraph</p>
13
+ </div>
14
+ </body>
15
+ </html>
16
+
@@ -0,0 +1,20 @@
1
+ <html>
2
+ <head>
3
+ <title>Slasher.rb Test</title>
4
+ </head>
5
+ <body>
6
+ <div class="content">
7
+ This is first paragraph.
8
+ <br>
9
+ <br>
10
+ This is second paragraph.
11
+ <br>
12
+ <br>
13
+ This is third paragraph.
14
+ <br>
15
+ <br>
16
+ </div>
17
+ </body>
18
+ </html>
19
+
20
+
@@ -0,0 +1,38 @@
1
# Specs for Slasher::Content: storing fragments and picking the longest one.
describe Slasher::Content do
  # A fresh, empty collector for every example.
  let(:content) { Slasher::Content.new }

  describe "#initialize" do
    it "will start with an empty collection" do
      expect(content.collection).to be_empty
    end
  end

  describe "#push_content" do
    let(:content_1) { "This is just a content that needs to be stored in a collection" }
    let(:content_2) { "This is just a content" }

    it "will store content in an array of hash" do
      content.push_content(content_1)
      content.push_content(content_2)
      expect(content.collection).to have(2).items
      # :length counts non-whitespace characters only.
      expect(content.collection.first[:length]).to eq content_1.gsub(/\s/, '').size
      expect(content.collection.first[:content]).to eq content_1
    end
  end

  describe "#get_longest_length" do
    let(:content_1) { "This is the first content" }
    let(:content_2) { "This should have the highest length among all"}
    let(:content_3) { "Sortest" }

    it "will return highest length from contents" do
      content.push_content(content_1)
      content.push_content(content_2)
      content.push_content(content_3)

      expect(content.get_longest_length[:content]).to eq content_2
    end

    it "will return nil when nothing has been stored" do
      expect(content.get_longest_length).to be_nil
    end
  end
end
@@ -0,0 +1,70 @@
1
# Specs for Slasher::DOM: parsing, element clean-up and text extraction.
describe Slasher::DOM do
  describe "#initialize" do
    let(:html) { "<html><head><title>Hello World</title></head><body><h1>Hello World</h1></body></html>" }

    it "will assign document based on provided data in initialisation" do
      dom = Slasher::DOM.new(html)
      expect(dom.document).to be_a Nokogiri::HTML::Document
    end
  end

  describe "#remove_elements" do
    # File.read closes the file handle, unlike File.open(...).read.
    let(:html) { File.read("spec/fixtures/test.html") }
    let(:dom) { Slasher::DOM.new(html) }

    it "will remove elements like script, iframe, style, noscript, header, footer, br, and img" do
      dom.remove_elements
      document = Capybara.string(dom.document)
      expect(document).not_to have_css "script"
      expect(document).not_to have_css "iframe"
      expect(document).not_to have_css "style"
      expect(document).not_to have_css "noscript"
      expect(document).not_to have_css "header"
      expect(document).not_to have_css "footer"
      expect(document).not_to have_css "br"
      expect(document).not_to have_css "img"
    end
  end

  describe "#strip_elements" do
    let(:html) { File.read("spec/fixtures/test.html") }
    let(:dom) { Slasher::DOM.new(html) }

    it "will remove element but not with the content" do
      dom.strip_elements
      document = Capybara.string(dom.document)
      expect(document).not_to have_css "blockquote"
      expect(document).to have_content "This is quote"
      expect(document).not_to have_css "strong"
      expect(document).to have_content "This is strong"
      expect(document).not_to have_css "a"
      expect(document).to have_content "This is a link"
      expect(document).not_to have_css "em"
      expect(document).to have_content "This is italic sentence"
    end
  end

  describe "#get_paragraphs_content" do
    let(:html) { File.read("spec/fixtures/test_paragraph.html") }
    let(:dom) { Slasher::DOM.new(html) }

    it "will get all the content inside tag p from specific parent" do
      content = dom.get_paragraphs_content(dom.document.xpath("//div[@class='content']"))
      expect(content).to eq "This is first paragraph.This is second paragraph.This is third paragraph."

      content = dom.get_paragraphs_content(dom.document.xpath("//div[@class='sidebar']"))
      expect(content).to eq "This is paragraph"
    end
  end

  describe "#get_texts" do
    let(:html) { File.read("spec/fixtures/test_text.html") }
    let(:dom) { Slasher::DOM.new(html) }

    it "will concat all Text children into 1 content" do
      content = dom.get_texts(dom.document.xpath("//div[@class='content']"))
      expect(content).to eq "This is first paragraph.This is second paragraph.This is third paragraph."
    end
  end
end
@@ -0,0 +1,33 @@
1
# Specs for the Slasher entry point: construction, traversal and extraction.
describe Slasher do

  describe "#initialize" do
    let(:html) { "<html><head><title>Hello World</title></head><body><h1>Hello World</h1></body></html>" }

    it "will assign document based on provided data in initialisation" do
      slasher = Slasher.new(html)
      expect(slasher.dom).to be_a Slasher::DOM
      expect(slasher.content).to be_a Slasher::Content
    end
  end

  describe "#recursive_slash" do
    # Read the fixture into a String so no file handle is left open.
    let(:html) { File.read("spec/fixtures/test_doc.html") }
    let(:slasher) { Slasher.new(html) }

    it "will recursively turn document into array of hash" do
      slasher.recursive_slash(slasher.dom.document)
      content = slasher.content
      expect(content.collection.size).to eq 30
    end
  end

  describe "#slash" do
    let(:html) { File.read("spec/fixtures/test_doc.html") }
    let(:slasher) { Slasher.new(html) }

    it "will return the longest/highest content" do
      content = slasher.slash
      expect(content).to eq "This is first paragraph.This is second paragraph.This is third paragraph."
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
4
+ # this file to always be loaded, without a need to explicitly require it in any
5
+ # files.
6
+ #
7
+ # Given that it is always loaded, you are encouraged to keep this file as
8
+ # light-weight as possible. Requiring heavyweight dependencies from this file
9
+ # will add to the boot time of your test suite on EVERY test run, even for an
10
+ # individual file that may not need all of that loaded. Instead, consider making
11
+ # a separate helper file that requires the additional dependencies and performs
12
+ # the additional setup, and require it from the spec files that actually need
13
+ # it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
+
20
+ require 'bundler'
21
+ Bundler.require(:default)
22
+ Dir.glob("./lib/**/*.rb") {|f| require f }
23
+
24
+ RSpec.configure do |config|
25
+ # rspec-expectations config goes here. You can use an alternate
26
+ # assertion/expectation library such as wrong or the stdlib/minitest
27
+ # assertions if you prefer.
28
+ config.expect_with :rspec do |expectations|
29
+ # This option will default to `true` in RSpec 4. It makes the `description`
30
+ # and `failure_message` of custom matchers include text for helper methods
31
+ # defined using `chain`, e.g.:
32
+ # be_bigger_than(2).and_smaller_than(4).description
33
+ # # => "be bigger than 2 and smaller than 4"
34
+ # ...rather than:
35
+ # # => "be bigger than 2"
36
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
37
+ end
38
+
39
+ # rspec-mocks config goes here. You can use an alternate test double
40
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
41
+ config.mock_with :rspec do |mocks|
42
+ # Prevents you from mocking or stubbing a method that does not exist on
43
+ # a real object. This is generally recommended, and will default to
44
+ # `true` in RSpec 4.
45
+ mocks.verify_partial_doubles = true
46
+ end
47
+
48
+ # The settings below are suggested to provide a good initial experience
49
+ # with RSpec, but feel free to customize to your heart's content.
50
+ =begin
51
+ # These two settings work together to allow you to limit a spec run
52
+ # to individual examples or groups you care about by tagging them with
53
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
54
+ # get run.
55
+ config.filter_run :focus
56
+ config.run_all_when_everything_filtered = true
57
+
58
+ # Limits the available syntax to the non-monkey patched syntax that is
59
+ # recommended. For more details, see:
60
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
61
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
62
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
63
+ config.disable_monkey_patching!
64
+
65
+ # This setting enables warnings. It's recommended, but in some cases may
66
+ # be too noisy due to issues in dependencies.
67
+ config.warnings = true
68
+
69
+ # Many RSpec users commonly either run the entire suite or an individual
70
+ # file, and it's useful to allow more verbose output when running an
71
+ # individual spec file.
72
+ if config.files_to_run.one?
73
+ # Use the documentation formatter for detailed output,
74
+ # unless a formatter has already been configured
75
+ # (e.g. via a command-line flag).
76
+ config.default_formatter = 'doc'
77
+ end
78
+
79
+ # Print the 10 slowest examples and example groups at the
80
+ # end of the spec run, to help surface which specs are running
81
+ # particularly slow.
82
+ config.profile_examples = 10
83
+
84
+ # Run specs in random order to surface order dependencies. If you find an
85
+ # order dependency and want to debug it, you can fix the order by providing
86
+ # the seed, which is printed after each run.
87
+ # --seed 1234
88
+ config.order = :random
89
+
90
+ # Seed global randomization in this process using the `--seed` CLI option.
91
+ # Setting this allows you to use `--seed` to deterministically reproduce
92
+ # test failures related to randomization by passing the same `--seed` value
93
+ # as the one that triggered the failure.
94
+ Kernel.srand config.seed
95
+ =end
96
+ end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: slasher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Hafiz Badrie Lubis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-16 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Extract the content of an HTML article
14
+ email: hafizbadrie@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - ".gitignore"
20
+ - ".rspec"
21
+ - Gemfile
22
+ - Gemfile.lock
23
+ - README.md
24
+ - doc/website_coverage.txt
25
+ - lib/slasher.rb
26
+ - lib/slasher/content.rb
27
+ - lib/slasher/dom.rb
28
+ - spec/fixtures/test.html
29
+ - spec/fixtures/test_doc.html
30
+ - spec/fixtures/test_paragraph.html
31
+ - spec/fixtures/test_text.html
32
+ - spec/slasher/content_spec.rb
33
+ - spec/slasher/dom_spec.rb
34
+ - spec/slasher_spec.rb
35
+ - spec/spec_helper.rb
36
+ homepage: http://github.com/hafizbadrie/slasherrb
37
+ licenses:
38
+ - MIT
39
+ metadata: {}
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 2.4.5
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Extract the content of an HTML article
60
+ test_files: []