feeds-crawler 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ae6d80efb92d85a3e18719e4c23a93a74edb888b
4
+ data.tar.gz: 4f2d7eb77ba67427f932f1779d90299ab2b3f35a
5
+ SHA512:
6
+ metadata.gz: 72c2ff85052f577433507bffb91fce51eeb14518acfc5b194c222c0e00576ca156329caa77b51a887128835c883a7b9923248b187c9b11d1a3a199d8e147ac35
7
+ data.tar.gz: 169ce845207dd426522651276642cab9d89c261a393a0e9c1fbcc3ee9d014b0c5778ea5ebc5f46b5628beafc40554818c926eb8398f662e7ac2646f681e44d4e
data/.gitignore ADDED
@@ -0,0 +1,56 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ # RubyMine files
53
+ .idea/
54
+
55
+ # RSpec files
56
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,77 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ feeds-crawler (0.2.0)
5
+ parallel (~> 1.12, >= 1.12.0)
6
+ ruby-readability (~> 0.7, >= 0.7.0)
7
+ sanitize (~> 4.5, >= 4.5.0)
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ ast (2.3.0)
13
+ coderay (1.1.1)
14
+ crass (1.0.2)
15
+ diff-lcs (1.3)
16
+ guess_html_encoding (0.0.11)
17
+ method_source (0.8.2)
18
+ mini_portile2 (2.2.0)
19
+ nokogiri (1.8.0)
20
+ mini_portile2 (~> 2.2.0)
21
+ nokogumbo (1.4.13)
22
+ nokogiri
23
+ parallel (1.12.0)
24
+ parser (2.4.0.0)
25
+ ast (~> 2.2)
26
+ powerpack (0.1.1)
27
+ pry (0.10.4)
28
+ coderay (~> 1.1.0)
29
+ method_source (~> 0.8.1)
30
+ slop (~> 3.4)
31
+ rainbow (2.2.2)
32
+ rake
33
+ rake (10.5.0)
34
+ rspec (3.6.0)
35
+ rspec-core (~> 3.6.0)
36
+ rspec-expectations (~> 3.6.0)
37
+ rspec-mocks (~> 3.6.0)
38
+ rspec-core (3.6.0)
39
+ rspec-support (~> 3.6.0)
40
+ rspec-expectations (3.6.0)
41
+ diff-lcs (>= 1.2.0, < 2.0)
42
+ rspec-support (~> 3.6.0)
43
+ rspec-mocks (3.6.0)
44
+ diff-lcs (>= 1.2.0, < 2.0)
45
+ rspec-support (~> 3.6.0)
46
+ rspec-support (3.6.0)
47
+ rubocop (0.50.0)
48
+ parallel (~> 1.10)
49
+ parser (>= 2.3.3.1, < 3.0)
50
+ powerpack (~> 0.1)
51
+ rainbow (>= 2.2.2, < 3.0)
52
+ ruby-progressbar (~> 1.7)
53
+ unicode-display_width (~> 1.0, >= 1.0.1)
54
+ ruby-progressbar (1.8.3)
55
+ ruby-readability (0.7.0)
56
+ guess_html_encoding (>= 0.0.4)
57
+ nokogiri (>= 1.6.0)
58
+ sanitize (4.5.0)
59
+ crass (~> 1.0.2)
60
+ nokogiri (>= 1.4.4)
61
+ nokogumbo (~> 1.4.1)
62
+ slop (3.6.0)
63
+ unicode-display_width (1.3.0)
64
+
65
+ PLATFORMS
66
+ ruby
67
+
68
+ DEPENDENCIES
69
+ bundler (~> 1.15)
70
+ feeds-crawler!
71
+ pry
72
+ rake (~> 10.0)
73
+ rspec (~> 3.0)
74
+ rubocop
75
+
76
+ BUNDLED WITH
77
+ 1.15.3
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Andrey Tatarenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # Feeds Crawler
2
+
3
+ This gem lets you crawl news articles from RSS feeds. It fetches feeds and articles in parallel for better performance and strips unnecessary HTML tags from the collected text.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'feeds-crawler'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install feeds-crawler
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ require 'feeds_crawler'
25
+
26
+ FeedsCrawler.crawl('rss_feed_url')
27
+ # => ["Article 1", "Article 2", "Article 3"]
28
+ ```
29
+
30
+ It accepts as many RSS feeds as you want:
31
+ ```ruby
32
+ my_favorite_rss_feeds = %w[
33
+ first_rss_feed
34
+ second_rss_feed
35
+ ]
36
+
37
+ FeedsCrawler.crawl(my_favorite_rss_feeds)
38
+ # => ["Article from feed #1", "Article from feed #2"]
39
+ ```
40
+ or:
41
+ ```ruby
42
+ FeedsCrawler.crawl('first_rss_feed_url', 'second_rss_feed_url')
43
+ # => ["Article from feed #1", "Article from feed #2"]
44
+ ```
45
+
46
+
47
+ ## Contributing
48
+
49
+ Bug reports and pull requests are welcome on GitHub at https://github.com/andrey17076/feeds-crawler.
50
+
51
+ ## License
52
+
53
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec) # defines the `spec` rake task
5
+
6
+ task default: :spec # a bare `rake` runs the specs
7
+ task c: :console # `rake c` is shorthand for `rake console`
8
+ task(:console) { sh('./bin/console') } # shells out to the bin/console script
data/bin/console ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'feeds_crawler'
5
+ require 'pry'
6
+
7
+ Pry.start
data/bin/setup ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail # abort on command errors, unset variables and failed pipeline stages
3
+ IFS=$'\n\t' # split words on newlines and tabs only, not on spaces
4
+ set -vx # echo every command as it is read and as it is executed
5
+
6
+ bundle install # install the dependencies declared by the Gemfile/gemspec
@@ -0,0 +1,28 @@
1
+ lib = File.expand_path('../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+
4
+ Gem::Specification.new do |spec|
5
+ spec.name = 'feeds-crawler'
6
+ spec.version = '0.2.0'
7
+ spec.authors = ['Andrey Tatarenko']
8
+ spec.email = ['andrey17076@gmail.com']
9
+
10
+ spec.summary = 'This gem allows to crawl news articles from RSS feeds.'
11
+ spec.homepage = 'https://github.com/andrey17076/feed-crawler'
12
+ spec.license = 'MIT'
13
+ spec.files = `git ls-files -z`.split("\x0")
14
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
15
+ spec.test_files = spec.files.grep(%r{^spec/})
16
+ spec.require_paths = ['lib']
17
+
18
+ spec.add_development_dependency 'bundler', '~> 1.15'
19
+ spec.add_development_dependency 'rake', '~> 10.0'
20
+ spec.add_development_dependency 'rspec', '~> 3.0'
21
+ spec.add_development_dependency 'pry'
22
+ spec.add_development_dependency 'rubocop'
23
+
24
+ spec.add_runtime_dependency 'parallel', '~> 1.12', '>= 1.12.0'
25
+ spec.add_runtime_dependency 'ruby-readability', '~> 0.7', '>= 0.7.0'
26
+ spec.add_runtime_dependency 'sanitize', '~> 4.5', '>= 4.5.0'
27
+ end
28
+
@@ -0,0 +1,20 @@
1
+ require 'ruby-readability'
2
+ require 'sanitize'
3
+
4
+ module Crawlers
5
+ module Helpers
6
+ module Content
7
+ def extract_primary_content(html_text)
8
+ content = Readability::Document.new(html_text).content
9
+ sanitized_content = Sanitize.clean(content)
10
+ remove_trailing_spaces(sanitized_content)
11
+ end
12
+
13
+ private
14
+
15
+ def remove_trailing_spaces(text)
16
+ text.strip.gsub(/(?<=\n)\s+/, '')
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,45 @@
1
+ require 'rss'
2
+ require 'open-uri'
3
+ require 'parallel'
4
+ require 'crawlers/helpers/content'
5
+
6
+ module Crawlers
7
+ class Rss # crawls the articles linked from a single RSS feed
8
+ include Helpers::Content
9
+
10
+ def initialize(rss_url) # rss_url: location of the feed; it is handed to Kernel#open as-is
11
+ @rss_url = rss_url
12
+ end
13
+
14
+ def articles # returns the non-empty article texts of the feed; [] when the feed cannot be loaded or parsed
15
+ articles = Parallel.map(rss_feed_items) do |feed_item|
16
+ crawl_article(feed_item)
17
+ end
18
+ articles.reject(&:empty?) # pages that failed to load or had no content come back as ''
19
+ end
20
+
21
+ private
22
+
23
+ def rss_feed_items # downloads the feed and returns its items
24
+ rss_feed = page_content(@rss_url)
25
+ parse_feed(rss_feed)
26
+ end
27
+
28
+ def parse_feed(rss_feed) # returns the parsed feed items, or [] when the text is not a usable feed
29
+ RSS::Parser.parse(rss_feed)&.items || [] # fall back to [] so a nil parse result never reaches Parallel.map
30
+ rescue RSS::Error
31
+ []
32
+ end
33
+
34
+ def crawl_article(feed_item) # fetches the page behind the item's link and extracts its primary text
35
+ page_with_article = page_content(feed_item.link)
36
+ extract_primary_content(page_with_article)
37
+ end
38
+
39
+ def page_content(page_url) # returns the body found at page_url, or '' on any failure
40
+ open(page_url, &:read) # block form closes the handle after reading; NOTE(security): Kernel#open runs a command for a "|cmd" argument and links come from feeds - validate before use
41
+ rescue StandardError
42
+ ''
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,16 @@
1
+ require 'parallel'
2
+ require 'crawlers/rss'
3
+
4
+ class FeedsCrawler
5
+ def self.crawl(*rss_urls)
6
+ urls = Array(rss_urls)
7
+ crawl_feeds_articles(urls).flatten
8
+ end
9
+
10
+ def self.crawl_feeds_articles(rss_urls)
11
+ Parallel.map(rss_urls) do |url|
12
+ Crawlers::Rss.new(url).articles
13
+ end
14
+ end
15
+ private_class_method :crawl_feeds_articles
16
+ end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe Crawlers::Helpers::Content do
4
+ let(:dummy_class) { Class.new { include Crawlers::Helpers::Content } }
5
+ describe '.extract_primary_content' do
6
+ let(:html_text) { open('spec/fixtures/page_with_content.html').read }
7
+ subject { dummy_class.new.extract_primary_content(html_text) }
8
+
9
+ it 'extracts real content of a document' do
10
+ is_expected.to eq('Real Content')
11
+ end
12
+
13
+ context 'when real content still has markup element' do
14
+ let(:markup) { '<div><p>Content</p></div>' }
15
+ before { allow_any_instance_of(Readability::Document).to receive(:content).and_return(markup) }
16
+
17
+ it 'removes any markup' do
18
+ is_expected.to eq('Content')
19
+ end
20
+ end
21
+
22
+ context 'when real content has trailing spaces' do
23
+ let(:content) { "First line\n Second line" }
24
+ before { allow(Sanitize).to receive(:clean).and_return(content) }
25
+
26
+ it 'removes this spaces' do
27
+ is_expected.to eq("First line\nSecond line")
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,72 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe Crawlers::Rss do # specs for the single-feed crawler
4
+ describe '#articles' do
5
+ subject { described_class.new('url').articles }
6
+
7
+ context 'when rss url is invalid' do
8
+ before { allow(Kernel).to receive(:open).and_raise(Errno::ENOENT) } # NOTE(review): this stubs open on the Kernel module object, while Crawlers::Rss calls open with an implicit receiver - confirm the stub is actually hit
9
+
10
+ it { is_expected.to be_empty }
11
+ end
12
+
13
+ context 'when rss url leads to error page' do
14
+ before { allow(Kernel).to receive(:open).and_raise(OpenURI::HTTPError) }
15
+
16
+ it { is_expected.to be_empty }
17
+ end
18
+
19
+ context 'when rss url leads to page with redirect' do
20
+ before { allow(Kernel).to receive(:open).and_raise(RuntimeError) }
21
+
22
+ it { is_expected.to be_empty }
23
+ end
24
+
25
+ context 'when something goes wrong even when page loads' do
26
+ before do
27
+ allow(Kernel).to receive_message_chain(:open, :read).and_raise(StandardError)
28
+ end
29
+
30
+ it { is_expected.to be_empty }
31
+ end
32
+
33
+ context 'when rss url is valid' do
34
+ before do
35
+ allow(Kernel).to receive_message_chain(:open, :read).and_return('page content')
36
+ end
37
+
38
+ context 'when page content is bad formatted markup' do
39
+ before { allow(RSS::Parser).to receive(:parse).and_raise(RSS::Error) } # the parser rejects the text
40
+
41
+ it { is_expected.to be_empty }
42
+ end
43
+
44
+ context 'when page content is not rss feed' do
45
+ before { allow(RSS::Parser).to receive(:parse).and_return(nil) } # the parser yields no feed object
46
+
47
+ it { is_expected.to be_empty }
48
+ end
49
+
50
+ context 'when page content is real rss feed' do
51
+ let(:feed_items) { [instance_double(RSS::Rss::Channel::Item, link: '')] } # a single feed item with an empty link
52
+
53
+ before do
54
+ allow(RSS::Parser).to receive_message_chain(:parse, :items).and_return(feed_items)
55
+ allow_any_instance_of(described_class).to receive(:extract_primary_content).and_return(feed_item_article) # bypass the real content extraction
56
+ end
57
+
58
+ context 'when link does not leads to page with content' do
59
+ let(:feed_item_article) { '' }
60
+
61
+ it { is_expected.to be_empty }
62
+ end
63
+
64
+ context 'when link leads to page with content' do
65
+ let(:feed_item_article) { 'Content' }
66
+
67
+ it { is_expected.to match_array([feed_item_article]) }
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe FeedsCrawler do # specs for the public crawl entry point
4
+ describe '.crawl' do
5
+ let(:rss_url) { 'spec/fixtures/rss_feed.xml' } # local fixture path used in place of a feed URL
6
+
7
+ subject { described_class.crawl(rss_url) }
8
+
9
+ it 'crawls crawlers feeds articles' do
10
+ is_expected.to match_array(['Real Content']) # text of spec/fixtures/page_with_content.html, which the fixture feed links to
11
+ end
12
+
13
+ context 'when passing an argument list' do
14
+ let(:article) { 'article' }
15
+ let(:rss_urls) { %w[first_url second_url] }
16
+
17
+ before do
18
+ allow_any_instance_of(Crawlers::Rss).to receive(:articles).and_return([article]) # every crawler returns one canned article
19
+ end
20
+
21
+ subject { described_class.crawl(*rss_urls).count }
22
+
23
+ it 'crawls articles for each argument' do
24
+ is_expected.to eq(rss_urls.length)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>title!</title>
4
+ </head>
5
+ <body class='foo'>
6
+ <div class='comment'>Real Content</div>
7
+ </body>
8
+ </html>
@@ -0,0 +1,10 @@
1
+ <rss version="2.0">
2
+ <channel>
3
+ <title>Feed</title>
4
+ <link>link.com</link>
5
+ <description>Description</description>
6
+ <item>
7
+ <link>spec/fixtures/page_with_content.html</link>
8
+ </item>
9
+ </channel>
10
+ </rss>
@@ -0,0 +1,13 @@
1
+ require 'bundler/setup'
2
+ require 'pry'
3
+ require 'feeds_crawler'
4
+
5
+ RSpec.configure do |config| # suite-wide RSpec settings
6
+ config.example_status_persistence_file_path = '.rspec_status' # file in which RSpec records per-example status between runs
7
+
8
+ config.disable_monkey_patching! # spec files therefore use the explicit RSpec.describe form
9
+
10
+ config.expect_with :rspec do |c|
11
+ c.syntax = :expect # allow only the expect(...) assertion syntax
12
+ end
13
+ end
metadata ADDED
@@ -0,0 +1,201 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: feeds-crawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrey Tatarenko
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-09-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.15'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.15'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: parallel
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.12'
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.12.0
93
+ type: :runtime
94
+ prerelease: false
95
+ version_requirements: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '1.12'
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 1.12.0
103
+ - !ruby/object:Gem::Dependency
104
+ name: ruby-readability
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '0.7'
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 0.7.0
113
+ type: :runtime
114
+ prerelease: false
115
+ version_requirements: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: '0.7'
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: 0.7.0
123
+ - !ruby/object:Gem::Dependency
124
+ name: sanitize
125
+ requirement: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - "~>"
128
+ - !ruby/object:Gem::Version
129
+ version: '4.5'
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: 4.5.0
133
+ type: :runtime
134
+ prerelease: false
135
+ version_requirements: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - "~>"
138
+ - !ruby/object:Gem::Version
139
+ version: '4.5'
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ version: 4.5.0
143
+ description:
144
+ email:
145
+ - andrey17076@gmail.com
146
+ executables:
147
+ - console
148
+ - setup
149
+ extensions: []
150
+ extra_rdoc_files: []
151
+ files:
152
+ - ".gitignore"
153
+ - ".rspec"
154
+ - Gemfile
155
+ - Gemfile.lock
156
+ - LICENSE
157
+ - README.md
158
+ - Rakefile
159
+ - bin/console
160
+ - bin/setup
161
+ - feeds-crawler.gemspec
162
+ - lib/crawlers/helpers/content.rb
163
+ - lib/crawlers/rss.rb
164
+ - lib/feeds_crawler.rb
165
+ - spec/crawlers/helpers/content_spec.rb
166
+ - spec/crawlers/rss_spec.rb
167
+ - spec/feeds_crawler_spec.rb
168
+ - spec/fixtures/page_with_content.html
169
+ - spec/fixtures/rss_feed.xml
170
+ - spec/spec_helper.rb
171
+ homepage: https://github.com/andrey17076/feed-crawler
172
+ licenses:
173
+ - MIT
174
+ metadata: {}
175
+ post_install_message:
176
+ rdoc_options: []
177
+ require_paths:
178
+ - lib
179
+ required_ruby_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ version: '0'
184
+ required_rubygems_version: !ruby/object:Gem::Requirement
185
+ requirements:
186
+ - - ">="
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ requirements: []
190
+ rubyforge_project:
191
+ rubygems_version: 2.6.12
192
+ signing_key:
193
+ specification_version: 4
194
+ summary: This gem allows to crawl news articles from RSS feeds.
195
+ test_files:
196
+ - spec/crawlers/helpers/content_spec.rb
197
+ - spec/crawlers/rss_spec.rb
198
+ - spec/feeds_crawler_spec.rb
199
+ - spec/fixtures/page_with_content.html
200
+ - spec/fixtures/rss_feed.xml
201
+ - spec/spec_helper.rb