scruber 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +6 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/exe/scruber +4 -0
  12. data/lib/scruber/app_searcher.rb +31 -0
  13. data/lib/scruber/cli/project_generator.rb +47 -0
  14. data/lib/scruber/cli/templates/Gemfile.tt +6 -0
  15. data/lib/scruber/cli/templates/application.tt +18 -0
  16. data/lib/scruber/cli/templates/bin/scruber.tt +6 -0
  17. data/lib/scruber/cli/templates/boot.tt +3 -0
  18. data/lib/scruber/cli/templates/gitignore.tt +12 -0
  19. data/lib/scruber/cli/templates/initializers/proxies.tt +10 -0
  20. data/lib/scruber/cli/templates/initializers/user_agents.tt +14 -0
  21. data/lib/scruber/cli/templates/scrapers/sample.tt +7 -0
  22. data/lib/scruber/cli.rb +40 -0
  23. data/lib/scruber/core/configuration.rb +30 -0
  24. data/lib/scruber/core/crawler.rb +92 -0
  25. data/lib/scruber/core/extensions/base.rb +26 -0
  26. data/lib/scruber/core/extensions/csv_output.rb +62 -0
  27. data/lib/scruber/core/extensions/loop.rb +39 -0
  28. data/lib/scruber/core/page_format/base.rb +11 -0
  29. data/lib/scruber/core/page_format/html.rb +13 -0
  30. data/lib/scruber/core/page_format/xml.rb +13 -0
  31. data/lib/scruber/core/page_format.rb +33 -0
  32. data/lib/scruber/fetcher.rb +34 -0
  33. data/lib/scruber/fetcher_adapters/abstract_adapter.rb +119 -0
  34. data/lib/scruber/fetcher_adapters/typhoeus_fetcher.rb +78 -0
  35. data/lib/scruber/helpers/dictionary_reader/csv.rb +27 -0
  36. data/lib/scruber/helpers/dictionary_reader/xml.rb +23 -0
  37. data/lib/scruber/helpers/dictionary_reader.rb +33 -0
  38. data/lib/scruber/helpers/fetcher_agent.rb +40 -0
  39. data/lib/scruber/helpers/fetcher_agent_adapters/abstract_adapter.rb +69 -0
  40. data/lib/scruber/helpers/fetcher_agent_adapters/memory.rb +41 -0
  41. data/lib/scruber/helpers/proxy_rotator.rb +125 -0
  42. data/lib/scruber/helpers/user_agent_rotator.rb +91 -0
  43. data/lib/scruber/queue.rb +34 -0
  44. data/lib/scruber/queue_adapters/abstract_adapter.rb +112 -0
  45. data/lib/scruber/queue_adapters/memory.rb +70 -0
  46. data/lib/scruber/version.rb +3 -0
  47. data/lib/scruber.rb +69 -0
  48. data/scruber.gemspec +43 -0
  49. metadata +233 -0
@@ -0,0 +1,91 @@
module Scruber
  module Helpers
    # Round-robin rotator over a configured list of User-Agent strings,
    # optionally filtered by tags.
    #
    # State (configuration and cursor) lives on the class itself. A block
    # passed to `configure` is instance_eval'ed against a Configuration, so
    # `add`, `clean` and `set_filter` can be called bare inside it:
    #
    #   Scruber::Helpers::UserAgentRotator.configure do
    #     add "Mozilla/5.0", tags: [:desktop]
    #   end
    #   Scruber::Helpers::UserAgentRotator.next(:desktop) # => "Mozilla/5.0"
    class UserAgentRotator

      # A single User-Agent string together with the tags used to filter it.
      class UserAgent
        attr_accessor :name, :tags

        # @param name [String] the User-Agent value; must not be blank
        # @param options [Hash] `:tags` may be nil, a single tag or an Array
        # @raise [Scruber::ArgumentError] when name is blank
        def initialize(name, options={})
          @name = name
          raise Scruber::ArgumentError, "You need to specify name" if @name.blank?

          tags = options.fetch(:tags) { [] } || []
          tags = [tags] unless tags.is_a?(Array)
          # Tags are stored as symbols; get_collection symbolizes the
          # requested tags the same way before comparing.
          @tags = tags.compact.map(&:to_sym)
        end

        # @return [String] the name doubles as the identifier
        def id
          @name
        end
      end

      # Holds the registered user agents and the default tag filter.
      class Configuration
        # NOTE(review): CoreMethods is defined outside this file; presumably
        # it provides the `loop` helper for use inside configure blocks.
        include Scruber::Core::Extensions::Loop::CoreMethods

        attr_reader :user_agents, :tags

        def initialize
          @tags = :all
          @user_agents = []
        end

        # Evaluates the block with this configuration as `self`.
        def configure(&block)
          instance_eval(&block)
        end

        # Removes every registered user agent (the tag filter is kept).
        def clean
          @user_agents = []
        end

        # Registers a user agent.
        #
        # @param name [String] the User-Agent value
        # @param options [Hash] forwarded to UserAgent.new (`:tags`)
        def add(name, options={})
          @user_agents.push(UserAgent.new(name, options))
        end

        # Sets the default tag filter used when `next` is called without tags.
        def set_filter(tags)
          @tags = tags
        end
      end

      class << self
        attr_writer :configuration
        attr_accessor :cursor

        # @return [Configuration] the lazily created configuration
        def configuration
          @configuration ||= Configuration.new
        end

        # Configures the rotator; see Configuration#configure.
        def configure(&block)
          configuration.configure(&block)
        end

        # Returns the next User-Agent string in rotation.
        #
        # A single cursor is shared by all tag filters, so alternating
        # between filters advances the same position.
        #
        # @param tags [Symbol, String, Array, nil] tags every returned agent
        #   must carry; falls back to the configured filter when blank
        # @return [String] the selected user agent name
        # @raise [Scruber::ArgumentError] when the rotator was never
        #   configured, or when no user agent matches the requested tags
        def next(tags=nil)
          raise Scruber::ArgumentError, "UserAgent rotator not configured" if @configuration.nil?

          tags = @configuration.tags if tags.blank?
          user_agents = get_collection(tags)
          # Without this guard an empty collection would fail below with an
          # opaque NoMethodError on nil.
          if user_agents.empty?
            raise Scruber::ArgumentError, "No user agents found for tags #{tags.inspect}"
          end

          # Wrap around once the cursor reaches the end of the (filtered) list.
          if @cursor.nil? || @cursor >= user_agents.count - 1
            @cursor = 0
          else
            @cursor += 1
          end
          user_agents[@cursor].name
        end

        private

        # Returns all user agents for :all, otherwise only those carrying
        # every one of the requested tags.
        def get_collection(tags)
          return @configuration.user_agents if tags == :all

          tags = [tags] unless tags.is_a?(Array)
          wanted = tags.compact.map(&:to_sym)
          @configuration.user_agents.select do |ua|
            wanted.all? { |tag| ua.tags.include?(tag) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
module Scruber
  # Registry and factory for queue adapters.
  #
  # Adapter classes register themselves under a label with `add_adapter`;
  # `Scruber::Queue.new` then instantiates the selected adapter.
  module Queue
    class << self
      # Selects the adapter by label (the key used with add_adapter).
      attr_writer :adapter

      # Builds a queue using the selected adapter.
      #
      # @param options [Hash] merged over Scruber.configuration.queue_options
      # @return [Object] a new instance of the adapter class
      # @raise [Scruber::ArgumentError] when no usable adapter is selected
      def new(options={})
        adapter.new(::Scruber.configuration.queue_options.merge(options))
      end

      # Resolves the adapter class. The label comes from, in order: an
      # explicit `adapter=` call, Scruber.configuration.queue_adapter, or
      # the first registered adapter. The resolved label is remembered.
      #
      # @return [Class] the registered adapter class
      # @raise [Scruber::ArgumentError] when no label can be determined or
      #   the label was never registered
      def adapter
        @adapter ||= ::Scruber.configuration.queue_adapter || _adapters.keys.first
        raise Scruber::ArgumentError, "Adapter not found" unless @adapter

        # A label without a registered class used to slip through as nil and
        # blow up later in `new`; fail here with a meaningful error instead.
        _adapters.fetch(@adapter) do
          raise Scruber::ArgumentError, "Adapter #{@adapter.inspect} not found"
        end
      end

      # Registers an adapter class under a label.
      #
      # @param label [Object] lookup key, e.g. :memory
      # @param claz [Class] adapter class; must define an instance method `add`
      # @raise [NoMethodError] when claz does not define `add`
      def add_adapter(label, claz)
        unless claz.method_defined?(:add)
          raise NoMethodError, "add is not declared in the #{label.inspect}"
        end
        _adapters[label] = claz
      end

      # @return [Class, nil] the adapter registered under label, if any
      def [](label)
        _adapters[label]
      end

      # @return [Hash] label => adapter class registry
      def _adapters
        @_adapters ||= {}
      end
    end
  end
end
@@ -0,0 +1,112 @@
module Scruber
  module QueueAdapters
    # Base class for queue adapters. Subclasses must implement `add`,
    # `fetch_pending` and `fetch_downloaded`, and provide a Page subclass
    # that implements `save`.
    class AbstractAdapter

      # A single queued request together with its response data.
      class Page
        attr_accessor :url,
                      :method,
                      :user_agent,
                      :post_body,
                      :headers,
                      :fetcher_agent_id,
                      :proxy_id,
                      :response_body,
                      :response_code,
                      :response_headers,
                      :response_total_time,
                      :retry_at,
                      :fetched_at,
                      :retry_count,
                      :max_retry_times,
                      :enqueued_at,
                      :page_type,
                      :queue,
                      :priority,
                      :processed_at,
                      :options

        # @param queue [Object] the queue this page belongs to
        # @param url [String] the page URL
        # @param options [Hash] initial attribute values keyed by attribute
        #   name; missing keys fall back to the defaults below. The hash
        #   itself is kept in `options`.
        def initialize(queue, url, options={})
          @queue = queue
          @url = url
          @method = options.fetch(:method) { :get }
          @user_agent = options.fetch(:user_agent) { nil }
          @post_body = options.fetch(:post_body) { nil }
          @headers = options.fetch(:headers) { {} }
          @fetcher_agent_id = options.fetch(:fetcher_agent_id) { nil }
          @proxy_id = options.fetch(:proxy_id) { nil }
          @response_body = options.fetch(:response_body) { nil }
          @response_code = options.fetch(:response_code) { nil }
          @response_headers = options.fetch(:response_headers) { {} }
          @response_total_time = options.fetch(:response_total_time) { nil }
          @retry_at = options.fetch(:retry_at) { 0 }
          @fetched_at = options.fetch(:fetched_at) { 0 }
          @retry_count = options.fetch(:retry_count) { 0 }
          @max_retry_times = options.fetch(:max_retry_times) { nil }
          @enqueued_at = options.fetch(:enqueued_at) { 0 }
          @page_type = options.fetch(:page_type) { :seed }
          @priority = options.fetch(:priority) { 0 }
          @processed_at = options.fetch(:processed_at) { 0 }
          @options = options

          # `false` means "not looked up yet"; nil is a valid cached result.
          @_fetcher_agent = false
          @_proxy = false
        end

        # Looks up (once) the fetcher agent referenced by fetcher_agent_id.
        #
        # @return [Object, nil] nil when no fetcher_agent_id is set
        def fetcher_agent
          return @_fetcher_agent unless @_fetcher_agent == false

          @_fetcher_agent = (@fetcher_agent_id ? Scruber::Helpers::FetcherAgent.find(@fetcher_agent_id) : nil)
        end

        # Looks up (once) the proxy referenced by proxy_id.
        #
        # @return [Object, nil] nil when no proxy_id is set
        def proxy
          return @_proxy unless @_proxy == false

          @_proxy = (@proxy_id ? Scruber::Helpers::ProxyRotator.find(@proxy_id) : nil)
        end

        # Returns the raw 'Set-Cookie' response header values.
        #
        # @return [Array] empty when the header is absent or blank, or when
        #   response_headers itself is nil
        def response_cookies
          # response_headers can be nil when :response_headers => nil was
          # passed explicitly (fetch only defaults a missing key).
          cookies = (response_headers || {})['Set-Cookie']
          return [] if cookies.blank?

          cookies.is_a?(Array) ? cookies : [cookies]
        end

        # Persists the page; must be implemented by adapter-specific pages.
        #
        # @raise [NotImplementedError] always, in this base class
        def save
          raise NotImplementedError
        end

        # Hash-style read access to an attribute, e.g. page[:url].
        #
        # @param k [Symbol, String] attribute (instance variable) name
        def [](k)
          instance_variable_get("@#{k}")
        end

      end

      # @param options [Hash] adapter options, stored as-is
      def initialize(options={})
        @options = options
      end

      # Enqueues a URL. Must be implemented by subclasses.
      def add(url, options={})
        raise NotImplementedError
      end

      # Returns pages waiting to be fetched. Must be implemented by subclasses.
      def fetch_pending(count=nil)
        raise NotImplementedError
      end

      # Returns fetched pages waiting to be processed. Must be implemented
      # by subclasses.
      def fetch_downloaded(count=nil)
        raise NotImplementedError
      end
    end
  end
end
@@ -0,0 +1,70 @@
module Scruber
  module QueueAdapters
    # Queue adapter that keeps pending, downloaded and failed pages in
    # plain in-process arrays.
    class Memory < AbstractAdapter
      attr_reader :error_pages

      # Page whose `save` routes it into the matching in-memory list.
      class Page < Scruber::QueueAdapters::AbstractAdapter::Page
        # Fetched pages go to the downloaded list; unfetched pages whose
        # retry_count has reached max_retry_times (nil counts as 0) go to
        # the error list; everything else is pushed back onto the queue.
        def save
          return @queue.add_downloaded(self) if fetched_at > 0
          return @queue.add_error_page(self) if retry_count >= max_retry_times.to_i

          @queue.push(self)
        end
      end

      # @param options [Hash] adapter options, handed to AbstractAdapter
      def initialize(options={})
        super(options)
        @queue = []
        @downloaded_pages = []
        @error_pages = []
      end

      # Appends a page to the pending queue. Anything that is not already a
      # Page is treated as a URL and wrapped in a new Page built with options.
      def push(url_or_page, options={})
        entry = url_or_page.is_a?(Page) ? url_or_page : Page.new(self, url_or_page, options)
        @queue.push(entry)
      end
      alias_method :add, :push

      # Number of pages waiting to be fetched.
      def queue_size
        @queue.count
      end

      # Removes and returns the oldest downloaded page, or an Array of up to
      # `count` pages when count is given.
      def fetch_downloaded(count=nil)
        count.nil? ? @downloaded_pages.shift : @downloaded_pages.shift(count)
      end

      # Removes and returns the oldest pending page, or an Array of up to
      # `count` pages when count is given.
      def fetch_pending(count=nil)
        count.nil? ? @queue.shift : @queue.shift(count)
      end

      # Stores a fetched page for later processing.
      def add_downloaded(page)
        @downloaded_pages.push(page)
      end

      # Records a page that will not be retried.
      def add_error_page(page)
        @error_pages.push(page)
      end

      # True while there is anything left to fetch or to process.
      def has_work?
        !@queue.empty? || !@downloaded_pages.empty?
      end

    end
  end
end

Scruber::Queue.add_adapter(:memory, Scruber::QueueAdapters::Memory)
@@ -0,0 +1,3 @@
module Scruber
  # Gem version string; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.1".freeze
end
data/lib/scruber.rb ADDED
@@ -0,0 +1,69 @@
1
+ require "scruber/version"
2
+ require 'nokogiri'
3
+ require 'http-cookie'
4
+ require 'pickup'
5
+ require 'csv'
6
+ require 'active_support'
7
+ require 'active_support/core_ext/object'
8
+ require 'active_support/core_ext/hash'
9
+
10
+ require "scruber/fetcher"
11
+ require "scruber/fetcher_adapters/abstract_adapter"
12
+ require "scruber/fetcher_adapters/typhoeus_fetcher"
13
+
14
+ require "scruber/queue"
15
+ require "scruber/queue_adapters/abstract_adapter"
16
+ require "scruber/queue_adapters/memory"
17
+
18
+ require "scruber/core/page_format"
19
+ require "scruber/core/page_format/base"
20
+ require "scruber/core/page_format/xml"
21
+ require "scruber/core/page_format/html"
22
+
23
+ require "scruber/core/extensions/base"
24
+ require "scruber/core/extensions/loop"
25
+ require "scruber/core/extensions/csv_output"
26
+
27
+ # require "scruber/core/configuration"
28
+ # require "scruber/core/configuration"
29
+
# Top-level namespace of the gem: error class, lazily loaded components and
# the global configuration / entry points.
module Scruber
  # Raised for invalid arguments or missing configuration inside the gem.
  class ArgumentError < ::ArgumentError; end

  module Core
    autoload :Configuration, "scruber/core/configuration"
    autoload :Crawler, "scruber/core/crawler"
  end

  module Helpers
    autoload :UserAgentRotator, "scruber/helpers/user_agent_rotator"
    autoload :ProxyRotator, "scruber/helpers/proxy_rotator"
    autoload :FetcherAgent, "scruber/helpers/fetcher_agent"
    module FetcherAgentAdapters
      autoload :AbstractAdapter, "scruber/helpers/fetcher_agent_adapters/abstract_adapter"
      autoload :Memory, "scruber/helpers/fetcher_agent_adapters/memory"
    end
    autoload :DictionaryReader, "scruber/helpers/dictionary_reader"
    # Reopening the module resolves the autoload registered just above, so
    # scruber/helpers/dictionary_reader is required at this point.
    module DictionaryReader
      autoload :Xml, "scruber/helpers/dictionary_reader/xml"
      autoload :Csv, "scruber/helpers/dictionary_reader/csv"
    end
  end

  class << self
    attr_writer :configuration

    # Builds a crawler with the given options and runs the block with it.
    #
    # @param options [Hash] passed to Core::Crawler.new
    # @raise [RuntimeError] when no block is given
    def run(options={}, &block)
      raise "You need a block to build!" unless block_given?

      Core::Crawler.new(options).run(&block)
    end

    # @return [Core::Configuration] the lazily created global configuration
    def configuration
      @configuration ||= Core::Configuration.new
    end

    # Yields the global configuration to the block.
    #
    # @return [Object] the block's result, or the configuration itself when
    #   called without a block (previously this raised LocalJumpError)
    def configure
      return configuration unless block_given?

      yield configuration
    end
  end
end
data/scruber.gemspec ADDED
@@ -0,0 +1,43 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # make lib/ requirable so the version file below loads
4
+ require "scruber/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scruber"
8
+ spec.version = Scruber::VERSION
9
+ spec.authors = ["Ivan Goncharov"]
10
+ spec.email = ["revis0r.mob@gmail.com"]
11
+
12
+ spec.summary = %q{Crawling framework}
13
+ spec.description = %q{Crawling framework}
14
+ spec.homepage = "https://github.com/scruber/scruber"
15
+ spec.license = "MIT"
16
+
17
+ # Restrict pushes of this gem to RubyGems.org only. To push to a different host, change
18
+ # 'allowed_push_host' below; delete this section to allow pushing to any host.
19
+ if spec.respond_to?(:metadata)
20
+ spec.metadata["allowed_push_host"] = "https://rubygems.org"
21
+ else
22
+ raise "RubyGems 2.0 or newer is required to protect against " \
23
+ "public gem pushes."
24
+ end
25
+
26
+ spec.files = `git ls-files -z`.split("\x0").reject do |f| # package every git-tracked file...
27
+ f.match(%r{^(test|spec|features)/}) # ...except those under test/, spec/ or features/
28
+ end
29
+ spec.bindir = "exe"
30
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } # every file under exe/ becomes an executable
31
+ spec.require_paths = ["lib"]
32
+
33
+ spec.add_dependency "typhoeus", "1.1.2" # dependencies given as bare versions are pinned to exactly that version
34
+ spec.add_dependency "pickup", "0.0.11"
35
+ spec.add_dependency "nokogiri", "1.8.2"
36
+ spec.add_dependency "http-cookie", "1.0.3"
37
+ spec.add_dependency "activesupport", "5.1.5"
38
+ spec.add_runtime_dependency "thor", "0.20.0"
39
+ spec.add_development_dependency "bundler", "~> 1.15"
40
+ spec.add_development_dependency "rake", "~> 10.0"
41
+ spec.add_development_dependency "rspec", "~> 3.0"
42
+ spec.add_development_dependency "webmock", "3.0.1"
43
+ end
metadata ADDED
@@ -0,0 +1,233 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scruber
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Ivan Goncharov
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-03-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: typhoeus
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.1.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.1.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: pickup
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.0.11
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.0.11
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.8.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.8.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: http-cookie
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 1.0.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 1.0.3
69
+ - !ruby/object:Gem::Dependency
70
+ name: activesupport
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 5.1.5
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 5.1.5
83
+ - !ruby/object:Gem::Dependency
84
+ name: thor
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 0.20.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 0.20.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: bundler
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.15'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.15'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rake
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '10.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '10.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rspec
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '3.0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '3.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: webmock
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '='
144
+ - !ruby/object:Gem::Version
145
+ version: 3.0.1
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '='
151
+ - !ruby/object:Gem::Version
152
+ version: 3.0.1
153
+ description: Crawling framework
154
+ email:
155
+ - revis0r.mob@gmail.com
156
+ executables:
157
+ - scruber
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - ".gitignore"
162
+ - ".rspec"
163
+ - ".travis.yml"
164
+ - Gemfile
165
+ - LICENSE.txt
166
+ - README.md
167
+ - Rakefile
168
+ - bin/console
169
+ - bin/setup
170
+ - exe/scruber
171
+ - lib/scruber.rb
172
+ - lib/scruber/app_searcher.rb
173
+ - lib/scruber/cli.rb
174
+ - lib/scruber/cli/project_generator.rb
175
+ - lib/scruber/cli/templates/Gemfile.tt
176
+ - lib/scruber/cli/templates/application.tt
177
+ - lib/scruber/cli/templates/bin/scruber.tt
178
+ - lib/scruber/cli/templates/boot.tt
179
+ - lib/scruber/cli/templates/gitignore.tt
180
+ - lib/scruber/cli/templates/initializers/proxies.tt
181
+ - lib/scruber/cli/templates/initializers/user_agents.tt
182
+ - lib/scruber/cli/templates/scrapers/sample.tt
183
+ - lib/scruber/core/configuration.rb
184
+ - lib/scruber/core/crawler.rb
185
+ - lib/scruber/core/extensions/base.rb
186
+ - lib/scruber/core/extensions/csv_output.rb
187
+ - lib/scruber/core/extensions/loop.rb
188
+ - lib/scruber/core/page_format.rb
189
+ - lib/scruber/core/page_format/base.rb
190
+ - lib/scruber/core/page_format/html.rb
191
+ - lib/scruber/core/page_format/xml.rb
192
+ - lib/scruber/fetcher.rb
193
+ - lib/scruber/fetcher_adapters/abstract_adapter.rb
194
+ - lib/scruber/fetcher_adapters/typhoeus_fetcher.rb
195
+ - lib/scruber/helpers/dictionary_reader.rb
196
+ - lib/scruber/helpers/dictionary_reader/csv.rb
197
+ - lib/scruber/helpers/dictionary_reader/xml.rb
198
+ - lib/scruber/helpers/fetcher_agent.rb
199
+ - lib/scruber/helpers/fetcher_agent_adapters/abstract_adapter.rb
200
+ - lib/scruber/helpers/fetcher_agent_adapters/memory.rb
201
+ - lib/scruber/helpers/proxy_rotator.rb
202
+ - lib/scruber/helpers/user_agent_rotator.rb
203
+ - lib/scruber/queue.rb
204
+ - lib/scruber/queue_adapters/abstract_adapter.rb
205
+ - lib/scruber/queue_adapters/memory.rb
206
+ - lib/scruber/version.rb
207
+ - scruber.gemspec
208
+ homepage: https://github.com/scruber/scruber
209
+ licenses:
210
+ - MIT
211
+ metadata:
212
+ allowed_push_host: https://rubygems.org
213
+ post_install_message:
214
+ rdoc_options: []
215
+ require_paths:
216
+ - lib
217
+ required_ruby_version: !ruby/object:Gem::Requirement
218
+ requirements:
219
+ - - ">="
220
+ - !ruby/object:Gem::Version
221
+ version: '0'
222
+ required_rubygems_version: !ruby/object:Gem::Requirement
223
+ requirements:
224
+ - - ">="
225
+ - !ruby/object:Gem::Version
226
+ version: '0'
227
+ requirements: []
228
+ rubyforge_project:
229
+ rubygems_version: 2.6.14
230
+ signing_key:
231
+ specification_version: 4
232
+ summary: Crawling framework
233
+ test_files: []