simple-scraper 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.rubocop.yml +33 -0
- data/.simplecov +4 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +71 -0
- data/LICENSE.txt +21 -0
- data/README.md +127 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/simple/scraper.rb +32 -0
- data/lib/simple/scraper/configuration.rb +21 -0
- data/lib/simple/scraper/finder.rb +23 -0
- data/lib/simple/scraper/parser.rb +24 -0
- data/lib/simple/scraper/version.rb +5 -0
- data/simple-scraper.gemspec +34 -0
- metadata +188 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 738f971a98b52a7e72b34a7d04c1265d61e5f58358f1353308547b2d3e2ac40a
|
4
|
+
data.tar.gz: 53c9617d9fc9ee52006b47c7474b1ed4612ac05441c59bf08a9f9d7c0b9087ea
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9b1acd300ae5549d8c4287f224b493bc5e1bdf6f4e99b23957eea18b85cf07ac07ba1701ad6012e867bc0fa0259b3a6a1caa927ed72c1c6ddaa66ffb3f908550
|
7
|
+
data.tar.gz: 167ac5fb740df8131272e4296d8c66d5a179b6bd526e78d00f3ddee6be9721830fa59b1db5d91b0b43d84f686480e1951ad36106910244f577c906d4d3342857
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
|
2
|
+
AllCops:
|
3
|
+
TargetRubyVersion: 2.6.1
|
4
|
+
|
5
|
+
Style/FrozenStringLiteralComment:
|
6
|
+
Enabled: false
|
7
|
+
|
8
|
+
Style/Documentation:
|
9
|
+
Enabled: false
|
10
|
+
|
11
|
+
Style/ClassAndModuleChildren:
|
12
|
+
Enabled: false
|
13
|
+
|
14
|
+
Metrics/LineLength:
|
15
|
+
Max: 200
|
16
|
+
|
17
|
+
Metrics/ClassLength:
|
18
|
+
Max: 350
|
19
|
+
|
20
|
+
Metrics/ModuleLength:
|
21
|
+
Max: 350
|
22
|
+
|
23
|
+
Metrics/AbcSize:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
Metrics/MethodLength:
|
27
|
+
Max: 30
|
28
|
+
|
29
|
+
Metrics/CyclomaticComplexity:
|
30
|
+
Max: 7
|
31
|
+
|
32
|
+
Metrics/BlockLength:
|
33
|
+
Max: 30
|
data/.simplecov
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
simple-scraper (1.0.0)
|
5
|
+
httparty (~> 0.16.4)
|
6
|
+
nokogiri (~> 1.6)
|
7
|
+
parallel (~> 1.11)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
addressable (2.6.0)
|
13
|
+
public_suffix (>= 2.0.2, < 4.0)
|
14
|
+
crack (0.4.3)
|
15
|
+
safe_yaml (~> 1.0.0)
|
16
|
+
diff-lcs (1.3)
|
17
|
+
docile (1.3.1)
|
18
|
+
hashdiff (0.3.8)
|
19
|
+
httparty (0.16.4)
|
20
|
+
mime-types (~> 3.0)
|
21
|
+
multi_xml (>= 0.5.2)
|
22
|
+
json (2.2.0)
|
23
|
+
mime-types (3.2.2)
|
24
|
+
mime-types-data (~> 3.2015)
|
25
|
+
mime-types-data (3.2019.0331)
|
26
|
+
mini_portile2 (2.4.0)
|
27
|
+
multi_xml (0.6.0)
|
28
|
+
nokogiri (1.10.3)
|
29
|
+
mini_portile2 (~> 2.4.0)
|
30
|
+
parallel (1.17.0)
|
31
|
+
public_suffix (3.0.3)
|
32
|
+
rake (10.5.0)
|
33
|
+
rspec (3.8.0)
|
34
|
+
rspec-core (~> 3.8.0)
|
35
|
+
rspec-expectations (~> 3.8.0)
|
36
|
+
rspec-mocks (~> 3.8.0)
|
37
|
+
rspec-core (3.8.0)
|
38
|
+
rspec-support (~> 3.8.0)
|
39
|
+
rspec-expectations (3.8.2)
|
40
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
41
|
+
rspec-support (~> 3.8.0)
|
42
|
+
rspec-mocks (3.8.0)
|
43
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
44
|
+
rspec-support (~> 3.8.0)
|
45
|
+
rspec-support (3.8.0)
|
46
|
+
safe_yaml (1.0.5)
|
47
|
+
simplecov (0.16.1)
|
48
|
+
docile (~> 1.1)
|
49
|
+
json (>= 1.8, < 3)
|
50
|
+
simplecov-html (~> 0.10.0)
|
51
|
+
simplecov-html (0.10.2)
|
52
|
+
vcr (3.0.3)
|
53
|
+
webmock (3.5.1)
|
54
|
+
addressable (>= 2.3.6)
|
55
|
+
crack (>= 0.3.2)
|
56
|
+
hashdiff
|
57
|
+
|
58
|
+
PLATFORMS
|
59
|
+
ruby
|
60
|
+
|
61
|
+
DEPENDENCIES
|
62
|
+
bundler (~> 1.17)
|
63
|
+
rake (~> 10.0)
|
64
|
+
rspec (~> 3.0)
|
65
|
+
simple-scraper!
|
66
|
+
simplecov (~> 0.16.1)
|
67
|
+
vcr (~> 3.0)
|
68
|
+
webmock (~> 3.5)
|
69
|
+
|
70
|
+
BUNDLED WITH
|
71
|
+
1.17.3
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2019 Codica.com
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# Simple Scraper
|
2
|
+
|
3
|
+
This is a fairly simple gem that will help you simplify the parsing of web pages.
|
4
|
+
|
5
|
+
## How it works
|
6
|
+
|
7
|
+
The gem is built on several libraries that do most of the work:
|
8
|
+
- [HTTParty](https://github.com/jnunemaker/httparty) is an HTTP client
|
9
|
+
- [Parallel](https://github.com/grosser/parallel) allows performing queries in multiple threads
|
10
|
+
- [Nokogiri](https://github.com/sparklemotion/nokogiri) is an HTML, XML, SAX, and Reader parser
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'simple-scraper'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle
|
23
|
+
|
24
|
+
Or install it yourself in the following way:
|
25
|
+
|
26
|
+
$ gem install simple-scraper
|
27
|
+
|
28
|
+
## Usage
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require 'simple/scraper'
|
32
|
+
|
33
|
+
scraper = Simple::Scraper::Parser.new(
|
34
|
+
title: { selector: "//h1[@class='title']", handler: ->(els) { els.first.text }, default: 'Ruby' },
|
35
|
+
summary: { selector: "//h2[@class='summary']", handler: ->(els) { els.first.text } },
|
36
|
+
link: { selector: "//a[@class='link']", handler: ->(els) { els.first['href'] } },
|
37
|
+
text_array: { selector: "//*[@class='link']", handler: ->(els) { els.map(&:text) } }
|
38
|
+
)
|
39
|
+
|
40
|
+
result1 = scraper.parse('https://www.codica.com/')
|
41
|
+
result2 = scraper.parse(['https://www.codica.com/1', 'https://www.codica.com/2'])
|
42
|
+
```
|
43
|
+
The response will be similar to:
|
44
|
+
```json
|
45
|
+
[
|
46
|
+
{
|
47
|
+
"title": "scraped title text",
|
48
|
+
"summary": "scraped summary text",
|
49
|
+
"link": "https://www.codica.com/blog/top-ruby-gems-we-cant-live-without/",
|
50
|
+
"text_array": ["text", "text" ...]
|
51
|
+
},
|
52
|
+
...
|
53
|
+
]
|
54
|
+
```
|
55
|
+
Or just find a page:
|
56
|
+
```ruby
|
57
|
+
Simple::Scraper::Finder.find(url: 'https://www.codica.com/', query: {}, headers: {}) do |page|
|
58
|
+
# page is an instance of Nokogiri::HTML::Document
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
### Scraper attributes
|
63
|
+
|
64
|
+
- *`title, summary, link, text_array`* - Arbitrary hash keys; name them whatever you want. The same keys are used in the result.
|
65
|
+
- *`selector`* - XPath. With its help you can find desired elements on the page.
|
66
|
+
- *`handler`* - Any Ruby object that responds to `#call` (a `proc`, a `lambda`, or an instance of a plain Ruby class that defines `#call`). The handler receives one argument: the array-like collection (`Nokogiri::XML::NodeSet`) of elements that `selector` matched on the page. Each element is an instance of `Nokogiri::XML::Element`. See the [Nokogiri](https://github.com/sparklemotion/nokogiri) documentation for more info.
|
67
|
+
- *`default`* - The value returned for this key when the `handler` raises an error, for example when `selector` matches nothing and `els.first` is `nil`. If `default` is omitted, the value is `nil`.
|
68
|
+
|
69
|
+
### Query parameters and headers
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
query = { page: 2 }
|
73
|
+
headers = { 'Authorization' => 'Bearer <token>' }
|
74
|
+
result = scraper.parse('https://www.codica.com/', query: query, headers: headers)
|
75
|
+
```
|
76
|
+
|
77
|
+
## Configuration
|
78
|
+
|
79
|
+
### Proxy
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
Simple::Scraper.configure do |config|
|
83
|
+
config.proxy_addr = 'proxy.something.com'
|
84
|
+
config.proxy_port = 80
|
85
|
+
config.proxy_user = 'user'
|
86
|
+
config.proxy_pass = 'password'
|
87
|
+
end
|
88
|
+
```
|
89
|
+
|
90
|
+
### Logging
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
Simple::Scraper.configure do |config|
|
94
|
+
config.logger = Logger.new('path/to/my/logs')
|
95
|
+
end
|
96
|
+
```
|
97
|
+
> By default the logging is turned off
|
98
|
+
|
99
|
+
### Multithreading
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
Simple::Scraper.configure do |config|
|
103
|
+
config.number_of_threads = 20
|
104
|
+
end
|
105
|
+
```
|
106
|
+
> By default scraper works in 1 thread.
|
107
|
+
|
108
|
+
### Reset
|
109
|
+
|
110
|
+
You might need to reset the configuration to its defaults:
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
Simple::Scraper.reset
|
114
|
+
```
|
115
|
+
|
116
|
+
> Now you can provide new configuration if needed
|
117
|
+
|
118
|
+
## License
|
119
|
+
Copyright © 2015-2019 Codica. It is released under the [MIT License](https://opensource.org/licenses/MIT).
|
120
|
+
|
121
|
+
## About Codica
|
122
|
+
|
123
|
+
[![Codica logo](https://www.codica.com/assets/images/logo/logo.svg)](https://www.codica.com)
|
124
|
+
|
125
|
+
simple-scraper is maintained and funded by Codica. The names and logos for Codica are trademarks of Codica.
|
126
|
+
|
127
|
+
We love open source software! See [our other projects](https://github.com/codica2) or [hire us](https://www.codica.com/) to design, develop, and grow your product.
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the gem, then opens an IRB session.
require 'bundler/setup'
require 'simple/scraper'
require 'irb'

# Add fixtures and/or initialization code here to make experimenting with the
# gem easier. A different console can be used instead of IRB, if preferred.

# (To use Pry, add pry to the Gemfile first.)
# require "pry"
# Pry.start

IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/simple/scraper.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'httparty'
|
3
|
+
require 'parallel'
|
4
|
+
|
5
|
+
require 'simple/scraper/version'
|
6
|
+
require 'simple/scraper/configuration'
|
7
|
+
require 'simple/scraper/finder'
|
8
|
+
require 'simple/scraper/parser'
|
9
|
+
|
10
|
+
# Top-level namespace of the simple-scraper gem.
module Simple
  # Holds the gem-wide Configuration instance and exposes helpers to read,
  # change and reset it.
  module Scraper
    class << self
      # Replaces the whole configuration object at once.
      attr_writer :configuration

      # Returns the current configuration, building a default one on first use.
      def configuration
        @configuration ||= Configuration.new
      end

      # Drops the current settings and installs a fresh default Configuration.
      # Returns the new configuration.
      def reset
        @configuration = Configuration.new
      end

      # Yields the current configuration so the caller can set options on it.
      def configure
        yield(configuration)
      end

      # Logger taken from the configuration (nil unless one was configured).
      def logger
        configuration.logger
      end

      # Stores the logger on the configuration so that `logger` returns it.
      # A plain attr_writer would write to a module-level variable that
      # `logger` never reads, making the assignment a silent no-op.
      def logger=(logger)
        configuration.logger = logger
      end
    end
  end
end
|
data/lib/simple/scraper/configuration.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Simple
  module Scraper
    # Plain settings holder: proxy address/port/credentials, an optional
    # logger, and the number of threads to use.
    class Configuration
      attr_accessor :proxy_addr, :proxy_port, :proxy_user, :proxy_pass,
                    :number_of_threads, :logger

      # Starts with every option unset (nil) except number_of_threads, which
      # defaults to 1.
      def initialize
        {
          proxy_addr: nil,
          proxy_port: nil,
          proxy_user: nil,
          proxy_pass: nil,
          logger: nil,
          number_of_threads: 1
        }.each { |option, value| instance_variable_set("@#{option}", value) }
      end
    end
  end
end
|
data/lib/simple/scraper/finder.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Simple
  module Scraper
    # Fetches a single page over HTTP and hands the parsed document to a block.
    class Finder
      # Downloads `url` with HTTParty, parses the response with Nokogiri::HTML
      # and yields the resulting document.
      #
      # url     - address of the page to fetch.
      # query   - Hash of query-string parameters (default: {}).
      # headers - Hash of request headers (default: {}).
      #
      # Returns the block's result, or nil when a StandardError is raised
      # anywhere in the request, the parsing or the block. The error is
      # reported to Simple::Scraper.logger when one is configured.
      def self.find(url:, query: {}, headers: {})
        # Build a per-call hash instead of writing into a shared one, so
        # concurrent calls with different query/headers cannot interfere.
        options = default_options.merge(query: query, headers: headers)
        yield(Nokogiri::HTML(HTTParty.get(url, options)))
      rescue StandardError => e
        Simple::Scraper.logger&.error e
        # Return nil explicitly: the logger's own return value (e.g. `true`
        # from Logger#error) must not leak out as a "result", because
        # Parser#parse only drops nil entries.
        nil
      end

      # Request options derived from the current configuration.
      #
      # A fresh Hash is built on every call so that proxy settings changed via
      # Simple::Scraper.configure / Simple::Scraper.reset take effect on the
      # next request.
      #
      # NOTE(review): `verify: false` turns off TLS certificate verification
      # for every request. It is kept for backward compatibility; consider
      # making it configurable.
      def self.default_options
        config = Simple::Scraper.configuration
        {
          http_proxyaddr: config.proxy_addr,
          http_proxyport: config.proxy_port,
          http_proxyuser: config.proxy_user,
          http_proxypass: config.proxy_pass,
          verify: false
        }
      end
    end
  end
end
|
data/lib/simple/scraper/parser.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Simple
  module Scraper
    # Applies a set of XPath-based extraction rules to one or more pages.
    class Parser
      attr_reader :attributes

      # attributes - Hash mapping a result key to a rule Hash with :selector,
      #              :handler and optional :default entries. A falsy value is
      #              treated as an empty rule set.
      def initialize(attributes)
        @attributes = attributes || {}
      end

      # Fetches every URL through Finder, using the configured number of
      # threads, and builds one Hash per page from the rules.
      #
      # urls    - a single URL or a collection of URLs.
      # query   - query-string parameters passed to Finder (default: {}).
      # headers - request headers passed to Finder (default: {}).
      #
      # Returns an Array of result Hashes with nil entries removed.
      def parse(urls, query: {}, headers: {})
        targets = Array(urls)
        thread_count = Simple::Scraper.configuration.number_of_threads
        rows = Parallel.map(targets, in_threads: thread_count) do |address|
          Finder.find(url: address, query: query, headers: headers) do |document|
            scrape(document)
          end
        end
        rows.compact
      end

      private

      # Builds the result Hash for one parsed page.
      def scrape(document)
        attributes.each_with_object({}) do |(name, rule), row|
          row[name] = extract(document, rule)
        end
      end

      # Runs a rule's handler on the nodes matched by its selector; when that
      # raises, logs the error and falls back to the rule's :default.
      def extract(document, rule)
        rule[:handler].call(document.xpath(rule[:selector]))
      rescue StandardError => e
        Simple::Scraper.logger&.error e
        rule[:default]
      end
    end
  end
end
|
data/lib/simple/scraper/version.rb
ADDED
data/simple-scraper.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'simple/scraper/version'

Gem::Specification.new do |spec|
  spec.name          = 'simple-scraper'
  spec.version       = Simple::Scraper::VERSION
  spec.authors       = ['Igor Tikhonenko']
  spec.email         = ['tikhonenkoigor@gmail.com']

  spec.summary       = 'Library for parsing/scraping web pages.'
  spec.description   = 'Library was built on top of nokogiri, parallel and httparty gems that do most of the work'
  spec.homepage      = 'https://github.com/codica2/simple-scraper'
  spec.license       = 'MIT'

  # Package every file tracked by git (`git ls-files -z`), except files under
  # test/, spec/ and features/.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Development-only dependencies.
  {
    'bundler' => '~> 1.17',
    'rake' => '~> 10.0',
    'rspec' => '~> 3.0',
    'vcr' => '~> 3.0',
    'webmock' => '~> 3.5',
    'simplecov' => '~> 0.16.1'
  }.each { |gem_name, constraint| spec.add_development_dependency gem_name, constraint }

  # Runtime dependencies.
  {
    'httparty' => '~> 0.16.4',
    'nokogiri' => '~> 1.6',
    'parallel' => '~> 1.11'
  }.each { |gem_name, constraint| spec.add_dependency gem_name, constraint }
end
|
metadata
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple-scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Igor Tikhonenko
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-04-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.17'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.17'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: vcr
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: webmock
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.5'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.5'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: simplecov
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.16.1
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.16.1
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: httparty
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.16.4
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.16.4
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.6'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.6'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: parallel
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '1.11'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '1.11'
|
139
|
+
description: Library was built on top of nokogiri, parallel and httparty gems that
|
140
|
+
do most of the work
|
141
|
+
email:
|
142
|
+
- tikhonenkoigor@gmail.com
|
143
|
+
executables: []
|
144
|
+
extensions: []
|
145
|
+
extra_rdoc_files: []
|
146
|
+
files:
|
147
|
+
- ".gitignore"
|
148
|
+
- ".rspec"
|
149
|
+
- ".rubocop.yml"
|
150
|
+
- ".simplecov"
|
151
|
+
- ".travis.yml"
|
152
|
+
- Gemfile
|
153
|
+
- Gemfile.lock
|
154
|
+
- LICENSE.txt
|
155
|
+
- README.md
|
156
|
+
- Rakefile
|
157
|
+
- bin/console
|
158
|
+
- bin/setup
|
159
|
+
- lib/simple/scraper.rb
|
160
|
+
- lib/simple/scraper/configuration.rb
|
161
|
+
- lib/simple/scraper/finder.rb
|
162
|
+
- lib/simple/scraper/parser.rb
|
163
|
+
- lib/simple/scraper/version.rb
|
164
|
+
- simple-scraper.gemspec
|
165
|
+
homepage: https://github.com/codica2/simple-scraper
|
166
|
+
licenses:
|
167
|
+
- MIT
|
168
|
+
metadata: {}
|
169
|
+
post_install_message:
|
170
|
+
rdoc_options: []
|
171
|
+
require_paths:
|
172
|
+
- lib
|
173
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
174
|
+
requirements:
|
175
|
+
- - ">="
|
176
|
+
- !ruby/object:Gem::Version
|
177
|
+
version: '0'
|
178
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
179
|
+
requirements:
|
180
|
+
- - ">="
|
181
|
+
- !ruby/object:Gem::Version
|
182
|
+
version: '0'
|
183
|
+
requirements: []
|
184
|
+
rubygems_version: 3.0.2
|
185
|
+
signing_key:
|
186
|
+
specification_version: 4
|
187
|
+
summary: Library for parsing/scraping web pages.
|
188
|
+
test_files: []
|