scraped 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rubocop.yml +8 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +20 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +204 -0
- data/Rakefile +13 -0
- data/bin/console +10 -0
- data/bin/setup +8 -0
- data/lib/scraped.rb +42 -0
- data/lib/scraped/core_ext.rb +5 -0
- data/lib/scraped/html.rb +19 -0
- data/lib/scraped/request.rb +32 -0
- data/lib/scraped/request/strategy.rb +20 -0
- data/lib/scraped/request/strategy/live_request.rb +26 -0
- data/lib/scraped/response.rb +12 -0
- data/lib/scraped/response/decorator.rb +34 -0
- data/lib/scraped/response/decorator/absolute_urls.rb +25 -0
- data/lib/scraped/response_decorator.rb +23 -0
- data/lib/scraped/version.rb +3 -0
- data/scraped.gemspec +31 -0
- metadata +177 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 7f71471b758c81074f1ed52e7d06ee9e2ee7df49
  data.tar.gz: a61a2f95fcf2a889aa077fae49f38b387e08accf
SHA512:
  metadata.gz: c7d4c5948c39db02b97723fd0dec0b916e395526f1eadb62f455d0ab8875281c5c0111ec791fc3924c5605a37dfbc2cd5f635ba91e3200c62213bf648a0170d9
  data.tar.gz: 78f1da053d76b752da56cc3da2d4f341e65ca5cf047bebe40f802fe2e1744d0fb86eaccca1177f5bdd756b6b77b934a4368874272f644c1c6bfad9450944a2d1
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,20 @@
# Change Log

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## 0.1.0 - 2017-01-04

### Added

- Support for creating HTML scrapers.
- Scraper classes can handle sections of a page.
- Custom request logic via request strategies. This could be used to fetch
  responses from an archive or a local cache.
- Custom response decorators for altering the response status, headers and body
  before it gets to the scraper class.
- Built-in response decorator for making link and image urls absolute.
- `String#tidy` method which cleans up various space characters and then strips
  leading and trailing whitespace.
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2016 UK Citizens Online Democracy

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
# Scraped
|
2
|
+
|
3
|
+
Write declarative scrapers in Ruby
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'scraped'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install scraped
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
To write a standard HTML scraper, start by creating a subclass of
|
24
|
+
`Scraped::HTML` for each _type_ of page you wish to scrape.
|
25
|
+
|
26
|
+
For example if you were scraping a list of people you might have a
|
27
|
+
`PeopleListPage` class for the list page and a `PersonPage` class for an
|
28
|
+
individual person's page.
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require 'scraped'
|
32
|
+
|
33
|
+
class ExamplePage < Scraped::HTML
|
34
|
+
field :title do
|
35
|
+
noko.at_css('h1').text
|
36
|
+
end
|
37
|
+
|
38
|
+
field :more_information do
|
39
|
+
noko.at_css('a')[:href]
|
40
|
+
end
|
41
|
+
end
|
42
|
+
```
|
43
|
+
|
44
|
+
Then you can create a new instance and pass in a `Scraped::Response` instance.
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
page = ExamplePage.new(response: Scraped::Request.new(url: 'http://example.com').response)
|
48
|
+
|
49
|
+
page.title
|
50
|
+
# => "Example Domain"
|
51
|
+
|
52
|
+
page.more_information
|
53
|
+
# => "http://www.iana.org/domains/reserved"
|
54
|
+
|
55
|
+
page.to_h
|
56
|
+
# => { :title => "Example Domain", :more_information => "http://www.iana.org/domains/reserved" }
|
57
|
+
```
|
58
|
+
|
59
|
+
### Dealing with sections of a page
|
60
|
+
|
61
|
+
When writing an HTML scraper you'll often need to deal with just a part of the page.
|
62
|
+
For example you might want to scrape a table containing a list of people and some
|
63
|
+
associated data.
|
64
|
+
|
65
|
+
To do this you can use the `fragment` method, passing it a hash with one entry
|
66
|
+
where the key is the `noko` fragment you want to use and the value is the class
|
67
|
+
that should handle that fragment.
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
class MemberRow < Scraped::HTML
|
71
|
+
field :name do
|
72
|
+
noko.css('td')[2].text
|
73
|
+
end
|
74
|
+
|
75
|
+
field :party do
|
76
|
+
noko.css('td')[3].text
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
class AllMembersPage < Scraped::HTML
|
81
|
+
field :members do
|
82
|
+
noko.css('table.members-list tr').map do |row|
|
83
|
+
fragment row => MemberRow
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
```
|
88
|
+
|
89
|
+
## Extending
|
90
|
+
|
91
|
+
There are two main ways to extend `scraped` with your own custom logic - custom requests and decorated responses. Custom requests allow you to change where the scraper is getting its responses from, e.g. you might want to make requests to archive.org if the site you're scraping has disappeared. Decorated responses allow you to manipulate the response before it's passed to the scraper. Scraped comes with some [built in decorators](#built-in-decorators) for common tasks such as making all the link urls on the page absolute rather than relative.
|
92
|
+
|
93
|
+
### Custom request strategies
|
94
|
+
|
95
|
+
To make a custom request you'll need to create a class that subclasses `Scraped::Request::Strategy` and defines a `response` method.
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
class FileOnDiskRequest < Scraped::Request::Strategy
|
99
|
+
def response
|
100
|
+
{ body: open(filename).read }
|
101
|
+
end
|
102
|
+
|
103
|
+
private
|
104
|
+
|
105
|
+
def filename
|
106
|
+
@filename ||= File.join(URI.parse(url).host, Digest::SHA1.hexdigest(url))
|
107
|
+
end
|
108
|
+
end
|
109
|
+
```
|
110
|
+
|
111
|
+
The `response` method should return a `Hash` which has at least a `body` key. You can also include `status` and `headers` parameters in the hash to fill out those fields in the response. If not given, status will default to `200` (OK) and headers will default to `{}`.
|
112
|
+
|
113
|
+
To use a custom request strategy pass it to `Scraped::Request`:
|
114
|
+
|
115
|
+
```ruby
|
116
|
+
request = Scraped::Request.new(url: 'http://example.com', strategies: [FileOnDiskRequest, Scraped::Request::Strategy::LiveRequest])
|
117
|
+
page = MyPersonPage.new(response: request.response)
|
118
|
+
```
|
119
|
+
|
120
|
+
### Decorated responses
|
121
|
+
|
122
|
+
To manipulate the response before it is processed by the scraper create a class that subclasses `Scraped::Response::Decorator` and defines any of the following methods: `body`, `url`, `status`, `headers`.
|
123
|
+
|
124
|
+
```ruby
|
125
|
+
class AbsoluteLinks < Scraped::Response::Decorator
|
126
|
+
def body
|
127
|
+
doc = Nokogiri::HTML(super)
|
128
|
+
doc.css('a').each do |link|
|
129
|
+
link[:href] = URI.join(url, link[:href]).to_s
|
130
|
+
end
|
131
|
+
doc.to_s
|
132
|
+
end
|
133
|
+
end
|
134
|
+
```
|
135
|
+
|
136
|
+
As well as the `body` method you can also supply your own `url`, `status` and `headers` methods. You can access the current request body by calling `super` from your method. You can also call `url`, `headers` or `status` to access those properties of the current response.
|
137
|
+
|
138
|
+
To use a response decorator you need to use the `decorator` class method in a `Scraped::HTML` subclass:
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
class PageWithRelativeLinks < Scraped::HTML
|
142
|
+
decorator AbsoluteLinks
|
143
|
+
|
144
|
+
# Other fields...
|
145
|
+
end
|
146
|
+
```
|
147
|
+
|
148
|
+
### Configuring requests and responses
|
149
|
+
|
150
|
+
When passing an array of request strategies or response decorators you should always pass the class, rather than the instance. If you want to configure an instance you can pass in a two element array where the first element is the class and the second element is the config:
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
class CustomHeader < Scraped::Response::Decorator
|
154
|
+
def headers
|
155
|
+
response.headers.merge('X-Greeting' => config[:greeting])
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
class ExamplePage < Scraped::HTML
|
160
|
+
decorator CustomHeader, greeting: 'Hello, world'
|
161
|
+
end
|
162
|
+
```
|
163
|
+
|
164
|
+
With the above code a custom header would be added to the response: `X-Greeting: Hello, world`.
|
165
|
+
|
166
|
+
#### Inheritance with decorators
|
167
|
+
|
168
|
+
When you inherit from a class that already has decorators the child class will also inherit the parent's decorators. There's currently no way to re-order or remove decorators in child classes, though that _may_ be added in the future.
|
169
|
+
|
170
|
+
### Built in decorators
|
171
|
+
|
172
|
+
#### Absolute link and image urls
|
173
|
+
|
174
|
+
Very frequently you will find that you need to make links and images on the page
|
175
|
+
you are scraping absolute rather than relative. Scraped comes with support for
|
176
|
+
this out of the box via the `Scraped::Response::Decorator::AbsoluteUrls`
|
177
|
+
decorator.
|
178
|
+
|
179
|
+
```ruby
|
180
|
+
require 'scraped'
|
181
|
+
|
182
|
+
class MemberPage < Scraped::HTML
|
183
|
+
decorator Scraped::Response::Decorator::AbsoluteUrls
|
184
|
+
|
185
|
+
field :image do
|
186
|
+
# Image url will be absolute thanks to the decorator.
|
187
|
+
noko.at_css('.profile-picture/@src').text
|
188
|
+
end
|
189
|
+
end
|
190
|
+
```
|
191
|
+
|
192
|
+
## Development
|
193
|
+
|
194
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
195
|
+
|
196
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
197
|
+
|
198
|
+
## Contributing
|
199
|
+
|
200
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/everypolitician/scraped.
|
201
|
+
|
202
|
+
## License
|
203
|
+
|
204
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
Rake::TestTask.new(:test) do |t|
|
5
|
+
t.libs << 'test'
|
6
|
+
t.libs << 'lib'
|
7
|
+
t.test_files = FileList['test/**/*_test.rb']
|
8
|
+
end
|
9
|
+
|
10
|
+
require 'rubocop/rake_task'
|
11
|
+
RuboCop::RakeTask.new
|
12
|
+
|
13
|
+
task default: %i(test rubocop)
|
data/bin/console
ADDED
data/bin/setup
ADDED
data/lib/scraped.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'field_serializer'
|
4
|
+
require 'require_all'
|
5
|
+
require_rel 'scraped'
|
6
|
+
|
7
|
+
# Abstract class which scrapers can extend to implement their functionality.
|
8
|
+
class Scraped
|
9
|
+
include FieldSerializer
|
10
|
+
|
11
|
+
def self.decorator(klass, config = {})
|
12
|
+
decorators << config.merge(decorator: klass)
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.decorators
|
16
|
+
@decorators ||= []
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.inherited(klass)
|
20
|
+
klass.decorators.concat(decorators)
|
21
|
+
super
|
22
|
+
end
|
23
|
+
|
24
|
+
def initialize(response:)
|
25
|
+
@original_response = response
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
attr_reader :original_response
|
31
|
+
|
32
|
+
def response
|
33
|
+
@response ||= ResponseDecorator.new(
|
34
|
+
response: original_response,
|
35
|
+
decorators: self.class.decorators
|
36
|
+
).response
|
37
|
+
end
|
38
|
+
|
39
|
+
def url
|
40
|
+
response.url
|
41
|
+
end
|
42
|
+
end
|
data/lib/scraped/html.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
class Scraped
|
2
|
+
class HTML < Scraped
|
3
|
+
private
|
4
|
+
|
5
|
+
def initialize(noko: nil, **args)
|
6
|
+
super(**args)
|
7
|
+
@noko = noko
|
8
|
+
end
|
9
|
+
|
10
|
+
def noko
|
11
|
+
@noko ||= Nokogiri::HTML(response.body)
|
12
|
+
end
|
13
|
+
|
14
|
+
def fragment(mapping)
|
15
|
+
noko_fragment, klass = mapping.to_a.first
|
16
|
+
klass.new(noko: noko_fragment, response: response)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'scraped/request/strategy/live_request'
|
2
|
+
require 'scraped/response'
|
3
|
+
|
4
|
+
class Scraped
|
5
|
+
class Request
|
6
|
+
def initialize(url:, strategies: [Strategy::LiveRequest])
|
7
|
+
@url = url
|
8
|
+
@strategies = strategies
|
9
|
+
end
|
10
|
+
|
11
|
+
def response(decorators: [])
|
12
|
+
abort "Failed to fetch #{url}" if first_successful_response.nil?
|
13
|
+
response = Response.new(first_successful_response.merge(url: url))
|
14
|
+
ResponseDecorator.new(response: response, decorators: decorators).response
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
attr_reader :url, :strategies
|
20
|
+
|
21
|
+
def first_successful_response
|
22
|
+
@first_successful_response ||=
|
23
|
+
strategies.lazy.map do |strategy_config|
|
24
|
+
unless strategy_config.respond_to?(:delete)
|
25
|
+
strategy_config = { strategy: strategy_config }
|
26
|
+
end
|
27
|
+
strategy_class = strategy_config.delete(:strategy)
|
28
|
+
strategy_class.new(url: url, config: strategy_config).response
|
29
|
+
end.reject(&:nil?).first
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class Scraped
|
2
|
+
class Request
|
3
|
+
class Strategy
|
4
|
+
class NotImplementedError < StandardError; end
|
5
|
+
|
6
|
+
def initialize(url:, config: {})
|
7
|
+
@url = url
|
8
|
+
@config = config.to_h
|
9
|
+
end
|
10
|
+
|
11
|
+
def response
|
12
|
+
raise NotImplementedError, "No #{self.class}#response method found"
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
attr_reader :url, :config
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'scraped/request/strategy'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
class Scraped
|
5
|
+
class Request
|
6
|
+
class Strategy
|
7
|
+
class LiveRequest < Strategy
|
8
|
+
def response
|
9
|
+
log "Fetching #{url}"
|
10
|
+
response = open(url)
|
11
|
+
{
|
12
|
+
status: response.status.first.to_i,
|
13
|
+
headers: response.meta,
|
14
|
+
body: response.read,
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def log(message)
|
21
|
+
warn "[#{self.class}] #{message}" if ENV.key?('VERBOSE')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class Scraped
|
2
|
+
class Response
|
3
|
+
class Decorator
|
4
|
+
def initialize(response:, config: {})
|
5
|
+
@response = response
|
6
|
+
@config = config.to_h
|
7
|
+
end
|
8
|
+
|
9
|
+
def decorated_response
|
10
|
+
Response.new(url: url, body: body, headers: headers, status: status)
|
11
|
+
end
|
12
|
+
|
13
|
+
def url
|
14
|
+
response.url
|
15
|
+
end
|
16
|
+
|
17
|
+
def body
|
18
|
+
response.body
|
19
|
+
end
|
20
|
+
|
21
|
+
def headers
|
22
|
+
response.headers
|
23
|
+
end
|
24
|
+
|
25
|
+
def status
|
26
|
+
response.status
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
attr_reader :response, :config
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
class Scraped
|
5
|
+
class Response
|
6
|
+
class Decorator
|
7
|
+
class AbsoluteUrls < Decorator
|
8
|
+
def body
|
9
|
+
Nokogiri::HTML(super).tap do |doc|
|
10
|
+
doc.css('img').each { |img| img[:src] = absolute_url(img[:src]) }
|
11
|
+
doc.css('a').each { |a| a[:href] = absolute_url(a[:href]) }
|
12
|
+
end.to_s
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def absolute_url(relative_url)
|
18
|
+
URI.join(url, relative_url) unless relative_url.to_s.empty?
|
19
|
+
rescue URI::InvalidURIError
|
20
|
+
relative_url
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
class Scraped
|
2
|
+
class ResponseDecorator
|
3
|
+
def initialize(response:, decorators:)
|
4
|
+
@original_response = response
|
5
|
+
@decorators = decorators.to_a
|
6
|
+
end
|
7
|
+
|
8
|
+
def response
|
9
|
+
decorators.reduce(original_response) do |r, decorator_config|
|
10
|
+
unless decorator_config.respond_to?(:[])
|
11
|
+
decorator_config = { decorator: decorator_config }
|
12
|
+
end
|
13
|
+
decorator_class = decorator_config[:decorator]
|
14
|
+
decorator_class.new(response: r, config: decorator_config)
|
15
|
+
.decorated_response
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
attr_reader :original_response, :decorators
|
22
|
+
end
|
23
|
+
end
|
data/scraped.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'scraped/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = 'scraped'
|
8
|
+
spec.version = Scraped::VERSION
|
9
|
+
spec.authors = ['EveryPolitician']
|
10
|
+
spec.email = ['team@everypolitician.org']
|
11
|
+
|
12
|
+
spec.summary = 'Write declarative scrapers in Ruby'
|
13
|
+
spec.homepage = 'https://github.com/everypolitician/scraped'
|
14
|
+
|
15
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
16
|
+
f.match(%r{^(test|spec|features)/})
|
17
|
+
end
|
18
|
+
spec.bindir = 'exe'
|
19
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
|
+
spec.require_paths = ['lib']
|
21
|
+
|
22
|
+
spec.add_runtime_dependency 'nokogiri'
|
23
|
+
spec.add_runtime_dependency 'field_serializer', '>= 0.3.0'
|
24
|
+
spec.add_runtime_dependency 'require_all'
|
25
|
+
|
26
|
+
spec.add_development_dependency 'bundler', '~> 1.13'
|
27
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
28
|
+
spec.add_development_dependency 'minitest', '~> 5.0'
|
29
|
+
spec.add_development_dependency 'pry', '~> 0.10'
|
30
|
+
spec.add_development_dependency 'rubocop', '~> 0.44'
|
31
|
+
end
|
metadata
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scraped
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- EveryPolitician
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-01-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: field_serializer
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.3.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.3.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: require_all
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.13'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.13'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '10.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '10.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: minitest
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '5.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '5.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pry
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.10'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.10'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rubocop
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0.44'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0.44'
|
125
|
+
description:
|
126
|
+
email:
|
127
|
+
- team@everypolitician.org
|
128
|
+
executables: []
|
129
|
+
extensions: []
|
130
|
+
extra_rdoc_files: []
|
131
|
+
files:
|
132
|
+
- ".gitignore"
|
133
|
+
- ".rubocop.yml"
|
134
|
+
- ".travis.yml"
|
135
|
+
- CHANGELOG.md
|
136
|
+
- Gemfile
|
137
|
+
- LICENSE.txt
|
138
|
+
- README.md
|
139
|
+
- Rakefile
|
140
|
+
- bin/console
|
141
|
+
- bin/setup
|
142
|
+
- lib/scraped.rb
|
143
|
+
- lib/scraped/core_ext.rb
|
144
|
+
- lib/scraped/html.rb
|
145
|
+
- lib/scraped/request.rb
|
146
|
+
- lib/scraped/request/strategy.rb
|
147
|
+
- lib/scraped/request/strategy/live_request.rb
|
148
|
+
- lib/scraped/response.rb
|
149
|
+
- lib/scraped/response/decorator.rb
|
150
|
+
- lib/scraped/response/decorator/absolute_urls.rb
|
151
|
+
- lib/scraped/response_decorator.rb
|
152
|
+
- lib/scraped/version.rb
|
153
|
+
- scraped.gemspec
|
154
|
+
homepage: https://github.com/everypolitician/scraped
|
155
|
+
licenses: []
|
156
|
+
metadata: {}
|
157
|
+
post_install_message:
|
158
|
+
rdoc_options: []
|
159
|
+
require_paths:
|
160
|
+
- lib
|
161
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ">="
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
|
+
requirements:
|
168
|
+
- - ">="
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
version: '0'
|
171
|
+
requirements: []
|
172
|
+
rubyforge_project:
|
173
|
+
rubygems_version: 2.5.2
|
174
|
+
signing_key:
|
175
|
+
specification_version: 4
|
176
|
+
summary: Write declarative scrapers in Ruby
|
177
|
+
test_files: []
|