scraped 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +9 -0
- data/lib/scraped/request.rb +4 -3
- data/lib/scraped/request/strategy.rb +3 -2
- data/lib/scraped/request/strategy/live_request.rb +1 -1
- data/lib/scraped/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ffbf1466cb9e85f66f2c41dbbf9ef26f25ee568d
|
4
|
+
data.tar.gz: 7106cfa4f03a70452a3f53ac0b3f30de8d912162
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3193bab3627b954eb7db141fcf219eb6b92151d88d6829179a39410eff47c6b0c8aa4639c01ef799167c3d8b804f21e3711c5228bbfe26173df87dd80caa54e2
|
7
|
+
data.tar.gz: 12d9d2e59a564bfa823f4575c2c13471a0042e8d0fb02606ba8f142578bcdaff35481fa51ea8acf2d67e64bfc95a1cce1a89138b4c9b7b304ac370189d72fd27
|
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
7
7
|
|
8
|
+
## 0.5.0 - 2017-03-17
|
9
|
+
|
10
|
+
### Added
|
11
|
+
|
12
|
+
- Add `headers:` parameter to `Scraped::Request.new` for passing request
|
13
|
+
headers to the underlying strategy.
|
14
|
+
|
8
15
|
## 0.4.0 - 2017-03-14
|
9
16
|
|
10
17
|
### Changed
|
data/README.md
CHANGED
@@ -86,6 +86,15 @@ class AllMembersPage < Scraped::HTML
|
|
86
86
|
end
|
87
87
|
```
|
88
88
|
|
89
|
+
### Passing request headers
|
90
|
+
|
91
|
+
To set request headers you can pass a `headers:` argument to `Scraped::Request.new`:
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
response = Scraped::Request.new(url: 'http://example.com', headers: { 'Cookie' => 'user_id=42' }).response
|
95
|
+
page = ExamplePage.new(response: response)
|
96
|
+
```
|
97
|
+
|
89
98
|
## Extending
|
90
99
|
|
91
100
|
There are two main ways to extend `scraped` with your own custom logic: custom requests and decorated responses. Custom requests allow you to change where the scraper is getting its responses from, e.g. you might want to make requests to archive.org if the site you're scraping has disappeared. Decorated responses allow you to manipulate the response before it's passed to the scraper. Scraped comes with some [built-in decorators](#built-in-decorators) for common tasks such as making all the link URLs on the page absolute rather than relative.
|
data/lib/scraped/request.rb
CHANGED
@@ -3,8 +3,9 @@ require 'scraped/response'
|
|
3
3
|
|
4
4
|
module Scraped
|
5
5
|
class Request
|
6
|
-
def initialize(url:, strategies: [Strategy::LiveRequest])
|
6
|
+
def initialize(url:, headers: {}, strategies: [Strategy::LiveRequest])
|
7
7
|
@url = url
|
8
|
+
@headers = headers
|
8
9
|
@strategies = strategies
|
9
10
|
end
|
10
11
|
|
@@ -16,7 +17,7 @@ module Scraped
|
|
16
17
|
|
17
18
|
private
|
18
19
|
|
19
|
-
attr_reader :url, :strategies
|
20
|
+
attr_reader :url, :headers, :strategies
|
20
21
|
|
21
22
|
def first_successful_response
|
22
23
|
@first_successful_response ||=
|
@@ -25,7 +26,7 @@ module Scraped
|
|
25
26
|
strategy_config = { strategy: strategy_config }
|
26
27
|
end
|
27
28
|
strategy_class = strategy_config.delete(:strategy)
|
28
|
-
strategy_class.new(url: url, config: strategy_config).response
|
29
|
+
strategy_class.new(url: url, headers: headers, config: strategy_config).response
|
29
30
|
end.reject(&:nil?).first
|
30
31
|
end
|
31
32
|
end
|
data/lib/scraped/request/strategy.rb
CHANGED
@@ -3,8 +3,9 @@ module Scraped
|
|
3
3
|
class Strategy
|
4
4
|
class NotImplementedError < StandardError; end
|
5
5
|
|
6
|
-
def initialize(url:, config: {})
|
6
|
+
def initialize(url:, headers: {}, config: {})
|
7
7
|
@url = url
|
8
|
+
@headers = headers
|
8
9
|
@config = config.to_h
|
9
10
|
end
|
10
11
|
|
@@ -14,7 +15,7 @@ module Scraped
|
|
14
15
|
|
15
16
|
private
|
16
17
|
|
17
|
-
attr_reader :url, :config
|
18
|
+
attr_reader :url, :headers, :config
|
18
19
|
end
|
19
20
|
end
|
20
21
|
end
|
data/lib/scraped/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scraped
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- EveryPolitician
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|