scraped 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e86824184d28b3141f65c48a5fc51e7502c7781b
4
- data.tar.gz: d33f15a4b1e7094b2677dbf09c9573bbeb49fcff
3
+ metadata.gz: ffbf1466cb9e85f66f2c41dbbf9ef26f25ee568d
4
+ data.tar.gz: 7106cfa4f03a70452a3f53ac0b3f30de8d912162
5
5
  SHA512:
6
- metadata.gz: 64f7fec521757f72ac2b927cddd5bf958d4d0fca01838ceb0e089bb40435e0a2d79ad6f4d737a0c83b4bba3cd26bed1dc252a2a0225169c1b382d4951157704a
7
- data.tar.gz: 23368dd65b5773b317159343820bc9bd44525e42a38b32404616a17a3d83cb6a1fc908c2852c98607c0baf1d1f71ef5a6fc15ceaab7ce30753fbe6d74def3431
6
+ metadata.gz: 3193bab3627b954eb7db141fcf219eb6b92151d88d6829179a39410eff47c6b0c8aa4639c01ef799167c3d8b804f21e3711c5228bbfe26173df87dd80caa54e2
7
+ data.tar.gz: 12d9d2e59a564bfa823f4575c2c13471a0042e8d0fb02606ba8f142578bcdaff35481fa51ea8acf2d67e64bfc95a1cce1a89138b4c9b7b304ac370189d72fd27
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/).
7
7
 
8
+ ## 0.5.0 - 2017-03-17
9
+
10
+ ### Added
11
+
12
+ - Add `headers:` parameter to `Scraped::Request.new` for passing request
13
+ headers to the underlying strategy.
14
+
8
15
  ## 0.4.0 - 2017-03-14
9
16
 
10
17
  ### Changed
data/README.md CHANGED
@@ -86,6 +86,15 @@ class AllMembersPage < Scraped::HTML
86
86
  end
87
87
  ```
88
88
 
89
+ ### Passing request headers
90
+
91
+ To set request headers you can pass a `headers:` argument to `Scraped::Request.new`:
92
+
93
+ ```ruby
94
+ response = Scraped::Request.new(url: 'http://example.com', headers: { 'Cookie' => 'user_id=42' }).response
95
+ page = ExamplePage.new(response: response)
96
+ ```
97
+
89
98
  ## Extending
90
99
 
91
100
  There are two main ways to extend `scraped` with your own custom logic - custom requests and decorated responses. Custom requests allow you to change where the scraper is getting its responses from, e.g. you might want to make requests to archive.org if the site you're scraping has disappeared. Decorated responses allow you to manipulate the response before it's passed to the scraper. Scraped comes with some [built in decorators](#built-in-decorators) for common tasks such as making all the link urls on the page absolute rather than relative.
@@ -3,8 +3,9 @@ require 'scraped/response'
3
3
 
4
4
  module Scraped
5
5
  class Request
6
- def initialize(url:, strategies: [Strategy::LiveRequest])
6
+ def initialize(url:, headers: {}, strategies: [Strategy::LiveRequest])
7
7
  @url = url
8
+ @headers = headers
8
9
  @strategies = strategies
9
10
  end
10
11
 
@@ -16,7 +17,7 @@ module Scraped
16
17
 
17
18
  private
18
19
 
19
- attr_reader :url, :strategies
20
+ attr_reader :url, :headers, :strategies
20
21
 
21
22
  def first_successful_response
22
23
  @first_successful_response ||=
@@ -25,7 +26,7 @@ module Scraped
25
26
  strategy_config = { strategy: strategy_config }
26
27
  end
27
28
  strategy_class = strategy_config.delete(:strategy)
28
- strategy_class.new(url: url, config: strategy_config).response
29
+ strategy_class.new(url: url, headers: headers, config: strategy_config).response
29
30
  end.reject(&:nil?).first
30
31
  end
31
32
  end
@@ -3,8 +3,9 @@ module Scraped
3
3
  class Strategy
4
4
  class NotImplementedError < StandardError; end
5
5
 
6
- def initialize(url:, config: {})
6
+ def initialize(url:, headers: {}, config: {})
7
7
  @url = url
8
+ @headers = headers
8
9
  @config = config.to_h
9
10
  end
10
11
 
@@ -14,7 +15,7 @@ module Scraped
14
15
 
15
16
  private
16
17
 
17
- attr_reader :url, :config
18
+ attr_reader :url, :headers, :config
18
19
  end
19
20
  end
20
21
  end
@@ -7,7 +7,7 @@ module Scraped
7
7
  class LiveRequest < Strategy
8
8
  def response
9
9
  log "Fetching #{url}"
10
- response = open(url)
10
+ response = open(url, headers)
11
11
  {
12
12
  status: response.status.first.to_i,
13
13
  headers: response.meta,
@@ -1,3 +1,3 @@
1
1
  module Scraped
2
- VERSION = '0.4.0'.freeze
2
+ VERSION = '0.5.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraped
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - EveryPolitician
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-03-15 00:00:00.000000000 Z
11
+ date: 2017-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri