scraped 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +10 -0
 - data/.rubocop.yml +8 -0
 - data/.travis.yml +8 -0
 - data/CHANGELOG.md +20 -0
 - data/Gemfile +4 -0
 - data/LICENSE.txt +21 -0
 - data/README.md +204 -0
 - data/Rakefile +13 -0
 - data/bin/console +10 -0
 - data/bin/setup +8 -0
 - data/lib/scraped.rb +42 -0
 - data/lib/scraped/core_ext.rb +5 -0
 - data/lib/scraped/html.rb +19 -0
 - data/lib/scraped/request.rb +32 -0
 - data/lib/scraped/request/strategy.rb +20 -0
 - data/lib/scraped/request/strategy/live_request.rb +26 -0
 - data/lib/scraped/response.rb +12 -0
 - data/lib/scraped/response/decorator.rb +34 -0
 - data/lib/scraped/response/decorator/absolute_urls.rb +25 -0
 - data/lib/scraped/response_decorator.rb +23 -0
 - data/lib/scraped/version.rb +3 -0
 - data/scraped.gemspec +31 -0
 - metadata +177 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 7f71471b758c81074f1ed52e7d06ee9e2ee7df49
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: a61a2f95fcf2a889aa077fae49f38b387e08accf
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: c7d4c5948c39db02b97723fd0dec0b916e395526f1eadb62f455d0ab8875281c5c0111ec791fc3924c5605a37dfbc2cd5f635ba91e3200c62213bf648a0170d9
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 78f1da053d76b752da56cc3da2d4f341e65ca5cf047bebe40f802fe2e1744d0fb86eaccca1177f5bdd756b6b77b934a4368874272f644c1c6bfad9450944a2d1
         
     | 
    
        data/.gitignore
    ADDED
    
    
    
        data/.rubocop.yml
    ADDED
    
    
    
        data/.travis.yml
    ADDED
    
    
    
        data/CHANGELOG.md
    ADDED
    
    | 
         @@ -0,0 +1,20 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Change Log
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            All notable changes to this project will be documented in this file.
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            The format is based on [Keep a Changelog](http://keepachangelog.com/)
         
     | 
| 
      
 6 
     | 
    
         
            +
            and this project adheres to [Semantic Versioning](http://semver.org/).
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ## 0.1.0 - 2017-01-04
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            ### Added
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            - Support for creating HTML scrapers.
         
     | 
| 
      
 13 
     | 
    
         
            +
            - Scraper classes can handle sections of a page.
         
     | 
| 
      
 14 
     | 
    
         
            +
            - Custom request logic via request strategies. This could be used to fetch
         
     | 
| 
      
 15 
     | 
    
         
            +
              responses from an archive or a local cache.
         
     | 
| 
      
 16 
     | 
    
         
            +
            - Custom response decorators for altering the response status, headers and body
         
     | 
| 
      
 17 
     | 
    
         
            +
              before it gets to the scraper class.
         
     | 
| 
      
 18 
     | 
    
         
            +
            - Built-in response decorator for making link and image urls absolute.
         
     | 
| 
      
 19 
     | 
    
         
            +
            - `String#tidy` method which cleans up various space characters and then strips
         
     | 
| 
      
 20 
     | 
    
         
            +
              leading and trailing whitespace.
         
     | 
    
        data/Gemfile
    ADDED
    
    
    
        data/LICENSE.txt
    ADDED
    
    | 
         @@ -0,0 +1,21 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            The MIT License (MIT)
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Copyright (c) 2016 UK Citizens Online Democracy
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            Permission is hereby granted, free of charge, to any person obtaining a copy
         
     | 
| 
      
 6 
     | 
    
         
            +
            of this software and associated documentation files (the "Software"), to deal
         
     | 
| 
      
 7 
     | 
    
         
            +
            in the Software without restriction, including without limitation the rights
         
     | 
| 
      
 8 
     | 
    
         
            +
            to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         
     | 
| 
      
 9 
     | 
    
         
            +
            copies of the Software, and to permit persons to whom the Software is
         
     | 
| 
      
 10 
     | 
    
         
            +
            furnished to do so, subject to the following conditions:
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            The above copyright notice and this permission notice shall be included in all
         
     | 
| 
      
 13 
     | 
    
         
            +
            copies or substantial portions of the Software.
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         
     | 
| 
      
 16 
     | 
    
         
            +
            IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         
     | 
| 
      
 17 
     | 
    
         
            +
            FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         
     | 
| 
      
 18 
     | 
    
         
            +
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         
     | 
| 
      
 19 
     | 
    
         
            +
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         
     | 
| 
      
 20 
     | 
    
         
            +
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         
     | 
| 
      
 21 
     | 
    
         
            +
            SOFTWARE.
         
     | 
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1,204 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Scraped
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Write declarative scrapers in Ruby
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            ## Installation
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            Add this line to your application's Gemfile:
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 10 
     | 
    
         
            +
            gem 'scraped'
         
     | 
| 
      
 11 
     | 
    
         
            +
            ```
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            And then execute:
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                $ bundle
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            Or install it yourself as:
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                $ gem install scraped
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            ## Usage
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            To write a standard HTML scraper, start by creating a subclass of
         
     | 
| 
      
 24 
     | 
    
         
            +
            `Scraped::HTML` for each _type_ of page you wish to scrape.
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            For example, if you were scraping a list of people, you might have a
         
     | 
| 
      
 27 
     | 
    
         
            +
            `PeopleListPage` class for the list page and a `PersonPage` class for an
         
     | 
| 
      
 28 
     | 
    
         
            +
            individual person's page.
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 31 
     | 
    
         
            +
            require 'scraped'
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            class ExamplePage < Scraped::HTML
         
     | 
| 
      
 34 
     | 
    
         
            +
              field :title do
         
     | 
| 
      
 35 
     | 
    
         
            +
                noko.at_css('h1').text
         
     | 
| 
      
 36 
     | 
    
         
            +
              end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
              field :more_information do
         
     | 
| 
      
 39 
     | 
    
         
            +
                noko.at_css('a')[:href]
         
     | 
| 
      
 40 
     | 
    
         
            +
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
            end
         
     | 
| 
      
 42 
     | 
    
         
            +
            ```
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
            Then you can create a new instance and pass in a `Scraped::Response` instance.
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 47 
     | 
    
         
            +
            page = ExamplePage.new(response: Scraped::Request.new(url: 'http://example.com').response)
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
            page.title
         
     | 
| 
      
 50 
     | 
    
         
            +
            # => "Example Domain"
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
            page.more_information
         
     | 
| 
      
 53 
     | 
    
         
            +
            # => "http://www.iana.org/domains/reserved"
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
            page.to_h
         
     | 
| 
      
 56 
     | 
    
         
            +
            # => { :title => "Example Domain", :more_information => "http://www.iana.org/domains/reserved" }
         
     | 
| 
      
 57 
     | 
    
         
            +
            ```
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            ### Dealing with sections of a page
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
            When writing an HTML scraper you'll often need to deal with just a part of the page.
         
     | 
| 
      
 62 
     | 
    
         
            +
            For example, you might want to scrape a table containing a list of people and some
         
     | 
| 
      
 63 
     | 
    
         
            +
            associated data.
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
            To do this you can use the `fragment` method, passing it a hash with one entry
         
     | 
| 
      
 66 
     | 
    
         
            +
            where the key is the `noko` fragment you want to use and the value is the class
         
     | 
| 
      
 67 
     | 
    
         
            +
            that should handle that fragment.
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 70 
     | 
    
         
            +
            class MemberRow < Scraped::HTML
         
     | 
| 
      
 71 
     | 
    
         
            +
              field :name do
         
     | 
| 
      
 72 
     | 
    
         
            +
                noko.css('td')[2].text
         
     | 
| 
      
 73 
     | 
    
         
            +
              end
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
              field :party do
         
     | 
| 
      
 76 
     | 
    
         
            +
                noko.css('td')[3].text
         
     | 
| 
      
 77 
     | 
    
         
            +
              end
         
     | 
| 
      
 78 
     | 
    
         
            +
            end
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
            class AllMembersPage < Scraped::HTML
         
     | 
| 
      
 81 
     | 
    
         
            +
              field :members do
         
     | 
| 
      
 82 
     | 
    
         
            +
                noko.css('table.members-list tr').map do |row|
         
     | 
| 
      
 83 
     | 
    
         
            +
                  fragment row => MemberRow
         
     | 
| 
      
 84 
     | 
    
         
            +
                end
         
     | 
| 
      
 85 
     | 
    
         
            +
              end
         
     | 
| 
      
 86 
     | 
    
         
            +
            end
         
     | 
| 
      
 87 
     | 
    
         
            +
            ```
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
            ## Extending
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
            There are two main ways to extend `scraped` with your own custom logic - custom requests and decorated responses. Custom requests allow you to change where the scraper is getting its responses from, e.g. you might want to make requests to archive.org if the site you're scraping has disappeared. Decorated responses allow you to manipulate the response before it's passed to the scraper. Scraped comes with some [built in decorators](#built-in-decorators) for common tasks such as making all the link urls on the page absolute rather than relative.
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
            ### Custom request strategies
         
     | 
| 
      
 94 
     | 
    
         
            +
             
     | 
| 
      
 95 
     | 
    
         
            +
            To make a custom request you'll need to create a class that subclasses `Scraped::Request::Strategy` and defines a `response` method.
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
      
 97 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 98 
     | 
    
         
            +
            class FileOnDiskRequest < Scraped::Request::Strategy
         
     | 
| 
      
 99 
     | 
    
         
            +
              def response
         
     | 
| 
      
 100 
     | 
    
         
            +
                { body: open(filename).read }
         
     | 
| 
      
 101 
     | 
    
         
            +
              end
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
      
 103 
     | 
    
         
            +
              private
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
              def filename
         
     | 
| 
      
 106 
     | 
    
         
            +
                @filename ||= File.join(URI.parse(url).host, Digest::SHA1.hexdigest(url))
         
     | 
| 
      
 107 
     | 
    
         
            +
              end
         
     | 
| 
      
 108 
     | 
    
         
            +
            end
         
     | 
| 
      
 109 
     | 
    
         
            +
            ```
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
            The `response` method should return a `Hash` which has at least a `body` key. You can also include `status` and `headers` keys in the hash to fill out those fields in the response. If not given, status will default to `200` (OK) and headers will default to `{}`.
         
     | 
| 
      
 112 
     | 
    
         
            +
             
     | 
| 
      
 113 
     | 
    
         
            +
            To use a custom request strategy pass it to `Scraped::Request`:
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 116 
     | 
    
         
            +
            request = Scraped::Request.new(url: 'http://example.com', strategies: [FileOnDiskRequest, Scraped::Request::Strategy::LiveRequest])
         
     | 
| 
      
 117 
     | 
    
         
            +
            page = MyPersonPage.new(response: request.response)
         
     | 
| 
      
 118 
     | 
    
         
            +
            ```
         
     | 
| 
      
 119 
     | 
    
         
            +
             
     | 
| 
      
 120 
     | 
    
         
            +
            ### Decorated responses
         
     | 
| 
      
 121 
     | 
    
         
            +
             
     | 
| 
      
 122 
     | 
    
         
            +
            To manipulate the response before it is processed by the scraper, create a class that subclasses `Scraped::Response::Decorator` and defines any of the following methods: `body`, `url`, `status`, `headers`.
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 125 
     | 
    
         
            +
            class AbsoluteLinks < Scraped::Response::Decorator
         
     | 
| 
      
 126 
     | 
    
         
            +
              def body
         
     | 
| 
      
 127 
     | 
    
         
            +
                doc = Nokogiri::HTML(super)
         
     | 
| 
      
 128 
     | 
    
         
            +
                doc.css('a').each do |link|
         
     | 
| 
      
 129 
     | 
    
         
            +
                  link[:href] = URI.join(url, link[:href]).to_s
         
     | 
| 
      
 130 
     | 
    
         
            +
                end
         
     | 
| 
      
 131 
     | 
    
         
            +
                doc.to_s
         
     | 
| 
      
 132 
     | 
    
         
            +
              end
         
     | 
| 
      
 133 
     | 
    
         
            +
            end
         
     | 
| 
      
 134 
     | 
    
         
            +
            ```
         
     | 
| 
      
 135 
     | 
    
         
            +
             
     | 
| 
      
 136 
     | 
    
         
            +
            As well as the `body` method you can also supply your own `url`, `status` and `headers` methods. You can access the current response body by calling `super` from your method. You can also call `url`, `headers` or `status` to access those properties of the current response.
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
            To use a response decorator you need to use the `decorator` class method in a `Scraped::HTML` subclass:
         
     | 
| 
      
 139 
     | 
    
         
            +
             
     | 
| 
      
 140 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 141 
     | 
    
         
            +
            class PageWithRelativeLinks < Scraped::HTML
         
     | 
| 
      
 142 
     | 
    
         
            +
              decorator AbsoluteLinks
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
              # Other fields...
         
     | 
| 
      
 145 
     | 
    
         
            +
            end
         
     | 
| 
      
 146 
     | 
    
         
            +
            ```
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
            ### Configuring requests and responses
         
     | 
| 
      
 149 
     | 
    
         
            +
             
     | 
| 
      
 150 
     | 
    
         
            +
            When passing an array of request strategies or response decorators you should always pass the class, rather than the instance. If you want to configure an instance you can pass in a two element array where the first element is the class and the second element is the config. When declaring a decorator with the `decorator` class method, pass the config as a second argument after the class:
         
     | 
| 
      
 151 
     | 
    
         
            +
             
     | 
| 
      
 152 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 153 
     | 
    
         
            +
            class CustomHeader < Scraped::Response::Decorator
         
     | 
| 
      
 154 
     | 
    
         
            +
              def headers
         
     | 
| 
      
 155 
     | 
    
         
            +
                response.headers.merge('X-Greeting' => config[:greeting])
         
     | 
| 
      
 156 
     | 
    
         
            +
              end
         
     | 
| 
      
 157 
     | 
    
         
            +
            end
         
     | 
| 
      
 158 
     | 
    
         
            +
             
     | 
| 
      
 159 
     | 
    
         
            +
            class ExamplePage < Scraped::HTML
         
     | 
| 
      
 160 
     | 
    
         
            +
              decorator CustomHeader, greeting: 'Hello, world'
         
     | 
| 
      
 161 
     | 
    
         
            +
            end
         
     | 
| 
      
 162 
     | 
    
         
            +
            ```
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
      
 164 
     | 
    
         
            +
            With the above code a custom header would be added to the response: `X-Greeting: Hello, world`.
         
     | 
| 
      
 165 
     | 
    
         
            +
             
     | 
| 
      
 166 
     | 
    
         
            +
            #### Inheritance with decorators
         
     | 
| 
      
 167 
     | 
    
         
            +
             
     | 
| 
      
 168 
     | 
    
         
            +
            When you inherit from a class that already has decorators, the child class will also inherit the parent's decorators. There's currently no way to re-order or remove decorators in child classes, though that _may_ be added in the future.
         
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
      
 170 
     | 
    
         
            +
            ### Built in decorators
         
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
      
 172 
     | 
    
         
            +
            #### Absolute link and image urls
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
      
 174 
     | 
    
         
            +
            Very frequently you will find that you need to make links and images on the page
         
     | 
| 
      
 175 
     | 
    
         
            +
            you are scraping absolute rather than relative. Scraped comes with support for
         
     | 
| 
      
 176 
     | 
    
         
            +
            this out of the box via the `Scraped::Response::Decorator::AbsoluteUrls`
         
     | 
| 
      
 177 
     | 
    
         
            +
            decorator.
         
     | 
| 
      
 178 
     | 
    
         
            +
             
     | 
| 
      
 179 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 180 
     | 
    
         
            +
            require 'scraped'
         
     | 
| 
      
 181 
     | 
    
         
            +
             
     | 
| 
      
 182 
     | 
    
         
            +
            class MemberPage < Scraped::HTML
         
     | 
| 
      
 183 
     | 
    
         
            +
              decorator Scraped::Response::Decorator::AbsoluteUrls
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
              field :image do
         
     | 
| 
      
 186 
     | 
    
         
            +
                # Image url will be absolute thanks to the decorator.
         
     | 
| 
      
 187 
     | 
    
         
            +
                noko.at_css('.profile-picture/@src').text
         
     | 
| 
      
 188 
     | 
    
         
            +
              end
         
     | 
| 
      
 189 
     | 
    
         
            +
            end
         
     | 
| 
      
 190 
     | 
    
         
            +
            ```
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
            ## Development
         
     | 
| 
      
 193 
     | 
    
         
            +
             
     | 
| 
      
 194 
     | 
    
         
            +
            After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
         
     | 
| 
      
 195 
     | 
    
         
            +
             
     | 
| 
      
 196 
     | 
    
         
            +
            To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
         
     | 
| 
      
 197 
     | 
    
         
            +
             
     | 
| 
      
 198 
     | 
    
         
            +
            ## Contributing
         
     | 
| 
      
 199 
     | 
    
         
            +
             
     | 
| 
      
 200 
     | 
    
         
            +
            Bug reports and pull requests are welcome on GitHub at https://github.com/everypolitician/scraped.
         
     | 
| 
      
 201 
     | 
    
         
            +
             
     | 
| 
      
 202 
     | 
    
         
            +
            ## License
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
            The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
         
     | 
    
        data/Rakefile
    ADDED
    
    | 
         @@ -0,0 +1,13 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'bundler/gem_tasks'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'rake/testtask'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            Rake::TestTask.new(:test) do |t|
         
     | 
| 
      
 5 
     | 
    
         
            +
              t.libs << 'test'
         
     | 
| 
      
 6 
     | 
    
         
            +
              t.libs << 'lib'
         
     | 
| 
      
 7 
     | 
    
         
            +
              t.test_files = FileList['test/**/*_test.rb']
         
     | 
| 
      
 8 
     | 
    
         
            +
            end
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            require 'rubocop/rake_task'
         
     | 
| 
      
 11 
     | 
    
         
            +
            RuboCop::RakeTask.new
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            task default: %i(test rubocop)
         
     | 
    
        data/bin/console
    ADDED
    
    
    
        data/bin/setup
    ADDED
    
    
    
        data/lib/scraped.rb
    ADDED
    
    | 
         @@ -0,0 +1,42 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'nokogiri'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'field_serializer'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'require_all'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require_rel 'scraped'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            # Abstract base class that scrapers extend to implement their
            # functionality.
            #
            # Each scraper class keeps its own list of response decorators. When
            # a subclass is created it starts with a copy of the decorators that
            # its parent had registered at that moment.
            class Scraped
              include FieldSerializer

              class << self
                # Registers a decorator class for this scraper.
                #
                # klass  - the decorator class to apply to the response.
                # config - optional Hash of settings stored alongside the class.
                #
                # Returns the Array of registered decorator configurations.
                def decorator(klass, config = {})
                  entry = config.merge(decorator: klass)
                  decorators.push(entry)
                end

                # Returns the Array of decorator configurations registered on
                # this class, creating it on first use.
                def decorators
                  @decorators ||= []
                end

                # Hook run when a subclass is defined: seeds the subclass with
                # the decorators currently registered on this class.
                def inherited(klass)
                  klass.decorators.concat(decorators)
                  super
                end
              end

              # response - the undecorated response this scraper works from.
              def initialize(response:)
                @original_response = response
              end

              private

              attr_reader :original_response

              # Returns the original response after every decorator registered
              # on this scraper's class has been applied. Memoized.
              def response
                @response ||= begin
                  pipeline = ResponseDecorator.new(
                    response:   original_response,
                    decorators: self.class.decorators
                  )
                  pipeline.response
                end
              end

              # Returns the URL reported by the decorated response.
              def url
                response.url
              end
            end
         
     | 
    
        data/lib/scraped/html.rb
    ADDED
    
    | 
         @@ -0,0 +1,19 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Scraped
              # Scraper base class for HTML documents. Exposes the parsed
              # document (or a supplied fragment of it) through the private
              # #noko helper.
              class HTML < Scraped
                private

                # noko - optional already-parsed node to scrape instead of the
                #        whole response body.
                # args - remaining keyword arguments, passed on to Scraped.
                def initialize(noko: nil, **args)
                  super(**args)
                  @noko = noko
                end

                # Returns the node supplied at construction time, or else the
                # response body parsed with Nokogiri::HTML (memoized).
                def noko
                  return @noko if @noko
                  @noko = Nokogiri::HTML(response.body)
                end

                # Builds a scraper for part of the document.
                #
                # mapping - a single-pair mapping of `node => scraper_class`;
                #           only the first pair is used.
                #
                # Returns a new instance of the scraper class, given the node
                # and this scraper's response.
                def fragment(mapping)
                  first_pair = mapping.to_a.first
                  node, scraper_class = first_pair
                  scraper_class.new(noko: node, response: response)
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,32 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'scraped/request/strategy/live_request'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'scraped/response'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            class Scraped
              # Fetches a URL by trying each configured strategy in order and
              # using the first one that returns a non-nil response.
              class Request
                # url        - the URL to fetch.
                # strategies - Array whose entries are either a strategy class
                #              or a Hash with a :strategy key (the class) plus
                #              any extra settings passed to it as config.
                def initialize(url:, strategies: [Strategy::LiveRequest])
                  @url = url
                  @strategies = strategies
                end

                # Returns the fetched Response after the given decorators have
                # been applied. Aborts the process with a message if no
                # strategy produced a response.
                def response(decorators: [])
                  abort "Failed to fetch #{url}" if first_successful_response.nil?
                  response = Response.new(first_successful_response.merge(url: url))
                  ResponseDecorator.new(response: response, decorators: decorators).response
                end

                private

                attr_reader :url, :strategies

                # Returns the first non-nil result from the strategies, tried
                # lazily so later strategies only run if earlier ones return
                # nil. Memoized.
                def first_successful_response
                  @first_successful_response ||=
                    strategies.lazy.map do |strategy_config|
                      # Work on a copy: removing :strategy from the caller's own
                      # Hash would break any later Request built from the same
                      # strategies list.
                      options = if strategy_config.respond_to?(:delete)
                                  strategy_config.dup
                                else
                                  { strategy: strategy_config }
                                end
                      strategy_class = options.delete(:strategy)
                      strategy_class.new(url: url, config: options).response
                    end.reject(&:nil?).first
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,20 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Scraped
              class Request
                # Base class for request strategies. Subclasses override
                # #response; the base implementation only raises.
                class Strategy
                  # Raised by #response when a subclass has not overridden it.
                  class NotImplementedError < StandardError
                  end

                  # url    - the URL the strategy should fetch.
                  # config - optional settings; stored after conversion with
                  #          #to_h.
                  def initialize(url:, config: {})
                    @config = config.to_h
                    @url = url
                  end

                  # Must be overridden by subclasses.
                  #
                  # Raises NotImplementedError naming the concrete class.
                  def response
                    message = "No #{self.class}#response method found"
                    raise NotImplementedError, message
                  end

                  private

                  attr_reader :config
                  attr_reader :url
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,26 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'scraped/request/strategy'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'open-uri'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            class Scraped
              class Request
                class Strategy
                  # Strategy that fetches the URL over the network using
                  # open-uri.
                  class LiveRequest < Strategy
                    # Fetches the URL and returns a Hash with :status (Integer),
                    # :headers and :body.
                    #
                    # The URL is opened through URI#open rather than the bare
                    # Kernel#open, so a string that is not a URL is never
                    # treated as a local file path or a "|command" pipe. The
                    # block form closes the underlying IO once it has been
                    # read.
                    #
                    # Raises ArgumentError if the URL's scheme cannot be opened
                    # by open-uri.
                    def response
                      log "Fetching #{url}"
                      uri = URI(url)
                      unless uri.respond_to?(:open)
                        raise ArgumentError, "Unsupported URL: #{url}"
                      end
                      uri.open do |io|
                        {
                          status:  io.status.first.to_i,
                          headers: io.meta,
                          body:    io.read,
                        }
                      end
                    end

                    private

                    # Writes the message to stderr, prefixed with the class
                    # name, when the VERBOSE environment variable is set.
                    def log(message)
                      warn "[#{self.class}] #{message}" if ENV.key?('VERBOSE')
                    end
                  end
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Scraped
              class Response
                # Base class for response decorators. By default every
                # attribute is passed through from the wrapped response
                # unchanged; subclasses override the ones they want to alter.
                class Decorator
                  # response - the response to decorate.
                  # config   - optional settings; stored after conversion with
                  #            #to_h.
                  def initialize(response:, config: {})
                    @config = config.to_h
                    @response = response
                  end

                  # Returns a new Response built from this decorator's url,
                  # body, headers and status.
                  def decorated_response
                    attributes = {
                      url:     url,
                      body:    body,
                      headers: headers,
                      status:  status,
                    }
                    Response.new(**attributes)
                  end

                  # Returns the wrapped response's URL.
                  def url
                    response.url
                  end

                  # Returns the wrapped response's body.
                  def body
                    response.body
                  end

                  # Returns the wrapped response's headers.
                  def headers
                    response.headers
                  end

                  # Returns the wrapped response's status.
                  def status
                    response.status
                  end

                  private

                  attr_reader :config
                  attr_reader :response
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,25 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'nokogiri'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'uri'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            class Scraped
              class Response
                class Decorator
                  # Decorator that rewrites image sources and link targets in
                  # the response body so they are absolute, resolved against
                  # the response URL.
                  class AbsoluteUrls < Decorator
                    # Returns the body re-serialized by Nokogiri with every
                    # non-empty img src and a href made absolute.
                    def body
                      Nokogiri::HTML(super).tap do |doc|
                        absolutize(doc, 'img', :src)
                        absolutize(doc, 'a', :href)
                      end.to_s
                    end

                    private

                    # Rewrites the attribute on each matching element.
                    # Elements whose attribute is missing or empty are left
                    # alone, so no empty src/href attribute is ever added to
                    # an element that did not have one.
                    def absolutize(doc, selector, attribute)
                      doc.css(selector).each do |node|
                        value = node[attribute]
                        next if value.to_s.empty?
                        node[attribute] = absolute_url(value).to_s
                      end
                    end

                    # Returns relative_url joined onto the response URL, or nil
                    # when relative_url is empty. If the URI library rejects
                    # the join, relative_url is returned unchanged.
                    def absolute_url(relative_url)
                      URI.join(url, relative_url) unless relative_url.to_s.empty?
                    rescue URI::Error
                      relative_url
                    end
                  end
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,23 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Scraped
              # Applies a list of decorators to a response, one after another.
              class ResponseDecorator
                # response   - the response to start from.
                # decorators - list of decorators; each entry is either a
                #              decorator class or an object indexable with
                #              :decorator (such as a Hash) that yields one.
                #              Converted with #to_a.
                def initialize(response:, decorators:)
                  @decorators = decorators.to_a
                  @original_response = response
                end

                # Returns the response produced by feeding the original
                # response through every decorator in order. Each decorator is
                # built with the previous result and its whole configuration,
                # then asked for its decorated_response.
                def response
                  current = original_response
                  decorators.each do |entry|
                    settings = entry.respond_to?(:[]) ? entry : { decorator: entry }
                    decorator = settings[:decorator].new(
                      response: current, config: settings
                    )
                    current = decorator.decorated_response
                  end
                  current
                end

                private

                attr_reader :decorators
                attr_reader :original_response
              end
            end
         
     | 
    
        data/scraped.gemspec
    ADDED
    
    | 
         @@ -0,0 +1,31 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # coding: utf-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            lib = File.expand_path('../lib', __FILE__)
         
     | 
| 
      
 3 
     | 
    
         
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'scraped/version'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # Gem specification for scraped.
            Gem::Specification.new do |spec|
              spec.name          = 'scraped'
              spec.version       = Scraped::VERSION
              spec.authors       = ['EveryPolitician']
              spec.email         = ['team@everypolitician.org']

              spec.summary       = 'Write declarative scrapers in Ruby'
              spec.homepage      = 'https://github.com/everypolitician/scraped'
              # Matches the licence stated in the README.
              spec.license       = 'MIT'

              # Package every file tracked by git except tests, specs and features.
              spec.files         = `git ls-files -z`.split("\x0").reject do |f|
                f.match(%r{^(test|spec|features)/})
              end
              spec.bindir        = 'exe'
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
              spec.require_paths = ['lib']

              spec.add_runtime_dependency 'nokogiri'
              spec.add_runtime_dependency 'field_serializer', '>= 0.3.0'
              spec.add_runtime_dependency 'require_all'

              spec.add_development_dependency 'bundler', '~> 1.13'
              spec.add_development_dependency 'rake', '~> 10.0'
              spec.add_development_dependency 'minitest', '~> 5.0'
              spec.add_development_dependency 'pry', '~> 0.10'
              spec.add_development_dependency 'rubocop', '~> 0.44'
            end
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,177 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: scraped
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.0
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - EveryPolitician
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: exe
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2017-01-04 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 13 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 14 
     | 
    
         
            +
              name: nokogiri
         
     | 
| 
      
 15 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 16 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 17 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 18 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 19 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 20 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 21 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 22 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 23 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 24 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 25 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 26 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 27 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 28 
     | 
    
         
            +
              name: field_serializer
         
     | 
| 
      
 29 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 31 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 32 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 33 
     | 
    
         
            +
                    version: 0.3.0
         
     | 
| 
      
 34 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 35 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 37 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 38 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 39 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 40 
     | 
    
         
            +
                    version: 0.3.0
         
     | 
| 
      
 41 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 42 
     | 
    
         
            +
              name: require_all
         
     | 
| 
      
 43 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 44 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 45 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 46 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 47 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 48 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 49 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 50 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 51 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 52 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 53 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 54 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 55 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 56 
     | 
    
         
            +
              name: bundler
         
     | 
| 
      
 57 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 58 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 59 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 60 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 61 
     | 
    
         
            +
                    version: '1.13'
         
     | 
| 
      
 62 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 63 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 64 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 65 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 66 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 67 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 68 
     | 
    
         
            +
                    version: '1.13'
         
     | 
| 
      
 69 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 70 
     | 
    
         
            +
              name: rake
         
     | 
| 
      
 71 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 72 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 73 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 74 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 75 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 76 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 77 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 78 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 79 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 80 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 81 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 82 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 83 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 84 
     | 
    
         
            +
              name: minitest
         
     | 
| 
      
 85 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 86 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 87 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 88 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 89 
     | 
    
         
            +
                    version: '5.0'
         
     | 
| 
      
 90 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 91 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 92 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 93 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 94 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 95 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 96 
     | 
    
         
            +
                    version: '5.0'
         
     | 
| 
      
 97 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 98 
     | 
    
         
            +
              name: pry
         
     | 
| 
      
 99 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 100 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 101 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 102 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 103 
     | 
    
         
            +
                    version: '0.10'
         
     | 
| 
      
 104 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 105 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 106 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 107 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 108 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 109 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 110 
     | 
    
         
            +
                    version: '0.10'
         
     | 
| 
      
 111 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 112 
     | 
    
         
            +
              name: rubocop
         
     | 
| 
      
 113 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 114 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 115 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 116 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 117 
     | 
    
         
            +
                    version: '0.44'
         
     | 
| 
      
 118 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 119 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 120 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 121 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 122 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 123 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 124 
     | 
    
         
            +
                    version: '0.44'
         
     | 
| 
      
 125 
     | 
    
         
            +
            description: 
         
     | 
| 
      
 126 
     | 
    
         
            +
            email:
         
     | 
| 
      
 127 
     | 
    
         
            +
            - team@everypolitician.org
         
     | 
| 
      
 128 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 129 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 130 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 131 
     | 
    
         
            +
            files:
         
     | 
| 
      
 132 
     | 
    
         
            +
            - ".gitignore"
         
     | 
| 
      
 133 
     | 
    
         
            +
            - ".rubocop.yml"
         
     | 
| 
      
 134 
     | 
    
         
            +
            - ".travis.yml"
         
     | 
| 
      
 135 
     | 
    
         
            +
            - CHANGELOG.md
         
     | 
| 
      
 136 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 137 
     | 
    
         
            +
            - LICENSE.txt
         
     | 
| 
      
 138 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 139 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 140 
     | 
    
         
            +
            - bin/console
         
     | 
| 
      
 141 
     | 
    
         
            +
            - bin/setup
         
     | 
| 
      
 142 
     | 
    
         
            +
            - lib/scraped.rb
         
     | 
| 
      
 143 
     | 
    
         
            +
            - lib/scraped/core_ext.rb
         
     | 
| 
      
 144 
     | 
    
         
            +
            - lib/scraped/html.rb
         
     | 
| 
      
 145 
     | 
    
         
            +
            - lib/scraped/request.rb
         
     | 
| 
      
 146 
     | 
    
         
            +
            - lib/scraped/request/strategy.rb
         
     | 
| 
      
 147 
     | 
    
         
            +
            - lib/scraped/request/strategy/live_request.rb
         
     | 
| 
      
 148 
     | 
    
         
            +
            - lib/scraped/response.rb
         
     | 
| 
      
 149 
     | 
    
         
            +
            - lib/scraped/response/decorator.rb
         
     | 
| 
      
 150 
     | 
    
         
            +
            - lib/scraped/response/decorator/absolute_urls.rb
         
     | 
| 
      
 151 
     | 
    
         
            +
            - lib/scraped/response_decorator.rb
         
     | 
| 
      
 152 
     | 
    
         
            +
            - lib/scraped/version.rb
         
     | 
| 
      
 153 
     | 
    
         
            +
            - scraped.gemspec
         
     | 
| 
      
 154 
     | 
    
         
            +
            homepage: https://github.com/everypolitician/scraped
         
     | 
| 
      
 155 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 156 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 157 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 158 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 159 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 160 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 161 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 162 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 163 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 164 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 165 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 166 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 167 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 168 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 169 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 170 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 171 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 172 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 173 
     | 
    
         
            +
            rubygems_version: 2.5.2
         
     | 
| 
      
 174 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 175 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 176 
     | 
    
         
            +
            summary: Write declarative scrapers in Ruby
         
     | 
| 
      
 177 
     | 
    
         
            +
            test_files: []
         
     |