sagrone_scraper 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +20 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +9 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +3 -0
- data/Guardfile +15 -0
- data/LICENSE +13 -0
- data/README.md +64 -0
- data/Rakefile +6 -0
- data/lib/sagrone_scraper/agent.rb +46 -0
- data/lib/sagrone_scraper/version.rb +3 -0
- data/lib/sagrone_scraper.rb +7 -0
- data/sagrone_scraper.gemspec +29 -0
- data/spec/sagrone_scraper/agent_spec.rb +105 -0
- data/spec/sagrone_scraper_spec.rb +8 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/stub_helper.rb +22 -0
- data/spec/test_responses/www.example.com +51 -0
- metadata +152 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: d067420377ca0e271b6ba7f8c00f5f6ae2198b85
         | 
| 4 | 
            +
              data.tar.gz: cc93b626d827b17e7f16fa91b7fd00b13936c318
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: 804b9c719e81d87b762f1cea45c3e1919d459d7520270ac1176907e3cb14efef3f992f24f2ed71db3baa85cae7e0fb3b4c5f18394785da396df772a6eeb59755
         | 
| 7 | 
            +
              data.tar.gz: a9b2524b7029896731942e13483e736b45115739d8cc22a7176e9afc477f6b3d460fbe8b9b88c577ae706a976ad10c30d49deba45cdfb9d65206085dd4459f3d
         | 
    
        data/.editorconfig
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            # EditorConfig helps developers define and maintain consistent
         | 
| 2 | 
            +
            # coding styles between different editors and IDEs
         | 
| 3 | 
            +
            # editorconfig.org
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            root = true
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            [*]
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Change these settings to your own preference
         | 
| 10 | 
            +
            indent_style = space
         | 
| 11 | 
            +
            indent_size = 2
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # We recommend you to keep these unchanged
         | 
| 14 | 
            +
            end_of_line = lf
         | 
| 15 | 
            +
            charset = utf-8
         | 
| 16 | 
            +
            trim_trailing_whitespace = true
         | 
| 17 | 
            +
            insert_final_newline = true
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            [*.md]
         | 
| 20 | 
            +
            trim_trailing_whitespace = false
         | 
    
        data/.gitignore
    ADDED
    
    
    
        data/.rspec
    ADDED
    
    
    
        data/.travis.yml
    ADDED
    
    
    
        data/CHANGELOG.md
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/Guardfile
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            # Guard configuration: re-runs RSpec when spec or library files change.
            clearing :on

            guard :rspec, cmd: "bundle exec rspec" do
              require "guard/rspec/dsl"
              rspec_dsl = Guard::RSpec::Dsl.new(self)

              # Spec files reported by the guard-rspec DSL.
              watch(rspec_dsl.rspec.spec_files)

              # A change to any spec helper or canned response maps to the whole "spec" directory.
              watch(%r{^spec/(.+)_helper\.rb$}) do
                "spec"
              end
              watch(%r{^spec/test_responses/(.+)$}) do
                "spec"
              end

              # A library file maps to the spec file with the matching relative path.
              watch(%r{^lib/(.+)\.rb$}) do |match|
                "spec/#{match[1]}_spec.rb"
              end
            end
         | 
    
        data/LICENSE
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
| 1 | 
            +
            Copyright 2015 Marius Colacioiu
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 4 | 
            +
            you may not use this file except in compliance with the License.
         | 
| 5 | 
            +
            You may obtain a copy of the License at
         | 
| 6 | 
            +
             | 
| 7 | 
            +
              http://www.apache.org/licenses/LICENSE-2.0
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            Unless required by applicable law or agreed to in writing, software
         | 
| 10 | 
            +
            distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 11 | 
            +
            WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 12 | 
            +
            See the License for the specific language governing permissions and
         | 
| 13 | 
            +
            limitations under the License.
         | 
    
        data/README.md
    ADDED
    
    | @@ -0,0 +1,64 @@ | |
| 1 | 
            +
            # SagroneScraper
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Simple library to scrape web pages. Below you will find information on [how to use it](#usage).
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ## Installation
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            Add this line to your application's Gemfile:
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                gem 'sagrone_scraper'
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            And then execute:
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                $ bundle
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            Or install it yourself as:
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                $ gem install sagrone_scraper
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            ## Usage
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            #### `SagroneScraper::Agent`
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            The agent is responsible for scraping a web page from a URL.
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            Here is how you can create an `agent`:
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            1. one way is to pass it a `url` option
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                ```ruby
         | 
| 30 | 
            +
                require 'sagrone_scraper/agent'
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                agent = SagroneScraper::Agent.new(url: 'https://twitter.com/Milano_JS')
         | 
| 33 | 
            +
                agent.page
         | 
| 34 | 
            +
                # => Mechanize::Page
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                agent.page.at('.ProfileHeaderCard-bio').text
         | 
| 37 | 
            +
                # => "Javascript User Group Milano #milanojs"
         | 
| 38 | 
            +
                ```
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            2. another way is to pass a `page` option (`Mechanize::Page`)
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                ```ruby
         | 
| 43 | 
            +
                require 'sagrone_scraper/agent'
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                mechanize_agent = Mechanize.new { |agent| agent.user_agent_alias = 'Linux Firefox' }
         | 
| 46 | 
            +
                page = mechanize_agent.get('https://twitter.com/Milano_JS')
         | 
| 47 | 
            +
                # => Mechanize::Page
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                agent = SagroneScraper::Agent.new(page: page)
         | 
| 50 | 
            +
                agent.url
         | 
| 51 | 
            +
                # => "https://twitter.com/Milano_JS"
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                agent.page.at('.ProfileHeaderCard-locationText').text
         | 
| 54 | 
            +
                # => "Milan, Italy"
         | 
| 55 | 
            +
                ```
         | 
| 56 | 
            +
             | 
| 57 | 
            +
             | 
| 58 | 
            +
            ## Contributing
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            1. Fork it ( https://github.com/[my-github-username]/sagrone_scraper/fork )
         | 
| 61 | 
            +
            2. Create your feature branch (`git checkout -b my-new-feature`)
         | 
| 62 | 
            +
            3. Commit your changes (`git commit -am 'Add some feature'`)
         | 
| 63 | 
            +
            4. Push to the branch (`git push origin my-new-feature`)
         | 
| 64 | 
            +
            5. Create a new Pull Request
         | 
    
        data/Rakefile
    ADDED
    
    
| @@ -0,0 +1,46 @@ | |
| 1 | 
            +
            require 'mechanize'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module SagroneScraper
              # Holds one web page together with its URL.
              #
              # An agent is built from exactly one of two options: a `:url`, which is
              # fetched through the HTTP client, or an already loaded `:page`, from
              # which the URL is derived.
              class Agent
                # Raised for invalid options and for any StandardError that occurs while
                # deriving the URL or fetching the page.
                Error = Class.new(RuntimeError)

                # User agent alias names; `.http_client` picks one at random per client.
                # Frozen so the shared list cannot be modified by accident.
                # NOTE(review): every name must be an alias known to the installed
                # Mechanize version - confirm against the supported version range.
                AGENT_ALIASES = ["Linux Firefox", "Linux Mozilla", "Mac Firefox", "Mac Mozilla", "Mac Safari", "Windows Chrome", "Windows IE 8", "Windows IE 9", "Windows Mozilla"].freeze

                attr_reader :url, :page

                # @param options [Hash] must contain exactly one truthy value among
                #   `:url` (address to fetch) and `:page` (object responding to `uri`).
                # @raise [Error] when neither or both options are given, or when deriving
                #   the URL or fetching the page raises a StandardError.
                def initialize(options = {})
                  raise Error, 'Exactly one option must be provided: "url" or "page"' unless exactly_one_of(options)

                  @url, @page = options[:url], options[:page]

                  # Only one of the two is set at this point; fill in the other one.
                  @url ||= page_url
                  @page ||= http_client.get(url)
                rescue Error
                  # Already our own error type: re-raise it untouched instead of wrapping it again.
                  raise
                rescue StandardError => error
                  raise Error, error.message
                end

                # @return [Object] the HTTP client of this agent, built on first use and
                #   reused on later calls.
                def http_client
                  @http_client ||= self.class.http_client
                end

                # Builds a new Mechanize client with a randomly chosen user agent alias.
                #
                # @return [Mechanize]
                def self.http_client
                  Mechanize.new do |agent|
                    agent.user_agent_alias = AGENT_ALIASES.sample
                    # NOTE(review): presumably stops Mechanize from retaining visited pages - confirm.
                    agent.max_history = 0
                  end
                end

                private

                # @return [String] the URL of the page given at construction time.
                def page_url
                  @page.uri.to_s
                end

                # @return [Boolean] true when exactly one of `:url` / `:page` is truthy.
                def exactly_one_of(options)
                  !!options[:url] ^ !!options[:page]
                end
              end
            end
         | 
| @@ -0,0 +1,29 @@ | |
| 1 | 
            +
            # coding: utf-8
         | 
| 2 | 
            +
            lib = File.expand_path('../lib', __FILE__)
         | 
| 3 | 
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require 'sagrone_scraper/version'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # Gem specification for sagrone_scraper.
            Gem::Specification.new do |spec|
              spec.name          = "sagrone_scraper"
              spec.version       = SagroneScraper::VERSION
              spec.authors       = ["Marius Colacioiu"]
              spec.email         = ["marius.colacioiu@gmail.com"]
              spec.summary       = %q{Sagrone Ruby Scraper.}
              spec.description   = %q{Simple library to scrap web pages.}
              # NOTE(review): homepage is empty - fill in the project URL once confirmed.
              spec.homepage      = ""
              # SPDX identifier for the Apache License, Version 2.0 (see LICENSE).
              spec.license       = "Apache-2.0"

              # Package every file tracked by git; the list is NUL-separated.
              spec.files         = `git ls-files -z`.split("\x0")
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
              spec.test_files    = spec.files.grep(%r{^(spec)/})
              spec.require_paths = ["lib"]

              spec.add_dependency "mechanize", "~> 2.0"

              spec.add_development_dependency "bundler"
              spec.add_development_dependency "guard-rspec"
              spec.add_development_dependency "rake"
              spec.add_development_dependency "rspec"
              spec.add_development_dependency "webmock"
            end
         | 
| @@ -0,0 +1,105 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require 'sagrone_scraper/agent'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for SagroneScraper::Agent: the alias constant, the class-level HTTP
            # client and construction from either a `url` or a `page` option.
            RSpec.describe SagroneScraper::Agent do
              let(:user_agent_aliases) do
                [ "Linux Firefox", "Linux Mozilla",
                  "Mac Firefox", "Mac Mozilla", "Mac Safari",
                  "Windows Chrome", "Windows IE 8", "Windows IE 9", "Windows Mozilla" ]
              end

              describe 'AGENT_ALIASES' do
                it { expect(described_class::AGENT_ALIASES).to eq(user_agent_aliases) }
              end

              describe '.http_client' do
                subject { described_class.http_client }

                it { should be_a(Mechanize) }
                it { should respond_to(:get) }
                it { expect(subject.user_agent).to match(/Mozilla\/5\.0/) }
              end

              describe '#initialize' do
                describe 'should require exactly one of `url` or `page` option' do
                  before do
                    stub_request_for('http://example.com', 'www.example.com')
                  end

                  it 'when options is empty' do
                    expect { described_class.new }.to raise_error(SagroneScraper::Agent::Error,
                                                                  /Exactly one option must be provided: "url" or "page"/)
                  end

                  it 'when both options are present' do
                    page = Mechanize.new.get('http://example.com')

                    expect {
                      described_class.new(url: 'http://example.com', page: page)
                    }.to raise_error(SagroneScraper::Agent::Error,
                                      /Exactly one option must be provided: "url" or "page"/)
                  end
                end

                describe 'with page option' do
                  before do
                    stub_request_for('http://example.com', 'www.example.com')
                  end

                  let(:page) { Mechanize.new.get('http://example.com') }
                  let(:agent) { described_class.new(page: page) }

                  it { expect { agent }.to_not raise_error }
                  it { expect(agent.page).to be }
                  it { expect(agent.url).to eq 'http://example.com/' }
                end

                describe 'with invalid URL' do
                  # Each example assigns @invalid_url before `agent` is first evaluated.
                  let(:agent) { described_class.new(url: @invalid_url) }

                  it 'should require URL is absolute' do
                    @invalid_url = 'not-a-url'

                    expect { agent }.to raise_error(SagroneScraper::Agent::Error,
                                                    /absolute URL needed \(not not-a-url\)/)
                  end

                  it 'should require absolute path' do
                    @invalid_url = 'http://'

                    webmock_allow do
                      expect { agent }.to raise_error(SagroneScraper::Agent::Error,
                                                      /bad URI\(absolute but no path\)/)
                    end
                  end

                  it 'should require valid URL' do
                    @invalid_url = 'http://example'

                    webmock_allow do
                      # The text after "getaddrinfo" comes from the system resolver and
                      # differs between platforms, so only the common prefix is matched.
                      expect { agent }.to raise_error(SagroneScraper::Agent::Error,
                                                      /getaddrinfo/)
                    end
                  end
                end

                describe 'with valid URL' do
                  before do
                    stub_request_for('http://example.com', 'www.example.com')
                  end

                  let(:agent) { described_class.new(url: 'http://example.com') }

                  it { expect(agent.http_client).to be_a(Mechanize) }
                  it { expect(agent.http_client).to equal(agent.http_client) }

                  it { expect { agent }.to_not raise_error }
                  it { expect(agent.url).to eq('http://example.com') }

                  it { expect(agent.page).to be_a(Mechanize::Page) }
                  it { expect(agent.page).to equal(agent.page) }
                  it { expect(agent.page).to respond_to(:at, :body, :title) }
                  it { expect(agent.page).to respond_to(:links, :labels, :images, :image_urls, :forms) }
                end
              end
            end
         | 
    
        data/spec/spec_helper.rb
    ADDED
    
    | @@ -0,0 +1,24 @@ | |
| 1 | 
            +
            require 'stub_helper'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Shared RSpec configuration for the whole suite.
            RSpec.configure do |rspec|
              # Make the stubbing helpers available inside every example.
              rspec.include StubHelper

              rspec.expect_with(:rspec) do |expect_config|
                expect_config.include_chain_clauses_in_custom_matcher_descriptions = true
              end

              rspec.mock_with(:rspec) do |mock_config|
                mock_config.verify_partial_doubles = true
              end

              rspec.filter_run(:focus)
              rspec.run_all_when_everything_filtered = true
              rspec.disable_monkey_patching!
              rspec.warnings = true

              # Use the 'doc' formatter when exactly one file is going to be run.
              rspec.default_formatter = 'doc' if rspec.files_to_run.one?

              rspec.order = :random
            end
         | 
    
        data/spec/stub_helper.rb
    ADDED
    
    | @@ -0,0 +1,22 @@ | |
| 1 | 
            +
            require 'webmock/rspec'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Helpers for specs that stub HTTP requests with canned response files.
            module StubHelper
              # Stubs a GET request to `url` so it answers with status 200, an HTML
              # content type and the contents of the named response file.
              #
              # @param url [String] address to stub.
              # @param file_name [String] file name under spec/test_responses.
              def stub_request_for(url, file_name)
                stub_request(:get, url)
                  .to_return({
                    body: get_response_file(file_name),
                    headers: {'content-type' => 'text/html'},
                    status: 200
                  })
              end

              # Runs the block with real network connections allowed.
              #
              # Connections are disabled again in `ensure`, so a block that raises
              # cannot leave them enabled for the examples that run afterwards.
              #
              # @return [Object] the value returned by the block.
              def webmock_allow(&block)
                WebMock.allow_net_connect!
                block.call
              ensure
                WebMock.disable_net_connect!
              end

              # Reads a canned response; the path is relative to the current working
              # directory.
              #
              # @param name [#to_s] file name under spec/test_responses.
              # @return [String] the file contents.
              def get_response_file(name)
                # File.read only ever opens a file; IO.read can treat a leading "|" as a command.
                File.read(File.join('spec/test_responses', name.to_s))
              end
            end
         | 
| @@ -0,0 +1,51 @@ | |
| 1 | 
            +
            <!DOCTYPE html>
         | 
| 2 | 
            +
            <!-- saved from url=(0019)http://example.com/ -->
         | 
| 3 | 
            +
            <html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
         | 
| 4 | 
            +
                <title>Example Domain</title>
         | 
| 5 | 
            +
             | 
| 6 | 
            +
                <meta charset="utf-8">
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                <meta name="viewport" content="width=device-width, initial-scale=1">
         | 
| 9 | 
            +
                <style type="text/css">
         | 
| 10 | 
            +
                body {
         | 
| 11 | 
            +
                    background-color: #f0f0f2;
         | 
| 12 | 
            +
                    margin: 0;
         | 
| 13 | 
            +
                    padding: 0;
         | 
| 14 | 
            +
                    font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                }
         | 
| 17 | 
            +
                div {
         | 
| 18 | 
            +
                    width: 600px;
         | 
| 19 | 
            +
                    margin: 5em auto;
         | 
| 20 | 
            +
                    padding: 50px;
         | 
| 21 | 
            +
                    background-color: #fff;
         | 
| 22 | 
            +
                    border-radius: 1em;
         | 
| 23 | 
            +
                }
         | 
| 24 | 
            +
                a:link, a:visited {
         | 
| 25 | 
            +
                    color: #38488f;
         | 
| 26 | 
            +
                    text-decoration: none;
         | 
| 27 | 
            +
                }
         | 
| 28 | 
            +
                @media (max-width: 700px) {
         | 
| 29 | 
            +
                    body {
         | 
| 30 | 
            +
                        background-color: #fff;
         | 
| 31 | 
            +
                    }
         | 
| 32 | 
            +
                    div {
         | 
| 33 | 
            +
                        width: auto;
         | 
| 34 | 
            +
                        margin: 0 auto;
         | 
| 35 | 
            +
                        border-radius: 0;
         | 
| 36 | 
            +
                        padding: 1em;
         | 
| 37 | 
            +
                    }
         | 
| 38 | 
            +
                }
         | 
| 39 | 
            +
                </style>
         | 
| 40 | 
            +
            </head>
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            <body>
         | 
| 43 | 
            +
            <div>
         | 
| 44 | 
            +
                <h1>Example Domain</h1>
         | 
| 45 | 
            +
                <p>This domain is established to be used for illustrative examples in documents. You may use this
         | 
| 46 | 
            +
                domain in examples without prior coordination or asking for permission.</p>
         | 
| 47 | 
            +
                <p><a href="http://www.iana.org/domains/example">More information...</a></p>
         | 
| 48 | 
            +
            </div>
         | 
| 49 | 
            +
             | 
| 50 | 
            +
             | 
| 51 | 
            +
            </body></html>
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,152 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: sagrone_scraper
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.0.1
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - Marius Colacioiu
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2015-03-06 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: mechanize
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - "~>"
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '2.0'
         | 
| 20 | 
            +
              type: :runtime
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - "~>"
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: '2.0'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: bundler
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - ">="
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: '0'
         | 
| 34 | 
            +
              type: :development
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - ">="
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: '0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: guard-rspec
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - ">="
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - ">="
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '0'
         | 
| 55 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            +
              name: rake
         | 
| 57 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 | 
            +
                requirements:
         | 
| 59 | 
            +
                - - ">="
         | 
| 60 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            +
                    version: '0'
         | 
| 62 | 
            +
              type: :development
         | 
| 63 | 
            +
              prerelease: false
         | 
| 64 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 | 
            +
                requirements:
         | 
| 66 | 
            +
                - - ">="
         | 
| 67 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            +
                    version: '0'
         | 
| 69 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 70 | 
            +
              name: rspec
         | 
| 71 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 72 | 
            +
                requirements:
         | 
| 73 | 
            +
                - - ">="
         | 
| 74 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 75 | 
            +
                    version: '0'
         | 
| 76 | 
            +
              type: :development
         | 
| 77 | 
            +
              prerelease: false
         | 
| 78 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 79 | 
            +
                requirements:
         | 
| 80 | 
            +
                - - ">="
         | 
| 81 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 82 | 
            +
                    version: '0'
         | 
| 83 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 84 | 
            +
              name: webmock
         | 
| 85 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 86 | 
            +
                requirements:
         | 
| 87 | 
            +
                - - ">="
         | 
| 88 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 89 | 
            +
                    version: '0'
         | 
| 90 | 
            +
              type: :development
         | 
| 91 | 
            +
              prerelease: false
         | 
| 92 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 93 | 
            +
                requirements:
         | 
| 94 | 
            +
                - - ">="
         | 
| 95 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 96 | 
            +
                    version: '0'
         | 
| 97 | 
            +
            description: Simple library to scrap web pages.
         | 
| 98 | 
            +
            email:
         | 
| 99 | 
            +
            - marius.colacioiu@gmail.com
         | 
| 100 | 
            +
            executables: []
         | 
| 101 | 
            +
            extensions: []
         | 
| 102 | 
            +
            extra_rdoc_files: []
         | 
| 103 | 
            +
            files:
         | 
| 104 | 
            +
            - ".editorconfig"
         | 
| 105 | 
            +
            - ".gitignore"
         | 
| 106 | 
            +
            - ".rspec"
         | 
| 107 | 
            +
            - ".travis.yml"
         | 
| 108 | 
            +
            - CHANGELOG.md
         | 
| 109 | 
            +
            - Gemfile
         | 
| 110 | 
            +
            - Guardfile
         | 
| 111 | 
            +
            - LICENSE
         | 
| 112 | 
            +
            - README.md
         | 
| 113 | 
            +
            - Rakefile
         | 
| 114 | 
            +
            - lib/sagrone_scraper.rb
         | 
| 115 | 
            +
            - lib/sagrone_scraper/agent.rb
         | 
| 116 | 
            +
            - lib/sagrone_scraper/version.rb
         | 
| 117 | 
            +
            - sagrone_scraper.gemspec
         | 
| 118 | 
            +
            - spec/sagrone_scraper/agent_spec.rb
         | 
| 119 | 
            +
            - spec/sagrone_scraper_spec.rb
         | 
| 120 | 
            +
            - spec/spec_helper.rb
         | 
| 121 | 
            +
            - spec/stub_helper.rb
         | 
| 122 | 
            +
            - spec/test_responses/www.example.com
         | 
| 123 | 
            +
            homepage: ''
         | 
| 124 | 
            +
            licenses:
         | 
| 125 | 
            +
            - Apache License 2.0
         | 
| 126 | 
            +
            metadata: {}
         | 
| 127 | 
            +
            post_install_message: 
         | 
| 128 | 
            +
            rdoc_options: []
         | 
| 129 | 
            +
            require_paths:
         | 
| 130 | 
            +
            - lib
         | 
| 131 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 132 | 
            +
              requirements:
         | 
| 133 | 
            +
              - - ">="
         | 
| 134 | 
            +
                - !ruby/object:Gem::Version
         | 
| 135 | 
            +
                  version: '0'
         | 
| 136 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 137 | 
            +
              requirements:
         | 
| 138 | 
            +
              - - ">="
         | 
| 139 | 
            +
                - !ruby/object:Gem::Version
         | 
| 140 | 
            +
                  version: '0'
         | 
| 141 | 
            +
            requirements: []
         | 
| 142 | 
            +
            rubyforge_project: 
         | 
| 143 | 
            +
            rubygems_version: 2.2.2
         | 
| 144 | 
            +
            signing_key: 
         | 
| 145 | 
            +
            specification_version: 4
         | 
| 146 | 
            +
            summary: Sagrone Ruby Scraper.
         | 
| 147 | 
            +
            test_files:
         | 
| 148 | 
            +
            - spec/sagrone_scraper/agent_spec.rb
         | 
| 149 | 
            +
            - spec/sagrone_scraper_spec.rb
         | 
| 150 | 
            +
            - spec/spec_helper.rb
         | 
| 151 | 
            +
            - spec/stub_helper.rb
         | 
| 152 | 
            +
            - spec/test_responses/www.example.com
         |