web-scraper 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/README.md +7 -0
- data/Rakefile +1 -0
- data/lib/web-scraper/logger_configurer.rb +20 -0
- data/lib/web-scraper/scraped_response.rb +23 -0
- data/lib/web-scraper/version.rb +5 -0
- data/lib/web_scraper.rb +35 -0
- data/spec/logger_spec.rb +42 -0
- data/spec/resources/page.html +103 -0
- data/spec/spec_helper.rb +37 -0
- data/spec/support/test_web_server.rb +74 -0
- data/spec/web_scraper_spec.rb +79 -0
- data/web-scraper.gemspec +26 -0
- metadata +120 -0
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require "logger"
|
2
|
+
|
3
|
+
# Builds the Logger instances used by the scraper.
class LoggerConfigurer

  # File the log is written to when the :log_to_file option is truthy.
  DEFAULT_FILENAME = 'web-scraper.log'

  # Creates a new Logger on every call; no state is kept on the class.
  #
  # options - Hash of settings (all optional):
  #           :log_to_file - when truthy, log to DEFAULT_FILENAME.
  #           :log_level   - severity threshold; converted with to_i before
  #                          being assigned to Logger#level.
  #
  # Returns the configured Logger.
  def self.create_logger(options = {})
    logger = Logger.new(logger_device(options[:log_to_file]))
    logger.level = options[:log_level].to_i if options[:log_level]
    logger
  end

  # Chooses the device handed to Logger.new.
  #
  # file_option - truthy when the caller asked for file logging.
  #
  # Returns DEFAULT_FILENAME when file_option is truthy; otherwise nil when
  # the WEBSCRAPER_ENV environment variable is 'test', and STDOUT in every
  # other case.
  def self.logger_device(file_option)
    return DEFAULT_FILENAME if file_option
    return nil if ENV["WEBSCRAPER_ENV"] == 'test'

    STDOUT
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
# == ScrapedResponse
|
5
|
+
# Will hold values from the scraping of the page
|
6
|
+
class ScrapedResponse
  # Parses the page and defines, on this instance only, one reader method
  # per entry of +options+.
  #
  # page_content - the HTML source handed to Nokogiri::HTML.
  # options      - Hash mapping a method name to the XPath expression whose
  #                value that method returns.
  #
  # The readers are singleton methods, so two responses built with different
  # option hashes do not see (or overwrite) each other's readers.
  def initialize(page_content, options)
    @document = Nokogiri::HTML(page_content)
    options.each do |name, xpath|
      define_singleton_method(name) { extract(xpath) }
    end
  end

  # Defines the readers described by +options+ on the class itself, i.e. for
  # every instance. Kept for callers that invoke it directly; the constructor
  # no longer relies on it.
  def self.create_methods(options)
    options.each do |name, xpath|
      define_method(name) { extract(xpath) }
    end
  end

  private

  # Evaluates +xpath+ against the parsed document and returns the text of the
  # first child of the matched nodes with surrounding whitespace removed.
  # Returns nil when nothing matches.
  def extract(xpath)
    node = @document.xpath(xpath).children.first
    # strip (not strip!) so already-trimmed text is returned instead of nil.
    node && node.text.strip
  end
end
|
data/lib/web_scraper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require "web-scraper/version"
|
2
|
+
require "web-scraper/logger_configurer"
|
3
|
+
require "web-scraper/scraped_response"
|
4
|
+
|
5
|
+
require 'httpclient'
|
6
|
+
|
7
|
+
# == Web Scraper
|
8
|
+
#
|
9
|
+
module WebScraper
  # Entry point of the DSL. Sets up the logger, the HTTP client and the base
  # URL, then either evaluates the given block in the context of the receiver
  # (so +download+ and +scrape+ can be called bare inside it) or, when no
  # block is given, downloads the root path '/'.
  #
  # base_url - prefix prepended to every :uri passed to +download+.
  # options  - Hash forwarded to LoggerConfigurer.create_logger.
  #
  # Returns the value of the block, or the downloaded body when no block is
  # given.
  def web_page(base_url, options = {}, &block)
    @logger = LoggerConfigurer.create_logger(options)
    @client = HTTPClient.new
    @base_url = base_url

    block_given? ? instance_eval(&block) : download(:uri => '/')
  end

  # Issues a POST for the page and remembers its body for a later +scrape+.
  #
  # params - Hash with:
  #          :uri    - path appended to the base URL.
  #          :params - value sent as the request body (may be absent).
  #
  # Returns the response body.
  def download(params)
    target = url(params[:uri])
    @logger.info("downloading... #{target}")
    @logger.debug(" with params... #{params[:params]}")
    @last_page = @client.post(target, :body => params[:params], :follow_redirect => true).body
  end

  # Wraps the last downloaded page in a ScrapedResponse.
  #
  # options - Hash of reader name => XPath, passed through unchanged.
  def scrape(options)
    ScrapedResponse.new(@last_page, options)
  end

  # Returns the base URL with +uri+ appended.
  def url(uri)
    "#{@base_url}#{uri}"
  end

end
|
data/spec/logger_spec.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for LoggerConfigurer: logger creation, level handling and the choice
# between console and file output.
describe LoggerConfigurer do

  # LoggerConfigurer.logger_device returns nil when WEBSCRAPER_ENV is 'test',
  # so these examples run under a different value. The previous value is
  # remembered and restored instead of being overwritten with a fixed one.
  before(:all) do
    @previous_env = ENV['WEBSCRAPER_ENV']
    ENV['WEBSCRAPER_ENV'] = 'logger_test'
  end

  after(:all) do
    ENV['WEBSCRAPER_ENV'] = @previous_env
  end

  it "should create a Logger object w/ no options" do
    LoggerConfigurer.create_logger.should be_a Logger
  end

  it "should check for a :log_level option" do
    LoggerConfigurer.create_logger(:log_level => Logger::INFO).level.should be Logger::INFO
  end

  context "output to console or file " do
    # Capture everything written to $stdout so the examples can inspect it.
    before(:each) do
      @output = ''
      $stdout.stub!( :write ) { |*args| @output.<<( *args )}
    end

    it "should return a stdout w/o params" do
      LoggerConfigurer.create_logger.info "testing logger"
      @output.should include("testing logger")
    end

    it "should return stdout for a log_to_file => false option" do
      LoggerConfigurer.create_logger(:log_to_file => false).info "testing logger"
      @output.should include("testing logger")
    end

    it "should return FileOutputter for a :file => true option" do
      LoggerConfigurer.create_logger(:log_to_file => true).info "testing logger"
      @output.should be_empty
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<style type="text/css">
|
4
|
+
|
5
|
+
<!--
|
6
|
+
@import "pvgistext.css";
|
7
|
+
-->
|
8
|
+
</style>
|
9
|
+
<title>
|
10
|
+
PV power estimate information
|
11
|
+
</title>
|
12
|
+
<meta http-equiv="Content-Type" content="text/html">
|
13
|
+
<meta name="author" content="Created by Thomas Huld - JRC ISPRA - ITALY, Thomas.Huld@jrc.it">
|
14
|
+
</head>
|
15
|
+
<body text="#000000" bgcolor="#F5F5FF" >
|
16
|
+
|
17
|
+
|
18
|
+
<table width="100%" border="0">
|
19
|
+
<tr>
|
20
|
+
<td align="left" >
|
21
|
+
|
22
|
+
<p class=title>
|
23
|
+
Performance of Grid-connected PV
|
24
|
+
<br>
|
25
|
+
</p>
|
26
|
+
<p>
|
27
|
+
<font color=\"red\"> NOTE: before using these calculations for anything serious, you should read </font><a href="javascript:window.open('PVcalchelp_' + 'en' + '.html','help','height=600,width=600,toolbar=no,scrollbars=yes,resizable'); void 0;"><font color=blue> [this]</font></a>
|
28
|
+
</p>
|
29
|
+
<p class=subtitle align="left">
|
30
|
+
PVGIS estimates of solar electricity generation
|
31
|
+
</p><br><font color="red">Warning: the system could not read the horizon file supplied, using standard horizon</font><br>Location: 45°49'3" North, 8°49'34" East, Elevation: 389 m a.s.l.,<br><br>
|
32
|
+
</td>
|
33
|
+
</tr>
|
34
|
+
<tr>
|
35
|
+
<td align="left">
|
36
|
+
|
37
|
+
<br>
|
38
|
+
Solar radiation database used: PVGIS-classic
|
39
|
+
<br>
|
40
|
+
<br>
|
41
|
+
|
42
|
+
Nominal power of the PV system: 1.0 kW (crystalline silicon)<br>
|
43
|
+
|
44
|
+
|
45
|
+
Estimated losses due to temperature: 12.3% (using local ambient temperature)<br>
|
46
|
+
|
47
|
+
Estimated loss due to angular reflectance effects: 3.3%<br>
|
48
|
+
Other losses (cables, inverter etc.): 8.0%<br>
|
49
|
+
Combined PV system losses: 22.0%
|
50
|
+
|
51
|
+
</td>
|
52
|
+
</tr>
|
53
|
+
<tr>
|
54
|
+
<td align="left">
|
55
|
+
<br>
|
56
|
+
</td>
|
57
|
+
</tr> <tr><td align="left">
|
58
|
+
<table class=data_table border="1" width="300" >
|
59
|
+
<tr>
|
60
|
+
<td colspan=5>
|
61
|
+
<b>Fixed system: inclination=23°, orientation=60° </b>
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
<tr>
|
65
|
+
|
66
|
+
<th align="left">Month</th><th width=40><span class=formula>E</span><sub>d</sub> </th><th width=40> <span class=formula>E</span><sub>m</sub> </th><th width=40><span class=formula>H</span><sub>d</sub></th><th width=40> <span class=formula>H</span><sub>m</sub> </th></tr>
|
67
|
+
<tr> <td> Jan </td><td align="right">1.45</td><td align="right">45.0</td><td align="right">1.71</td><td align="right">53.1</td></tr>
|
68
|
+
<tr> <td> Feb </td><td align="right">1.87</td><td align="right">52.5</td><td align="right">2.24</td><td align="right">62.7</td></tr>
|
69
|
+
<tr> <td> Mar </td><td align="right">2.90</td><td align="right">90.0</td><td align="right">3.61</td><td align="right">112</td></tr>
|
70
|
+
<tr> <td> Apr </td><td align="right">3.32</td><td align="right">99.5</td><td align="right">4.23</td><td align="right">127</td></tr>
|
71
|
+
<tr> <td> May </td><td align="right">3.50</td><td align="right">108</td><td align="right">4.59</td><td align="right">142</td></tr>
|
72
|
+
<tr> <td> Jun </td><td align="right">4.17</td><td align="right">125</td><td align="right">5.56</td><td align="right">167</td></tr>
|
73
|
+
<tr> <td> Jul </td><td align="right">4.31</td><td align="right">134</td><td align="right">5.79</td><td align="right">180</td></tr>
|
74
|
+
<tr> <td> Aug </td><td align="right">3.90</td><td align="right">121</td><td align="right">5.26</td><td align="right">163</td></tr>
|
75
|
+
<tr> <td> Sep </td><td align="right">3.17</td><td align="right">95.1</td><td align="right">4.15</td><td align="right">124</td></tr>
|
76
|
+
<tr> <td> Oct </td><td align="right">2.12</td><td align="right">65.7</td><td align="right">2.66</td><td align="right">82.4</td></tr>
|
77
|
+
<tr> <td> Nov </td><td align="right">1.47</td><td align="right">44.1</td><td align="right">1.78</td><td align="right">53.4</td></tr>
|
78
|
+
<tr> <td> Dec </td><td align="right">1.28</td><td align="right">39.5</td><td align="right">1.53</td><td align="right">47.3</td></tr><tr><td colspan=5> </td></tr><tr><td><b> Yearly average </b></td><td align="right"><b>2.79 </b></td><td align="right"><b>85.0 </b></td></td><td align="right"><b>3.60 </b></td><td align="right"><b>109 </b></td></tr><tr><td><b>Total for year</b></td><td align="right" colspan=2 ><b> 1020 </b> </td> <td align="right" colspan=2 ><b> 1310 </b> </td> </tr></table></td></tr><tr><td><br></td></tr><tr><td><p>
|
79
|
+
<span class=formula>E</span><sub>d</sub>: Average daily electricity production from the given system (kWh)<br>
|
80
|
+
<span class=formula>E</span><sub>m</sub>: Average monthly electricity production from the given system (kWh)<br>
|
81
|
+
<span class=formula>H</span><sub>d</sub>: Average daily sum of global irradiation per square meter received by the modules of the given system (kWh/m<sup>2</sup>)<br>
|
82
|
+
<span class=formula>H</span><sub>m</sub>: Average sum of global irradiation per square meter received by the modules of the given system (kWh/m<sup>2</sup>)<br></tr></td><tr><td><tr>
|
83
|
+
<td>
|
84
|
+
<br>
|
85
|
+
<p>
|
86
|
+
PVGIS © European Communities, 2001-2010<br>
|
87
|
+
Reproduction is authorised, provided the source is acknowledged<br>
|
88
|
+
See the disclaimer <a target="legal" href="http://europa.eu/geninfo/legal_notices_en.htm">
|
89
|
+
here </a>
|
90
|
+
|
91
|
+
|
92
|
+
</p>
|
93
|
+
</td>
|
94
|
+
</tr>
|
95
|
+
</table>
|
96
|
+
<script language="JavaScript">
|
97
|
+
opener.window.setAngles(23,60,1,1);
|
98
|
+
</script
|
99
|
+
<script language="JavaScript">
|
100
|
+
window.focus();
|
101
|
+
</script>
|
102
|
+
</body>
|
103
|
+
</html>
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'rubygems'
|
3
|
+
|
4
|
+
# This file is copied to spec/ when you run 'rails generate rspec:install'
|
5
|
+
ENV["WEBSCRAPER_ENV"] ||= 'test'
|
6
|
+
Dir["lib/**/*.rb", "spec/support/*.rb"].each { |f| require File.absolute_path(f) }
|
7
|
+
require 'rspec/core'
|
8
|
+
|
9
|
+
# Returns the content of a fixture stored under spec/resources.
#
# filename - name of the file inside spec/resources; the path is resolved
#            relative to the current working directory.
#
# File.read is used rather than IO.read so the argument is always treated as
# a plain file path.
def read_resource(filename)
  File.read("spec/resources/#{filename}")
end
|
12
|
+
|
13
|
+
RSpec.configure do |config|
|
14
|
+
# == Mock Framework
|
15
|
+
#
|
16
|
+
# If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
|
17
|
+
#
|
18
|
+
# config.mock_with :mocha
|
19
|
+
# config.mock_with :flexmock
|
20
|
+
# config.mock_with :rr
|
21
|
+
config.mock_with :rspec
|
22
|
+
|
23
|
+
# Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
|
24
|
+
# config.fixture_path = "#{::Rails.root}/spec/fixtures"
|
25
|
+
|
26
|
+
# If you're not using ActiveRecord, or you'd prefer not to run each of your
|
27
|
+
# examples within a transaction, remove the following line or assign false
|
28
|
+
# instead of true.
|
29
|
+
# config.use_transactional_fixtures = true
|
30
|
+
|
31
|
+
# config.before(:suite) do
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# config.after(:suite) do
|
35
|
+
# end
|
36
|
+
|
37
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
## Bring up server in a new thread (do once?):
|
4
|
+
# @mock_server = TestWebServer.new(4000, 0.5)
|
5
|
+
#
|
6
|
+
## Pull down server:
|
7
|
+
# @mock_server.stop
|
8
|
+
#
|
9
|
+
## Expectations (rspec example):
|
10
|
+
# request_received = false
|
11
|
+
# @mock_server.attach do |env|
|
12
|
+
# request_received = true
|
13
|
+
# env['REQUEST_METHOD'].should == 'POST'
|
14
|
+
# env['PATH_INFO'].should == '/foo'
|
15
|
+
# [ 200, { 'Content-Type' => 'text/plain', 'Content-Length' => '40' }, [ 'This gets returned from the HTTP request' ]]
|
16
|
+
# end
|
17
|
+
# request_received.should be_true
|
18
|
+
# my_code_that_should_make_post_request # to http://localhost:4000/foo
|
19
|
+
#
|
20
|
+
## After each test:
|
21
|
+
# @mock_server.detach
|
22
|
+
require "webrick"
|
23
|
+
require 'rack'
|
24
|
+
|
25
|
+
# Minimal Rack application served by WEBrick in a background thread. A spec
# attaches a block that receives the Rack env and returns the Rack response.
class TestWebServer
  # Starts the server in a new thread.
  #
  # port  - TCP port to listen on.
  # pause - seconds to sleep after spawning the thread.
  def initialize(port = 4000, pause = 0.5)
    @block = nil
    @parent_thread = Thread.current
    @thread = Thread.new do
      Rack::Handler::WEBrick.run(self, :Port => port,
                                       :Logger => WEBrick::Log.new("/dev/null"),
                                       :AccessLog => [nil, nil])
    end
    sleep pause # give the server time to fire up… YUK!
  end

  # Kills the server thread.
  def stop
    @thread.kill
  end

  # Registers the block that handles the next requests.
  def attach(&block)
    @block = block
  end

  # Removes the current handler block.
  def detach
    @block = nil
  end

  # Rack entry point: delegates to the attached block. Any exception raised
  # while handling the request is re-raised in the thread that created the
  # server and a 500 response is returned to the client.
  #
  # NOTE(review): Exception (not StandardError) is rescued here, presumably so
  # that failed expectations raised inside the attached block reach the spec
  # thread as well — confirm before narrowing.
  def call(env)
    raise "Specify a handler for the request using attach(block), the block should return a valid rack response and can test expectations" unless @block
    @block.call(env)
  rescue Exception => e
    @parent_thread.raise e
    [ 500, { 'Content-Type' => 'text/plain', 'Content-Length' => '13' }, [ 'Bad test code' ]]
  end

  # Attaches a handler that checks the request method and path and answers
  # with +body+.
  #
  # body    - String returned as the response body.
  # options - Hash (all optional):
  #           :method       - expected HTTP method, :get (default) or :post.
  #           :path         - expected request path, default '/'.
  #           :content_type - response Content-Type, default 'text/plain'.
  def stub_response(body, options = {})
    method = (options[:method] || 'GET').to_s.upcase
    path = options[:path] || '/'
    content_type = options[:content_type] || 'text/plain'

    attach do |env|
      env['REQUEST_METHOD'].should == method
      env['PATH_INFO'].should == path
      # Content-Length counts bytes, so use bytesize: length counts characters
      # and under-reports bodies containing multibyte characters.
      [ 200, { 'Content-Type' => content_type, 'Content-Length' => body.bytesize.to_s }, [ body ]]
    end
  end

end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
# Specs for the WebScraper DSL (web_page / download / scrape), run partly
# against a local TestWebServer.
describe WebScraper do

  # Mix the DSL into the example so its methods can be called bare.
  before(:each) do
    extend WebScraper
  end

  context "[w/o web server]" do
    it "should be raised an error when calling download method w/o :uri param" do
      # download takes one mandatory argument, so a bare call must fail with
      # ArgumentError; naming the class keeps unrelated errors from passing.
      expect { download }.to raise_error(ArgumentError)
    end
  end

  context "[w/ web server]" do
    # One server for the whole context, kept in a group instance variable
    # rather than a global.
    before(:all) do
      @web_server = TestWebServer.new
    end

    context "starting defining dsl" do

      before(:each) do
        @web_server.stub_response('Root Page', {:method => :post, :path => '/'})
      end

      it "should accept a url and return its content" do
        page = web_page "http://localhost:4000"
        page.should eq 'Root Page'
      end

      it "should execute a block as a param and return its content" do
        page = web_page "http://localhost:4000" do
          "test"
        end
        page.should eq "test"
      end

    end

    context "download a page" do
      before(:each) do
        @web_server.stub_response('Web Page Scraped', {:method => :post, :path => '/foo'})
      end

      it "should download a page" do
        page = web_page "http://localhost:4000" do
          download :uri => "/foo"
        end
        page.should eq "Web Page Scraped"
      end

    end

    context "scrape a page" do
      before(:each) do
        @web_server.stub_response(read_resource('page.html'), {:method => :post, :path => '/page.html'})
      end

      it "should scrape content from a downloaded of page" do
        page = web_page "http://localhost:4000" do
          download :uri => "/page.html"
          scrape :avg_monthly_production => '//table/tr[4]/td/table/tr[17]/td[2]/b'
        end
        page.avg_monthly_production.should eq "1020"
      end

    end

    # Drop the handler after every example so the next one attaches its own.
    after(:each) do
      @web_server.detach
    end

    after(:all) do
      @web_server.stop
    end

  end
end
|
data/web-scraper.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "web-scraper/version"
|
4
|
+
|
5
|
+
# Gem specification for web-scraper.
Gem::Specification.new do |s|
  s.name        = "web-scraper"
  s.version     = Web::Scraper::VERSION
  s.authors     = ["Gian Carlo Pace"]
  s.email       = ["giancarlo.pace@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Web Values scraper}
  s.description = %q{It's an utility to scrape web pages}

  s.rubyforge_project = "web-scraper"

  # Runtime dependencies: the libraries required by the files under lib/.
  s.add_dependency "httpclient", "~> 2.2.1"
  s.add_dependency "nokogiri"
  s.add_dependency "logger"

  # rspec and rack are only required by the files under spec/, so they are
  # development dependencies and are not installed for users of the gem.
  s.add_development_dependency "rspec"
  s.add_development_dependency "rack"

  # The packaged file lists come from git, so building requires a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: web-scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Gian Carlo Pace
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-07-07 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: httpclient
|
16
|
+
requirement: &2160925440 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.2.1
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2160925440
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &2160925000 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2160925000
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rack
|
38
|
+
requirement: &2160924540 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *2160924540
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: nokogiri
|
49
|
+
requirement: &2160924120 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *2160924120
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: logger
|
60
|
+
requirement: &2160923700 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
type: :runtime
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *2160923700
|
69
|
+
description: It's an utility to scrape web pages
|
70
|
+
email:
|
71
|
+
- giancarlo.pace@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- .gitignore
|
77
|
+
- .rspec
|
78
|
+
- Gemfile
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- lib/web-scraper/logger_configurer.rb
|
82
|
+
- lib/web-scraper/scraped_response.rb
|
83
|
+
- lib/web-scraper/version.rb
|
84
|
+
- lib/web_scraper.rb
|
85
|
+
- spec/logger_spec.rb
|
86
|
+
- spec/resources/page.html
|
87
|
+
- spec/spec_helper.rb
|
88
|
+
- spec/support/test_web_server.rb
|
89
|
+
- spec/web_scraper_spec.rb
|
90
|
+
- web-scraper.gemspec
|
91
|
+
homepage: ''
|
92
|
+
licenses: []
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ! '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
105
|
+
requirements:
|
106
|
+
- - ! '>='
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
requirements: []
|
110
|
+
rubyforge_project: web-scraper
|
111
|
+
rubygems_version: 1.8.5
|
112
|
+
signing_key:
|
113
|
+
specification_version: 3
|
114
|
+
summary: Web Values scraper
|
115
|
+
test_files:
|
116
|
+
- spec/logger_spec.rb
|
117
|
+
- spec/resources/page.html
|
118
|
+
- spec/spec_helper.rb
|
119
|
+
- spec/support/test_web_server.rb
|
120
|
+
- spec/web_scraper_spec.rb
|