scrapers 1.5.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/bin/new_scraper +15 -13
- data/bin/rubytapas +5 -1
- data/bin/wunderground +16 -0
- data/lib/scrapers/version.rb +3 -3
- data/lib/scrapers/wunderground.rb +30 -0
- data/lib/wunderground_thor.rb +56 -0
- data/spec/lib/scrapers/wunderground_spec.rb +20 -0
- data/spec/wunderground_thor_spec.rb +6 -0
- data/templates/bin/new_scraper_bin.erb +4 -0
- data/templates/lib/new_scraper_thor.rb.erb +7 -0
- data/templates/{new_scraper.tt → lib/scrapers/new_scraper_lib.rb.erb} +2 -1
- data/templates/{new_scraper_spec.tt → spec/lib/new_scraper_lib_spec.rb.erb} +2 -2
- data/templates/spec/new_scraper_thor_spec.rb.erb +6 -0
- metadata +44 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd5f3c7ae7ee4f4730897d6ae083e5f2fc00f93f
|
4
|
+
data.tar.gz: b8fbe17e0456534b91e70d406550ea6ad8daff67
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24e3339257c087aaad5ef86b9d618c7d71bb7145ea3f36cf672abc850eba581d0c2c3ddb7e077b4e5fcfdce2f4db924091c1ba0c677997f50c5072089e10adae
|
7
|
+
data.tar.gz: fb7e81cd96b2c42065256b29adc1a767f8998e7e5261b59152a4cf77a2c2878e07ff7b6cefb6025a8f27f7da4512960a8f1ea0996d9e7866ac7d4ef8a235235b
|
data/Gemfile
CHANGED
data/bin/new_scraper
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'thor'
|
3
|
-
require 'scrapers'
|
3
|
+
# require 'scrapers'
|
4
4
|
require "active_support/core_ext/string/inflections"
|
5
5
|
require 'pry'
|
6
6
|
|
@@ -8,26 +8,28 @@ class NewScraper < Thor::Group
|
|
8
8
|
include Thor::Actions
|
9
9
|
|
10
10
|
argument :name
|
11
|
-
class_option :
|
12
|
-
|
11
|
+
class_option :make_bin, :aliases => "-B", :default => true
|
12
|
+
|
13
13
|
def self.source_root
|
14
14
|
File.expand_path("../../", __FILE__)
|
15
15
|
end
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
template("templates/
|
17
|
+
def create_bin_file
|
18
|
+
return unless options["make_bin"]
|
19
|
+
template("templates/bin/new_scraper_bin.erb", "bin/#{file_name}")
|
20
|
+
template("templates/lib/new_scraper_thor.rb.erb", "lib/#{file_name}_thor.rb")
|
21
|
+
template("templates/spec/new_scraper_thor_spec.rb.erb", "spec/#{file_name}_thor_spec.rb")
|
20
22
|
end
|
21
23
|
|
22
|
-
def
|
24
|
+
def create_scraper_lib_file
|
25
|
+
template("templates/lib/scrapers/new_scraper_lib.rb.erb", "lib/scrapers/#{file_name}.rb")
|
26
|
+
template("templates/spec/lib/new_scraper_lib_spec.rb.erb", "spec/lib/scrapers/#{file_name}_spec.rb")
|
27
|
+
end
|
23
28
|
|
24
|
-
|
25
|
-
test = "test"
|
26
|
-
else
|
27
|
-
test = "spec"
|
28
|
-
end
|
29
|
+
private
|
29
30
|
|
30
|
-
|
31
|
+
def file_name
|
32
|
+
name.underscore
|
31
33
|
end
|
32
34
|
|
33
35
|
end
|
data/bin/rubytapas
CHANGED
@@ -60,7 +60,11 @@ class RubyTapasDownload < Thor
|
|
60
60
|
showlist_urls = Scrapers::RubyTapas.showlist(url, user, pw)
|
61
61
|
|
62
62
|
showlist_urls.each do |url|
|
63
|
-
|
63
|
+
begin
|
64
|
+
Scrapers::RubyTapas.scrape url, user, pw, destination
|
65
|
+
rescue Errno::EEXIST
|
66
|
+
puts "episode exists, skipping"
|
67
|
+
end
|
64
68
|
print "pausing..."
|
65
69
|
sleep 5
|
66
70
|
puts "."
|
data/bin/wunderground
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'wunderground_thor'
|
3
|
+
|
4
|
+
result = Wunderground.start(ARGV)
|
5
|
+
|
6
|
+
if result.is_a?(Hash) && result.has_key?("location")
|
7
|
+
puts <<-EOT
|
8
|
+
Current conditions for: #{result["location"]}:
|
9
|
+
#{"=" * 80}
|
10
|
+
Temperature: #{result["temp"]}
|
11
|
+
#{result["condition"]}, Feels like #{result["feel"]}
|
12
|
+
High today: #{result["high"]} Low: #{result["low"]}
|
13
|
+
EOT
|
14
|
+
else
|
15
|
+
puts result
|
16
|
+
end
|
data/lib/scrapers/version.rb
CHANGED
data/lib/scrapers/wunderground.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'mechanize'
|
3
|
+
require 'pry'
|
4
|
+
|
5
|
+
module Scrapers
|
6
|
+
module Wunderground
|
7
|
+
|
8
|
+
def self.scrape(url)
|
9
|
+
results = Hash.new
|
10
|
+
|
11
|
+
Mechanize.start do |m|
|
12
|
+
m.get(url)
|
13
|
+
results["title"] = m.page.title
|
14
|
+
results["url"] = m.page.uri.to_s
|
15
|
+
snippet = m.page.search('#weather-snippet')
|
16
|
+
results["snippet"] = snippet.to_html.gsub(/[\n\t]+/,'')
|
17
|
+
results["location"] = snippet.at_css('h1').text.strip
|
18
|
+
results["image"] = snippet.at_css('#condition-img div img')['src']
|
19
|
+
results["temp"] = snippet.at_css('#temp').text.gsub(/[[:space:]]+/,' ').strip
|
20
|
+
results["condition"] = snippet.at_css('#condition').text.strip
|
21
|
+
results["feel"] = snippet.at_css('#feel').text.gsub(/[[:space:]]+/,' ').strip
|
22
|
+
results["high"] = snippet.at_css('.high').text.gsub(/[[:space:]]+/,' ').strip
|
23
|
+
results["low"] = snippet.at_css('.low').text.gsub(/[[:space:]]+/,' ').strip
|
24
|
+
end
|
25
|
+
|
26
|
+
results
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
data/lib/wunderground_thor.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'scrapers/wunderground'
|
3
|
+
require 'awesome_print'
|
4
|
+
|
5
|
+
class Wunderground < Thor
|
6
|
+
|
7
|
+
VERSION = '1.0.0'
|
8
|
+
WUNDERGROUND_URL = "http://www.wunderground.com/"
|
9
|
+
|
10
|
+
desc "current", "Show the current local weather conditions from #{WUNDERGROUND_URL}"
|
11
|
+
def current
|
12
|
+
current_conditions
|
13
|
+
end
|
14
|
+
|
15
|
+
desc "location", "Show location for current local weather"
|
16
|
+
def location
|
17
|
+
current_conditions["location"]
|
18
|
+
end
|
19
|
+
|
20
|
+
desc "image", "URL for current local weather conditions"
|
21
|
+
def image
|
22
|
+
current_conditions["image"]
|
23
|
+
end
|
24
|
+
|
25
|
+
desc "temp", "Current local temperature with units"
|
26
|
+
def temp
|
27
|
+
current_conditions["temp"]
|
28
|
+
end
|
29
|
+
|
30
|
+
desc "condition", "Current condition"
|
31
|
+
def condition
|
32
|
+
current_conditions["condition"]
|
33
|
+
end
|
34
|
+
|
35
|
+
desc "feel", "Feels like temperature"
|
36
|
+
def feel
|
37
|
+
current_conditions["feel"]
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "high", "Forecast High Temperature"
|
41
|
+
def high
|
42
|
+
current_conditions["high"]
|
43
|
+
end
|
44
|
+
|
45
|
+
desc "low", "Forecast Low temperature"
|
46
|
+
def low
|
47
|
+
current_conditions["low"]
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def current_conditions
|
53
|
+
@_current_conditions ||= Scrapers::Wunderground.scrape(WUNDERGROUND_URL)
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
data/spec/lib/scrapers/wunderground_spec.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'scrapers/wunderground'
|
4
|
+
|
5
|
+
module Scrapers
|
6
|
+
|
7
|
+
describe Wunderground do
|
8
|
+
it{should respond_to :scrape}
|
9
|
+
context "scraping" do
|
10
|
+
before(:all) do
|
11
|
+
@scrape = VCR.use_cassette('wunderground') do
|
12
|
+
@result = Scrapers::Wunderground.scrape
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
it {expect(@result).to_not be_nil}
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
require 'spec_helper'
|
3
|
-
require 'scrapers/<%=
|
3
|
+
require 'scrapers/<%= file_name %>'
|
4
4
|
|
5
5
|
module Scrapers
|
6
6
|
|
@@ -8,7 +8,7 @@ module Scrapers
|
|
8
8
|
it{should respond_to :scrape}
|
9
9
|
context "scraping" do
|
10
10
|
before(:all) do
|
11
|
-
@
|
11
|
+
@scrape = VCR.use_cassette('<%= file_name %>') do
|
12
12
|
@result = Scrapers::<%= name %>.scrape
|
13
13
|
end
|
14
14
|
end
|
metadata
CHANGED
@@ -1,181 +1,181 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: netrc
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: thor
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: activesupport
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: highline
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: bundler
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: rake
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rspec
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: guard
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- -
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
131
|
version: '0'
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: guard-rspec
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
142
142
|
requirements:
|
143
|
-
- -
|
143
|
+
- - ">="
|
144
144
|
- !ruby/object:Gem::Version
|
145
145
|
version: '0'
|
146
146
|
type: :development
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
|
-
- -
|
150
|
+
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
154
|
name: webmock
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
|
-
- -
|
157
|
+
- - ">="
|
158
158
|
- !ruby/object:Gem::Version
|
159
159
|
version: '0'
|
160
160
|
type: :development
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
|
-
- -
|
164
|
+
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: vcr
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
170
170
|
requirements:
|
171
|
-
- -
|
171
|
+
- - ">="
|
172
172
|
- !ruby/object:Gem::Version
|
173
173
|
version: '0'
|
174
174
|
type: :development
|
175
175
|
prerelease: false
|
176
176
|
version_requirements: !ruby/object:Gem::Requirement
|
177
177
|
requirements:
|
178
|
-
- -
|
178
|
+
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
181
|
description: A library of web site scrapers utilizing mechanize and other goodies.
|
@@ -186,11 +186,12 @@ executables:
|
|
186
186
|
- manning_books
|
187
187
|
- new_scraper
|
188
188
|
- rubytapas
|
189
|
+
- wunderground
|
189
190
|
extensions: []
|
190
191
|
extra_rdoc_files: []
|
191
192
|
files:
|
192
|
-
- .gitignore
|
193
|
-
- .rspec-example
|
193
|
+
- ".gitignore"
|
194
|
+
- ".rspec-example"
|
194
195
|
- Gemfile
|
195
196
|
- Guardfile
|
196
197
|
- LICENSE.txt
|
@@ -199,6 +200,7 @@ files:
|
|
199
200
|
- bin/manning_books
|
200
201
|
- bin/new_scraper
|
201
202
|
- bin/rubytapas
|
203
|
+
- bin/wunderground
|
202
204
|
- lib/scrapers.rb
|
203
205
|
- lib/scrapers/allrecipes.rb
|
204
206
|
- lib/scrapers/discoverynews.rb
|
@@ -212,8 +214,11 @@ files:
|
|
212
214
|
- lib/scrapers/rubytapas.rb
|
213
215
|
- lib/scrapers/sinfest.rb
|
214
216
|
- lib/scrapers/version.rb
|
217
|
+
- lib/scrapers/wunderground.rb
|
215
218
|
- lib/scrapers/xkcd.rb
|
219
|
+
- lib/wunderground_thor.rb
|
216
220
|
- scrapers.gemspec
|
221
|
+
- spec/lib/scrapers/wunderground_spec.rb
|
217
222
|
- spec/scrapers/allrecipes_spec.rb
|
218
223
|
- spec/scrapers/discoverynews_spec.rb
|
219
224
|
- spec/scrapers/download_spec.rb
|
@@ -225,8 +230,12 @@ files:
|
|
225
230
|
- spec/scrapers/xkcd_spec.rb
|
226
231
|
- spec/scrapers_spec.rb
|
227
232
|
- spec/spec_helper.rb
|
228
|
-
-
|
229
|
-
- templates/
|
233
|
+
- spec/wunderground_thor_spec.rb
|
234
|
+
- templates/bin/new_scraper_bin.erb
|
235
|
+
- templates/lib/new_scraper_thor.rb.erb
|
236
|
+
- templates/lib/scrapers/new_scraper_lib.rb.erb
|
237
|
+
- templates/spec/lib/new_scraper_lib_spec.rb.erb
|
238
|
+
- templates/spec/new_scraper_thor_spec.rb.erb
|
230
239
|
- vcr_cassettes/allrecipes_morning-glory-muffins-i.yml
|
231
240
|
- vcr_cassettes/disconews_history-of-space.yml
|
232
241
|
- vcr_cassettes/download-newfile.yml
|
@@ -246,21 +255,22 @@ require_paths:
|
|
246
255
|
- lib
|
247
256
|
required_ruby_version: !ruby/object:Gem::Requirement
|
248
257
|
requirements:
|
249
|
-
- -
|
258
|
+
- - ">="
|
250
259
|
- !ruby/object:Gem::Version
|
251
260
|
version: '0'
|
252
261
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
253
262
|
requirements:
|
254
|
-
- -
|
263
|
+
- - ">="
|
255
264
|
- !ruby/object:Gem::Version
|
256
265
|
version: '0'
|
257
266
|
requirements: []
|
258
267
|
rubyforge_project:
|
259
|
-
rubygems_version: 2.
|
268
|
+
rubygems_version: 2.2.2
|
260
269
|
signing_key:
|
261
270
|
specification_version: 4
|
262
271
|
summary: Web site scrapers
|
263
272
|
test_files:
|
273
|
+
- spec/lib/scrapers/wunderground_spec.rb
|
264
274
|
- spec/scrapers/allrecipes_spec.rb
|
265
275
|
- spec/scrapers/discoverynews_spec.rb
|
266
276
|
- spec/scrapers/download_spec.rb
|
@@ -272,3 +282,4 @@ test_files:
|
|
272
282
|
- spec/scrapers/xkcd_spec.rb
|
273
283
|
- spec/scrapers_spec.rb
|
274
284
|
- spec/spec_helper.rb
|
285
|
+
- spec/wunderground_thor_spec.rb
|