greenmonster 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Readme.markdown +3 -1
- data/lib/greenmonster/spider.rb +21 -8
- data/test/test_greenmonster_spider.rb +20 -2
- metadata +2 -2
data/Readme.markdown
CHANGED
@@ -13,7 +13,7 @@ If you don't want to specify a download location every time you run the spider,
|
|
13
13
|
# Set games folder location
|
14
14
|
>> Greenmonster.set_games_folder('/Users/geoff/games/')
|
15
15
|
|
16
|
-
The spider utility has three public class methods: Spider.pull_game, Spider.pull_day and Spider.pull_days.
|
16
|
+
The spider utility has three public class methods: Spider.pull_game, Spider.pull_day, and Spider.pull_days.
|
17
17
|
|
18
18
|
Spider.pull_game takes a game_id (the folder name of the game on the Gameday server) and a hash of options as arguments. If for some reason the game does not fall in the expected folder for the game's date or sport code, you can add those options to the arguments hash. Other options include :games_folder and :print_games (if false, game IDs are not printed to screen).
|
19
19
|
|
@@ -28,6 +28,8 @@ Spider.pull_day takes a hash of options as an argument. Greenmonster will creat
|
|
28
28
|
# Pulls all rookie league games for today
|
29
29
|
>> Greenmonster::Spider.pull_day({:sport_code => 'rok', :date => Date.today, :games_folder => './home/geoff/games'})
|
30
30
|
|
31
|
+
# Pulls all games in all sport codes for today
|
32
|
+
>> Greenmonster::Spider.pull_day({:all_sport_codes => true, :date => Date.today, :games_folder => './home/geoff/games'})
|
31
33
|
|
32
34
|
Spider.pull_days takes a range of dates to process as an argument, plus a hash of arguments to pass to Spider.pull_day.
|
33
35
|
|
data/lib/greenmonster/spider.rb
CHANGED
@@ -55,7 +55,7 @@ class Greenmonster::Spider
|
|
55
55
|
copy_gameday_xml(file,paths)
|
56
56
|
end
|
57
57
|
rescue StandardError => bang
|
58
|
-
puts "Unable to download some data for #{
|
58
|
+
puts "Unable to download some data for #{game_id}"
|
59
59
|
end
|
60
60
|
|
61
61
|
return game_id
|
@@ -80,17 +80,27 @@ class Greenmonster::Spider
|
|
80
80
|
:date => Date.today,
|
81
81
|
:sport_code => 'mlb',
|
82
82
|
}.merge(args)
|
83
|
-
|
83
|
+
|
84
|
+
# If we want all sport codes, set up the array.
|
85
|
+
if args[:all_sport_codes]
|
86
|
+
args[:sport_codes] = %w(aaa aax afa afx asx bbc fps hsb ind int jml nae naf nas nat naw oly rok win)
|
87
|
+
else
|
88
|
+
args[:sport_codes] = [args[:sport_code] || 'mlb']
|
89
|
+
end
|
90
|
+
|
84
91
|
# Iterate through every hyperlink on the page.
|
85
92
|
# These links represent the individual game folders
|
86
93
|
# for each date. Reject any links that aren't to game
|
87
94
|
# folders or that are to what look like backup game
|
88
95
|
# folders.
|
89
|
-
|
90
|
-
|
96
|
+
args[:sport_codes].each do |sport_code|
|
97
|
+
args[:sport_code] = sport_code
|
98
|
+
(Nokogiri::XML(self.get(gameday_league_and_date_url(args)))/"a").reject{|l| l.attribute('href').value[0,4] != "gid_" or l.attribute('href').value[-5,4] == "_bak"}.each do |e|
|
99
|
+
self.pull_game(e.attribute('href').value.gsub('/',''),args)
|
100
|
+
end
|
91
101
|
end
|
92
|
-
|
93
|
-
return
|
102
|
+
|
103
|
+
return args[:sport_code]
|
94
104
|
end
|
95
105
|
|
96
106
|
##
|
@@ -139,8 +149,11 @@ class Greenmonster::Spider
|
|
139
149
|
# paths: (Hash)
|
140
150
|
|
141
151
|
def self.copy_gameday_xml (file_name,paths)
|
142
|
-
|
143
|
-
|
152
|
+
download = self.get(paths[:mlbGameFolder] + "#{file_name =~ /inning/ ? 'inning/' : ''}" + file_name).body
|
153
|
+
unless download.include?('404 Not Found')
|
154
|
+
open(paths[:localGameFolder] + "#{file_name =~ /inning/ ? 'inning/' : ''}" + file_name, 'w') do |file|
|
155
|
+
file.write(download)
|
156
|
+
end
|
144
157
|
end
|
145
158
|
end
|
146
159
|
|
@@ -60,11 +60,17 @@ class GreenmonsterSpiderTest < MiniTest::Unit::TestCase
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def test_pull_games_prior_to_2008
|
63
|
-
Greenmonster::Spider.
|
63
|
+
Greenmonster::Spider.pull_game('gid_2007_04_15_detmlb_tormlb_1', {:games_folder => @local_test_data_location, :print_games => false})
|
64
64
|
|
65
65
|
assert_equal 12, Dir.entries(@local_test_data_location + '/mlb/year_2007/month_04/day_15/gid_2007_04_15_detmlb_tormlb_1/inning/').count
|
66
66
|
end
|
67
67
|
|
68
|
+
def test_pull_all_sport_codes_for_day
|
69
|
+
Greenmonster::Spider.pull_day({:all_sport_codes => true, :print_games => false, :games_folder => @local_test_data_location, :date => Date.new(2011,7,1)})
|
70
|
+
|
71
|
+
assert_equal 9, Dir.entries(@local_test_data_location).count
|
72
|
+
end
|
73
|
+
|
68
74
|
def test_pull_days
|
69
75
|
Greenmonster::Spider.pull_days((Date.new(2011,8,4)..Date.new(2011,8,5)), {:print_games => false, :games_folder => @local_test_data_location})
|
70
76
|
|
@@ -80,7 +86,19 @@ class GreenmonsterSpiderTest < MiniTest::Unit::TestCase
|
|
80
86
|
end
|
81
87
|
|
82
88
|
def test_pull_single_game_by_game_id
|
83
|
-
Greenmonster::Spider.pull_game('
|
89
|
+
Greenmonster::Spider.pull_game('gid_2011_07_04_tormlb_bosmlb_1', {:games_folder => @local_test_data_location, :print_games => false})
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_no_exception_raised_if_game_data_not_available
|
93
|
+
assert_output(nil,'') do
|
94
|
+
Greenmonster::Spider.pull_game('gid_2011_07_04_zzzmlb_yyymlb_1', {:games_folder => @local_test_data_location, :print_games => false})
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def test_local_file_not_created_if_remote_file_does_not_exist
|
99
|
+
Greenmonster::Spider.pull_game('gid_2011_07_01_xxxmlb_yyymlb_1', {:games_folder => @local_test_data_location, :print_games => false})
|
100
|
+
|
101
|
+
assert_equal 3, Dir.entries(@local_test_data_location + '/mlb/year_2011/month_07/day_01/gid_2011_07_01_xxxmlb_yyymlb_1').count
|
84
102
|
end
|
85
103
|
|
86
104
|
def teardown
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: greenmonster
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-14 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: A utility for working with MLB Gameday XML data.
|
15
15
|
email: geoff.harcourt@gmail.com
|