pitch_fx_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NDRlMDUwY2FhMjBmYjJiNTlhZGZiNGEzOWE3NWRlMTFjYTczNDU5Yg==
5
+ data.tar.gz: !binary |-
6
+ MGZmNzU3MmQ5YzhmNzRmZDg0Y2U4MTI3NGQ0NDkwNjVhNTEyNDExZg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODBjNDZlNGU0MzVmODgxNDJhNzA0NmUxODM1NDhiZjRhZTZiYTEwOTFmNjcw
10
+ YzY2OTI1ZTJkNjI0YzYwMDI1OWZiY2Y1ZmRiMTg5ZDAxMTY2YjUzZDBiNzY0
11
+ NjcyZjc5MDFmMWMyNjYwMjIyZWNmYTgzOTRjZWUzNzlhYzg0Zjc=
12
+ data.tar.gz: !binary |-
13
+ OTg4MmYwYzhmNDdmMjk3MDI1N2I5M2ZhNTJlZGI2OTU0NmMxMzc5YzU3OWVi
14
+ ZGU5ZTgzYzNmOTc5OGY1ZmJhMjQzYWEzZjJkOTc4NzliMjc1M2ZkNjE0OTky
15
+ MWY3MDQ4ZDhjYzAwNDk2OGU3YTQzNDk5MWI3YTEzNTg5OGU2Yzg=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pitch_fx_scraper.gemspec
4
+ gemspec
5
+
6
+ gem "chronic"
7
+ gem "nokogiri"
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 David Sachitano
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # PitchFxScraper
2
+
3
+ Utilities for grabbing various data from MLB. The initial release is focused on getting minimal schedule data.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'pitch_fx_scraper'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install pitch_fx_scraper
18
+
19
+ ## Usage
20
+
21
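+ Require `pitch_fx_scraper`, then call the `Fetch` module with a date string, for example `Fetch.get_game_urls('April 3rd 2014')` or `Fetch.get_milb_schedule('2014-04-24')`.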
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require "pitch_fx_scraper"
3
+
4
+
5
# Rake task that downloads the data files for a single date.
#
# Usage: rake "fetch_data_for_date[2014-04-03]"
#
# The date string is passed unchanged to Fetch.save_files_for_date.
desc "Fetch data files for a given date"
task :fetch_data_for_date, :date_string do |_task, args|
  # Without the bracketed argument args.date_string is nil; stop with usage
  # help instead of handing nil to the fetcher.
  abort 'usage: rake "fetch_data_for_date[DATE]"' if args.date_string.to_s.strip.empty?

  # Fetch.save_files_for_date is commented out in lib/pitch_fx_scraper/fetch.rb,
  # so check for it and explain, rather than dying with a bare NoMethodError.
  unless Fetch.respond_to?(:save_files_for_date)
    abort "Fetch.save_files_for_date is not available in this version of pitch_fx_scraper"
  end

  Fetch.save_files_for_date(args.date_string)
end
@@ -0,0 +1,100 @@
1
+
2
+ require 'chronic'
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+
6
##
## Builds Gameday data URLs on gd2.mlb.com and reads game and schedule
## information out of the XML documents served there.
##
module Fetch

  ## Scheme and host shared by every URL this module builds.
  HOST_URL_BASE = "http://gd2.mlb.com".freeze

  ## Return the Gameday directory URL (no trailing slash) for a date.
  ##
  ## date - a date string understood by Chronic.parse,
  ##        e.g. 'April 3rd 2014' or '2014-04-24'
  ## milb - a literal false selects the "mlb" directory; any other value
  ##        selects the "milb" directory
  ##
  ## Raises ArgumentError when the date cannot be parsed.
  def self.url_for_date(date, milb=false)
    date_object = Chronic.parse(date)
    # Chronic.parse yields nil when it cannot make sense of the input; fail
    # here with a clear message instead of a NoMethodError on nil below.
    raise ArgumentError, "could not parse date: #{date.inspect}" if date_object.nil?

    league = milb == false ? "mlb" : "milb"
    format("%s/components/game/%s/year_%04d/month_%02d/day_%02d",
           HOST_URL_BASE, league,
           date_object.year, date_object.month, date_object.day)
  end


  ## Return a list of hashes containing game data urls
  ## with game ids for a given date string
  ##
  ## Each hash has the keys :game_id and :game_url. For MLB (milb == false)
  ## the url points at the game's game_events.xml; otherwise it is the
  ## "boxscore" attribute taken from the MiLB master game file.
  def self.get_game_urls(date, milb=false)
    mlb = milb == false
    scoreboard_file = mlb ? "/master_scoreboard.xml" : "/milb_master_game_file.xml"
    scoreboard = fetch_xml(url_for_date(date, milb) + scoreboard_file)

    scoreboard.xpath("//game").map do |node|
      if mlb
        game_path = node.attr('game_data_directory')
        {:game_id => node.attr('gameday'),
         :game_url => HOST_URL_BASE + game_path + "/game_events.xml"}
      else
        {:game_id => node.attr('id'), :game_url => node.attr('boxscore')}
      end
    end
  end

  ## Return a list of hashes containing home team data
  ## for a given date string
  ##
  ## Each hash has the keys :home_sname, :home_fname and :date, read from the
  ## <boxscore> elements of every MiLB boxscore listed for that date.
  ## A boxscore whose download fails with an HTTP error is skipped, with a
  ## warning on stderr, so one missing file does not lose the whole schedule.
  def self.get_milb_schedule(date)
    get_game_urls(date, true).each_with_object([]) do |score, games|
      begin
        fetch_xml(score[:game_url]).xpath("//boxscore").each do |node|
          games << {:home_sname => node.attr('home_sname'),
                    :home_fname => node.attr('home_fname'),
                    :date => node.attr('date')}
        end
      rescue OpenURI::HTTPError => e
        warn "Fetch.get_milb_schedule: skipping #{score[:game_url]} (#{e.message})"
      end
    end
  end

  ## Download +url+ and parse it as XML, closing the handle afterwards.
  ##
  ## URI.open is used where it exists; open-uri's Kernel#open override was
  ## removed in Ruby 3.0. Older Rubies without URI.open fall back to open.
  def self.fetch_xml(url)
    if URI.respond_to?(:open)
      URI.open(url) { |io| Nokogiri::XML(io) }
    else
      open(url) { |io| Nokogiri::XML(io) }
    end
  end
  private_class_method :fetch_xml


  ## expects a hash with keys {:game_id, :game_url}
  ##
  #def Fetch.save_file_to_disk (game_hash)
  #  open(@@game_data_path+"/"+game_hash[:game_id]+".xml", mode = "w+") do |file|
  #    file << open(game_hash[:game_url]).read
  #  end
  #end


  ## given a date as a string, fetch and save all
  ## game files to disk
  ##
  #def Fetch.save_files_for_date (date_string)
  #  game_event_urls = Fetch.get_game_urls(date_string)
  #
  #  game_event_urls.each do |game_data|
  #    Fetch.save_file_to_disk (game_data)
  #  end
  #end

end
@@ -0,0 +1,3 @@
1
# Namespace for the pitch_fx_scraper gem.
module PitchFxScraper
  # Gem version string, read by the gemspec.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,8 @@
1
# Entry point for the gem: loads the version constant and the Fetch helpers.
require "pitch_fx_scraper/version"
require "pitch_fx_scraper/fetch"

# Namespace for the gem. Nothing is defined in this file; the scraping
# helpers live in the separate top-level Fetch module.
module PitchFxScraper
end

# NOTE(review): this top-level include mixes Fetch into Object. The methods in
# Fetch are defined on the module itself and called as Fetch.method_name, so
# the include looks unnecessary. Kept for backward compatibility; confirm
# nothing relies on it before removing.
include Fetch
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for pitch_fx_scraper.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'pitch_fx_scraper/version'

Gem::Specification.new do |spec|
  spec.name          = "pitch_fx_scraper"
  spec.version       = PitchFxScraper::VERSION
  spec.authors       = ["David Sachitano"]
  spec.email         = ["dsachita@gmail.com"]
  spec.description   = %q{Utilities for grabbing various data from mlb}
  spec.summary       = %q{Initial release is focused on getting minimal schedule data.}
  spec.homepage      = "http://thebaseballrun.com"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies: lib/pitch_fx_scraper/fetch.rb requires both of these
  # when it is loaded, so they must be installed together with the gem.
  # Listing them only in the Gemfile does not carry over to the built gem.
  spec.add_dependency "chronic"
  spec.add_dependency "nokogiri"

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake", "~> 10.3"
  spec.add_development_dependency "rspec", "~> 2.14"
end
@@ -0,0 +1,41 @@
1
require 'spec_helper'

# Specs for the Fetch module.
# NOTE(review): the get_game_urls and get_milb_schedule examples call Fetch
# without any stubbing, so they presumably need live access to gd2.mlb.com.
describe Fetch do

  describe "url_for_date" do
    it 'should return a valid url for April 3rd 2014' do
      expected_url = 'http://gd2.mlb.com/components/game/mlb/year_2014/month_04/day_03'

      Fetch.url_for_date('April 3rd 2014').should eq(expected_url)
    end

    it 'should return a valid milb url for 2014-04-24' do
      expected_url = 'http://gd2.mlb.com/components/game/milb/year_2014/month_04/day_24'

      Fetch.url_for_date('2014-04-24', true).should eq(expected_url)
    end
  end

  describe "get_game_urls" do
    it 'should return list of games for April 3rd 2014' do
      # One game known to have been played on that date.
      expected_game = {
        :game_id  => "2014_04_03_seamlb_oakmlb_1",
        :game_url => "http://gd2.mlb.com/components/game/mlb/year_2014/month_04/day_03/gid_2014_04_03_seamlb_oakmlb_1/game_events.xml"
      }

      Fetch.get_game_urls('April 3rd 2014').should include(expected_game)
    end
  end

  describe "get_milb_schedule" do
    it 'should return list of home teams for April 23rd 2014' do
      expected_home = {:home_sname=>"Tampa", :home_fname=>"Tampa Yankees", :date=>"April 23, 2014"}

      Fetch.get_milb_schedule('April 23rd 2014').should include(expected_home)
    end

    it 'should return list of home teams for 2014-04-24' do
      expected_home = {:home_sname=>"Stockton", :home_fname=>"Stockton Ports", :date=>"April 24, 2014"}

      Fetch.get_milb_schedule('2014-04-24').should include(expected_home)
    end
  end

end
@@ -0,0 +1,24 @@
1
# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# Require this file using `require "spec_helper"` to ensure that it is only
# loaded once.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration

# Loading bundler/setup both loads Bundler and sets up the load path from the
# Gemfile. Calling Bundler.setup directly raises NameError when Bundler has
# not been loaded yet (e.g. running `rspec` without `bundle exec`).
require 'bundler/setup'
require 'pitch_fx_scraper'


RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = 'random'
end

# pitch_fx_scraper already requires this file; the explicit require is kept
# so the specs get Fetch from this checkout's lib directory.
require_relative '../lib/pitch_fx_scraper/fetch'
24
+
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pitch_fx_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - David Sachitano
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '2.14'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '2.14'
55
+ description: Utilities for grabbing various data from mlb
56
+ email:
57
+ - dsachita@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - .rspec
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - lib/pitch_fx_scraper.rb
69
+ - lib/pitch_fx_scraper/fetch.rb
70
+ - lib/pitch_fx_scraper/version.rb
71
+ - pitch_fx_scraper.gemspec
72
+ - spec/fetch_spec.rb
73
+ - spec/spec_helper.rb
74
+ homepage: http://thebaseballrun.com
75
+ licenses:
76
+ - MIT
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ! '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.4.1
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Initial release is focused on getting minimal schedule data.
98
+ test_files:
99
+ - spec/fetch_spec.rb
100
+ - spec/spec_helper.rb