sportdb-importers 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +52 -12
- data/Rakefile +1 -2
- data/lib/sportdb/importers.rb +41 -32
- data/lib/sportdb/importers/import.rb +18 -15
- data/lib/sportdb/importers/version.rb +3 -2
- data/test/helper.rb +0 -1
- data/test/test_club.rb +2 -2
- data/test/test_import.rb +1 -1
- data/test/test_version.rb +5 -3
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a21cdc02ddf3bef0f067f6bab0d369d0618ac0c
|
4
|
+
data.tar.gz: bd325b2ce078afaa1ecc32a8ab2280dd463b22e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 320f53db2ab141a2994f7b0b8280daef4b066a951894270228c1671f10498db7c4e502f4418f588155b21309086c240ed92ed135318098167ff4225e5dda5f46
|
7
|
+
data.tar.gz: fc0b0adeeae78f515e7c6b1a8d6878dfc21b62d3c6ff1fef8155225a0a23551bd2e9c330e687b737a2e5c16880fd625220dd28d2dae21d15ccd144582f0a5f1b
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# sportdb-importers - tools 'n' scripts for importing sports (football) data in alternate (text) formats incl. comma-separated values (csv) format
|
1
|
+
# sportdb-importers - tools 'n' scripts for importing sports (football) data in alternate (text) formats incl. comma-separated values (csv) format
|
2
2
|
|
3
3
|
|
4
4
|
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
@@ -8,35 +8,75 @@
|
|
8
8
|
* forum :: [opensport](http://groups.google.com/group/opensport)
|
9
9
|
|
10
10
|
|
11
|
-
|
12
11
|
## Usage
|
13
12
|
|
14
|
-
|
15
|
-
|
13
|
+
|
14
|
+
**Step 1**
|
15
|
+
|
16
|
+
Set up the (SQL) database. Let's use and build a single-file SQLite database (from scratch),
|
17
|
+
as an example:
|
16
18
|
|
17
19
|
``` ruby
|
18
20
|
require 'sportdb/importers'
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
SportDb
|
22
|
+
SportDb.connect( adapter: 'sqlite3',
|
23
|
+
database: './england.db' )
|
24
|
+
SportDb.create_all ## build database schema (tables, indexes, etc.)
|
25
|
+
```
|
26
|
+
|
23
27
|
|
28
|
+
**Step 2**
|
24
29
|
|
25
|
-
|
26
|
-
|
30
|
+
Let's use the public domain football.csv datasets for England (see [`footballcsv/england`](https://github.com/footballcsv/england)), as an example:
|
31
|
+
|
32
|
+
```
|
33
|
+
Round, Date, Team 1, FT, HT, Team 2
|
34
|
+
1, (Fri) 9 Aug 2019, Liverpool FC, 4-1, 4-0, Norwich City FC
|
35
|
+
1, (Sat) 10 Aug 2019, West Ham United FC, 0-5, 0-1, Manchester City FC
|
36
|
+
1, (Sat) 10 Aug 2019, AFC Bournemouth, 1-1, 0-0, Sheffield United FC
|
37
|
+
1, (Sat) 10 Aug 2019, Burnley FC, 3-0, 0-0, Southampton FC
|
38
|
+
1, (Sat) 10 Aug 2019, Crystal Palace FC, 0-0, 0-0, Everton FC
|
39
|
+
1, (Sat) 10 Aug 2019, Watford FC, 0-3, 0-1, Brighton & Hove Albion FC
|
40
|
+
1, (Sat) 10 Aug 2019, Tottenham Hotspur FC, 3-1, 0-1, Aston Villa FC
|
41
|
+
1, (Sun) 11 Aug 2019, Leicester City FC, 0-0, 0-0, Wolverhampton Wanderers FC
|
42
|
+
1, (Sun) 11 Aug 2019, Newcastle United FC, 0-1, 0-0, Arsenal FC
|
43
|
+
1, (Sun) 11 Aug 2019, Manchester United FC, 4-0, 1-0, Chelsea FC
|
44
|
+
...
|
45
|
+
```
|
46
|
+
(Source: [england/2019-20/eng.1.csv](https://github.com/footballcsv/england/blob/master/2010s/2019-20/eng.1.csv))
|
27
47
|
|
28
|
-
SportDb.create_all ## build database schema / tables
|
29
48
|
|
49
|
+
and let's try:
|
30
50
|
|
51
|
+
``` ruby
|
52
|
+
## assumes football.csv datasets for England in ./england directory
|
53
|
+
## see github.com/footballcsv/england
|
54
|
+
SportDb.read_csv( './england/2019-20/eng.1.csv' )
|
55
|
+
|
56
|
+
## let's try another season
|
57
|
+
SportDb.read_csv( './england/2018-19/eng.1.csv' )
|
58
|
+
SportDb.read_csv( './england/2018-19/eng.2.csv' )
|
59
|
+
```
|
60
|
+
|
61
|
+
All leagues, seasons, clubs, match days and rounds, match fixtures and results,
|
62
|
+
and more are now in your (SQL) database of choice.
|
63
|
+
|
64
|
+
|
65
|
+
Bonus: Let's import all datafiles for all seasons (from 1888-89 to today)
|
66
|
+
for England, use:
|
67
|
+
|
68
|
+
``` ruby
|
31
69
|
## note: requires a local copy of the football.csv england datasets
|
32
70
|
## see https://github.com/footballcsv/england
|
33
|
-
|
34
|
-
|
71
|
+
SportDb.read_csv( './england' )
|
72
|
+
# -or- use a zip archive
|
73
|
+
SportDb.read_csv( './england.zip' )
|
35
74
|
```
|
36
75
|
|
37
76
|
That's it.
|
38
77
|
|
39
78
|
|
79
|
+
|
40
80
|
## License
|
41
81
|
|
42
82
|
The `sportdb-importers` scripts are dedicated to the public domain.
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/importers/version.rb'
|
|
3
3
|
|
4
4
|
Hoe.spec 'sportdb-importers' do
|
5
5
|
|
6
|
-
self.version = SportDb::Importers::VERSION
|
6
|
+
self.version = SportDb::Module::Importers::VERSION
|
7
7
|
|
8
8
|
self.summary = "sportdb-importers - tools 'n' scripts for importing sports (football) data in alternate (text) formats incl. comma-separated values (csv) format"
|
9
9
|
self.description = summary
|
@@ -20,7 +20,6 @@ Hoe.spec 'sportdb-importers' do
|
|
20
20
|
self.licenses = ['Public Domain']
|
21
21
|
|
22
22
|
self.extra_deps = [
|
23
|
-
['sportdb-text', '>= 0.4.0'],
|
24
23
|
['sportdb-sync', '>= 1.0.0'],
|
25
24
|
]
|
26
25
|
|
data/lib/sportdb/importers.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
|
4
4
|
## 3rd party gems
|
5
|
-
require 'sportdb/text'
|
6
5
|
require 'sportdb/sync'
|
7
6
|
|
8
7
|
|
@@ -12,48 +11,29 @@ require 'sportdb/importers/version' # let version always go first
|
|
12
11
|
require 'sportdb/importers/import'
|
13
12
|
|
14
13
|
|
15
|
-
|
14
|
+
module SportDb
|
15
|
+
class Package
|
16
16
|
## (re)open class - note: adds more machinery; see sportdb-text for first/original/base definition
|
17
17
|
|
18
|
-
def import( start: nil )
|
18
|
+
def read_csv( start: nil ) ### todo/fix - rename to read_csv !!!!!!
|
19
19
|
## start - season e.g. 1993/94 to start (skip older seasons)
|
20
20
|
## note: assume package holds country/national (club) league
|
21
21
|
# use for importing german bundesliga, english premier league, etc.
|
22
22
|
|
23
|
-
|
24
|
-
pp entries
|
25
|
-
|
26
|
-
entries.each_with_index do |(season_key, datafiles),i|
|
27
|
-
|
23
|
+
match_by_season( format: 'csv', start: start ).each_with_index do |(season_key, entries),i|
|
28
24
|
puts "season [#{i+1}] >#{season_key}<:"
|
29
25
|
|
30
|
-
|
31
|
-
## filter_season( clause, season_key )
|
32
|
-
## or better filter = SeasonFilter.new( clause )
|
33
|
-
## filter.skip? filter.include? ( season_sason_key )?
|
34
|
-
## fiteer.before?( season_key ) etc.
|
35
|
-
## find some good method names!!!!
|
36
|
-
if start
|
37
|
-
start_year = start[0..3].to_i
|
38
|
-
season_start_year = season_key[0..3].to_i
|
39
|
-
if season_start_year < start_year
|
40
|
-
puts "skip #{season_start_year} before #{start_year}"
|
41
|
-
next
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
datafiles.each do |datafile,j|
|
46
|
-
path = expand_path( datafile )
|
26
|
+
entries.each do |entry,j|
|
47
27
|
## note: assume datafile basename (without extension) is the league key
|
48
28
|
## e.g. eng.1, eng.3a, eng.3b, at.1, champs, world, etc.
|
49
|
-
league_key = File.basename(
|
29
|
+
league_key = File.basename( entry.name, File.extname( entry.name ) ) ## get basename WITHOUT extension
|
50
30
|
|
51
|
-
pp [
|
31
|
+
pp [entry.name, season_key, league_key]
|
52
32
|
|
53
|
-
event = CsvEventImporter.
|
54
|
-
|
33
|
+
event = CsvEventImporter.parse( entry.read, league: league_key,
|
34
|
+
season: season_key )
|
55
35
|
|
56
|
-
puts "added #{event.title}"
|
36
|
+
puts "added #{event.title} - from source >#{entry.name}<"
|
57
37
|
puts " #{event.teams.size} teams"
|
58
38
|
puts " #{event.rounds.size} rounds"
|
59
39
|
puts " #{event.games.size} games"
|
@@ -61,8 +41,37 @@ def import( start: nil )
|
|
61
41
|
end # each season
|
62
42
|
end # method import
|
63
43
|
|
64
|
-
end # class
|
44
|
+
end # class Package
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
############
|
49
|
+
# add convenience shortcut helper
|
50
|
+
def self.read_csv( path )
|
51
|
+
if File.directory?( path ) ## if directory assume "unzipped" package
|
52
|
+
DirPackage.new( path ).read_csv
|
53
|
+
elsif File.file?( path ) && File.extname( path ) == '.zip' ## check if file is a .zip (archive) file
|
54
|
+
ZipPackage.new( path ).read_csv
|
55
|
+
else ## no package; assume single (standalone) datafile
|
56
|
+
## assume single (free-standing) file
|
57
|
+
full_path = File.expand_path( path ) ## resolve/make path absolute
|
58
|
+
## 1) assume basename is the league key
|
59
|
+
## 2) assume last directory is the season key
|
60
|
+
league_key = File.basename( full_path, File.extname( full_path ) ) ## get basename WITHOUT extension
|
61
|
+
season_key = File.basename( File.dirname( full_path ) )
|
62
|
+
|
63
|
+
event = CsvEventImporter.read( full_path, league: league_key,
|
64
|
+
season: season_key )
|
65
|
+
|
66
|
+
puts "added #{event.title} - from source >#{path}<"
|
67
|
+
puts " #{event.teams.size} teams"
|
68
|
+
puts " #{event.rounds.size} rounds"
|
69
|
+
puts " #{event.games.size} games"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end # module SportDb
|
73
|
+
|
65
74
|
|
66
75
|
|
67
76
|
|
68
|
-
puts SportDb::Importers.banner # say hello
|
77
|
+
puts SportDb::Module::Importers.banner # say hello
|
@@ -1,6 +1,9 @@
|
|
1
1
|
|
2
2
|
## todo/fix: rename to CsvEventImporter or EventImporter !!! returns Event!!
|
3
|
-
|
3
|
+
## todo/fix/check: rename to CsvMatchReader and CsvMatchReader to CsvMatchParser - why? why not?
|
4
|
+
|
5
|
+
module SportDb
|
6
|
+
class CsvEventImporter
|
4
7
|
|
5
8
|
def self.read( path, league:, season:,
|
6
9
|
headers: nil )
|
@@ -24,8 +27,8 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
24
27
|
raise ArgumentError("string expected for season; got #{season.class.name}") unless season.is_a? String
|
25
28
|
|
26
29
|
## try mapping of league here - why? why not?
|
27
|
-
@league =
|
28
|
-
@season =
|
30
|
+
@league = Import.catalog.leagues.find!( league )
|
31
|
+
@season = Import::Season.new( season )
|
29
32
|
end
|
30
33
|
|
31
34
|
|
@@ -39,17 +42,17 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
39
42
|
opts = {}
|
40
43
|
opts[:headers] = @headers if @headers
|
41
44
|
|
42
|
-
matches =
|
45
|
+
matches = CsvMatchParser.parse( @txt, **opts )
|
43
46
|
|
44
|
-
matchlist =
|
47
|
+
matchlist = Import::Matchlist.new( matches )
|
45
48
|
|
46
49
|
team_names = matchlist.teams ## was: find_teams_in_matches_txt( matches_txt )
|
47
50
|
puts "#{team_names.size} teams:"
|
48
51
|
pp team_names
|
49
52
|
|
50
53
|
## note: allows duplicates - will return uniq struct recs in teams
|
51
|
-
teams =
|
52
|
-
|
54
|
+
teams = Import.catalog.teams.find_by!( name: team_names,
|
55
|
+
league: @league )
|
53
56
|
## build mapping - name => team struct record
|
54
57
|
team_mappings = team_names.zip( teams ).to_h
|
55
58
|
|
@@ -59,8 +62,8 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
59
62
|
#######
|
60
63
|
# start with database updates / sync here
|
61
64
|
|
62
|
-
event_rec =
|
63
|
-
|
65
|
+
event_rec = Sync::Event.find_or_create_by( league: @league,
|
66
|
+
season: @season )
|
64
67
|
|
65
68
|
## todo/fix:
|
66
69
|
## add check if event has teams
|
@@ -74,7 +77,7 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
74
77
|
|
75
78
|
# maps struct record "canonical" team name to active record db record!!
|
76
79
|
## note: use "canonical" team name as hash key for now (and NOT the object itself) - why? why not?
|
77
|
-
team_recs =
|
80
|
+
team_recs = Sync::Team.find_or_create( team_mappings.values.uniq )
|
78
81
|
|
79
82
|
## todo/fix/check:
|
80
83
|
## add check if event has teams
|
@@ -89,7 +92,7 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
89
92
|
|
90
93
|
|
91
94
|
## add catch-all/unclassified "dummy" round
|
92
|
-
round_rec =
|
95
|
+
round_rec = Model::Round.create!(
|
93
96
|
event_id: event_rec.id,
|
94
97
|
title: 'Matchday ??? / Missing / Catch-All', ## find a better name?
|
95
98
|
pos: 999,
|
@@ -98,14 +101,14 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
98
101
|
|
99
102
|
## add matches
|
100
103
|
matches.each do |match|
|
101
|
-
team1_rec =
|
102
|
-
team2_rec =
|
104
|
+
team1_rec = Sync::Team.cache[ team_mappings[match.team1].name ]
|
105
|
+
team2_rec = Sync::Team.cache[ team_mappings[match.team2].name ]
|
103
106
|
|
104
107
|
if match.date.nil?
|
105
108
|
puts "!!! WARN: skipping match - play date missing!!!!!"
|
106
109
|
pp match
|
107
110
|
else
|
108
|
-
rec =
|
111
|
+
rec = Model::Game.create!(
|
109
112
|
team1_id: team1_rec.id,
|
110
113
|
team2_id: team2_rec.id,
|
111
114
|
round_id: round_rec.id,
|
@@ -123,4 +126,4 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
123
126
|
end # method parse
|
124
127
|
|
125
128
|
end # class CsvEventImporter
|
126
|
-
|
129
|
+
end # module SportDb
|
@@ -1,13 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
|
4
|
-
|
5
4
|
module SportDb
|
5
|
+
module Module
|
6
6
|
module Importers
|
7
7
|
|
8
8
|
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
9
9
|
MINOR = 0
|
10
|
-
PATCH = 0
|
10
|
+
PATCH = 1
|
11
11
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
12
12
|
|
13
13
|
def self.version
|
@@ -23,4 +23,5 @@ module Importers
|
|
23
23
|
end
|
24
24
|
|
25
25
|
end # module Importers
|
26
|
+
end # module Module
|
26
27
|
end # module SportDb
|
data/test/helper.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
## note: use the local version of sportdb gems
|
2
2
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-formats/lib' ))
|
3
3
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-config/lib' ))
|
4
|
-
$LOAD_PATH.unshift( File.expand_path( '../sportdb-text/lib' ))
|
5
4
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-models/lib' ))
|
6
5
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-sync/lib' ))
|
7
6
|
|
data/test/test_club.rb
CHANGED
@@ -54,7 +54,7 @@ class TestClub < MiniTest::Test
|
|
54
54
|
score2i: 'HTAG'
|
55
55
|
}
|
56
56
|
|
57
|
-
matches =
|
57
|
+
matches = SportDb::CsvMatchParser.read( "#{SportDb::Test.data_dir}/england/2017-18/E0.csv",
|
58
58
|
headers: headers
|
59
59
|
)
|
60
60
|
|
@@ -117,7 +117,7 @@ class TestClub < MiniTest::Test
|
|
117
117
|
score2: 'AG',
|
118
118
|
}
|
119
119
|
|
120
|
-
matches =
|
120
|
+
matches = SportDb::CsvMatchParser.read( "#{SportDb::Test.data_dir}/austria/AUT.csv",
|
121
121
|
headers: headers,
|
122
122
|
filters: { 'Season' => '2016/2017' }
|
123
123
|
)
|
data/test/test_import.rb
CHANGED
@@ -34,7 +34,7 @@ class TestImport < MiniTest::Test
|
|
34
34
|
score2i: 'HTAG'
|
35
35
|
}
|
36
36
|
|
37
|
-
event_rec = CsvEventImporter.read( "#{SportDb::Test.data_dir}/england/2017-18/E0.csv",
|
37
|
+
event_rec = SportDb::CsvEventImporter.read( "#{SportDb::Test.data_dir}/england/2017-18/E0.csv",
|
38
38
|
headers: headers,
|
39
39
|
league: 'ENG', ## fetch English Premiere League
|
40
40
|
season: '2017/18'
|
data/test/test_version.rb
CHANGED
@@ -10,9 +10,11 @@ require 'helper'
|
|
10
10
|
class TestVersion < MiniTest::Test
|
11
11
|
|
12
12
|
def test_version
|
13
|
-
pp SportDb::Importers::VERSION
|
14
|
-
pp SportDb::Importers.banner
|
15
|
-
pp SportDb::Importers.root
|
13
|
+
pp SportDb::Module::Importers::VERSION
|
14
|
+
pp SportDb::Module::Importers.banner
|
15
|
+
pp SportDb::Module::Importers.root
|
16
|
+
|
17
|
+
pp SportDb::Module.constants
|
16
18
|
|
17
19
|
assert true
|
18
20
|
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-importers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-05-
|
11
|
+
date: 2020-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: sportdb-text
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 0.4.0
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 0.4.0
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: sportdb-sync
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|