sportdb-importers 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -12
- data/Rakefile +1 -2
- data/lib/sportdb/importers.rb +41 -32
- data/lib/sportdb/importers/import.rb +18 -15
- data/lib/sportdb/importers/version.rb +3 -2
- data/test/helper.rb +0 -1
- data/test/test_club.rb +2 -2
- data/test/test_import.rb +1 -1
- data/test/test_version.rb +5 -3
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a21cdc02ddf3bef0f067f6bab0d369d0618ac0c
|
4
|
+
data.tar.gz: bd325b2ce078afaa1ecc32a8ab2280dd463b22e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 320f53db2ab141a2994f7b0b8280daef4b066a951894270228c1671f10498db7c4e502f4418f588155b21309086c240ed92ed135318098167ff4225e5dda5f46
|
7
|
+
data.tar.gz: fc0b0adeeae78f515e7c6b1a8d6878dfc21b62d3c6ff1fef8155225a0a23551bd2e9c330e687b737a2e5c16880fd625220dd28d2dae21d15ccd144582f0a5f1b
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# sportdb-importers - tools 'n' scripts for importing sports (football) data in alternate (text) formats incl. comma-separated values (csv) format
|
1
|
+
# sportdb-importers - tools 'n' scripts for importing sports (football) data in alternate (text) formats incl. comma-separated values (csv) format
|
2
2
|
|
3
3
|
|
4
4
|
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
@@ -8,35 +8,75 @@
|
|
8
8
|
* forum :: [opensport](http://groups.google.com/group/opensport)
|
9
9
|
|
10
10
|
|
11
|
-
|
12
11
|
## Usage
|
13
12
|
|
14
|
-
|
15
|
-
|
13
|
+
|
14
|
+
**Step 1**
|
15
|
+
|
16
|
+
Set up the (SQL) database. Let's build and use a single-file SQLite database (from scratch),
|
17
|
+
as an example:
|
16
18
|
|
17
19
|
``` ruby
|
18
20
|
require 'sportdb/importers'
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
SportDb
|
22
|
+
SportDb.connect( adapter: 'sqlite3',
|
23
|
+
database: './england.db' )
|
24
|
+
SportDb.create_all ## build database schema (tables, indexes, etc.)
|
25
|
+
```
|
26
|
+
|
23
27
|
|
28
|
+
**Step 2**
|
24
29
|
|
25
|
-
|
26
|
-
|
30
|
+
Let's use the public domain football.csv datasets for England (see [`footballcsv/england`](https://github.com/footballcsv/england)), as an example:
|
31
|
+
|
32
|
+
```
|
33
|
+
Round, Date, Team 1, FT, HT, Team 2
|
34
|
+
1, (Fri) 9 Aug 2019, Liverpool FC, 4-1, 4-0, Norwich City FC
|
35
|
+
1, (Sat) 10 Aug 2019, West Ham United FC, 0-5, 0-1, Manchester City FC
|
36
|
+
1, (Sat) 10 Aug 2019, AFC Bournemouth, 1-1, 0-0, Sheffield United FC
|
37
|
+
1, (Sat) 10 Aug 2019, Burnley FC, 3-0, 0-0, Southampton FC
|
38
|
+
1, (Sat) 10 Aug 2019, Crystal Palace FC, 0-0, 0-0, Everton FC
|
39
|
+
1, (Sat) 10 Aug 2019, Watford FC, 0-3, 0-1, Brighton & Hove Albion FC
|
40
|
+
1, (Sat) 10 Aug 2019, Tottenham Hotspur FC, 3-1, 0-1, Aston Villa FC
|
41
|
+
1, (Sun) 11 Aug 2019, Leicester City FC, 0-0, 0-0, Wolverhampton Wanderers FC
|
42
|
+
1, (Sun) 11 Aug 2019, Newcastle United FC, 0-1, 0-0, Arsenal FC
|
43
|
+
1, (Sun) 11 Aug 2019, Manchester United FC, 4-0, 1-0, Chelsea FC
|
44
|
+
...
|
45
|
+
```
|
46
|
+
(Source: [england/2019-20/eng.1.csv](https://github.com/footballcsv/england/blob/master/2010s/2019-20/eng.1.csv))
|
27
47
|
|
28
|
-
SportDb.create_all ## build database schema / tables
|
29
48
|
|
49
|
+
and let's try:
|
30
50
|
|
51
|
+
``` ruby
|
52
|
+
## assumes football.csv datasets for England in ./england directory
|
53
|
+
## see github.com/footballcsv/england
|
54
|
+
SportDb.read_csv( './england/2019-20/eng.1.csv' )
|
55
|
+
|
56
|
+
## let's try another season
|
57
|
+
SportDb.read_csv( './england/2018-19/eng.1.csv' )
|
58
|
+
SportDb.read_csv( './england/2018-19/eng.2.csv' )
|
59
|
+
```
|
60
|
+
|
61
|
+
All leagues, seasons, clubs, match days and rounds, match fixtures and results,
|
62
|
+
and more are now in your (SQL) database of choice.
|
63
|
+
|
64
|
+
|
65
|
+
Bonus: To import all datafiles for all seasons (from 1888-89 to today)
|
66
|
+
for England, use:
|
67
|
+
|
68
|
+
``` ruby
|
31
69
|
## note: requires a local copy of the football.csv england datasets
|
32
70
|
## see https://github.com/footballcsv/england
|
33
|
-
|
34
|
-
|
71
|
+
SportDb.read_csv( './england' )
|
72
|
+
# -or- use a zip archive
|
73
|
+
SportDb.read_csv( './england.zip' )
|
35
74
|
```
|
36
75
|
|
37
76
|
That's it.
|
38
77
|
|
39
78
|
|
79
|
+
|
40
80
|
## License
|
41
81
|
|
42
82
|
The `sportdb-importers` scripts are dedicated to the public domain.
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/importers/version.rb'
|
|
3
3
|
|
4
4
|
Hoe.spec 'sportdb-importers' do
|
5
5
|
|
6
|
-
self.version = SportDb::Importers::VERSION
|
6
|
+
self.version = SportDb::Module::Importers::VERSION
|
7
7
|
|
8
8
|
self.summary = "sportdb-importers - tools 'n' scripts for importing sports (football) data in alternate (text) formats incl. comma-separated values (csv) format"
|
9
9
|
self.description = summary
|
@@ -20,7 +20,6 @@ Hoe.spec 'sportdb-importers' do
|
|
20
20
|
self.licenses = ['Public Domain']
|
21
21
|
|
22
22
|
self.extra_deps = [
|
23
|
-
['sportdb-text', '>= 0.4.0'],
|
24
23
|
['sportdb-sync', '>= 1.0.0'],
|
25
24
|
]
|
26
25
|
|
data/lib/sportdb/importers.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
|
4
4
|
## 3rd party gems
|
5
|
-
require 'sportdb/text'
|
6
5
|
require 'sportdb/sync'
|
7
6
|
|
8
7
|
|
@@ -12,48 +11,29 @@ require 'sportdb/importers/version' # let version always go first
|
|
12
11
|
require 'sportdb/importers/import'
|
13
12
|
|
14
13
|
|
15
|
-
|
14
|
+
module SportDb
|
15
|
+
class Package
|
16
16
|
## (re)open class - note: adds more machinery; see sportdb-text for first/original/base definition
|
17
17
|
|
18
|
-
def
|
18
|
+
def read_csv( start: nil ) ### todo/fix - rename to read_csv !!!!!!
|
19
19
|
## start - season e.g. 1993/94 to start (skip older seasons)
|
20
20
|
## note: assume package holds country/national (club) league
|
21
21
|
# use for importing german bundesliga, english premier league, etc.
|
22
22
|
|
23
|
-
|
24
|
-
pp entries
|
25
|
-
|
26
|
-
entries.each_with_index do |(season_key, datafiles),i|
|
27
|
-
|
23
|
+
match_by_season( format: 'csv', start: start ).each_with_index do |(season_key, entries),i|
|
28
24
|
puts "season [#{i+1}] >#{season_key}<:"
|
29
25
|
|
30
|
-
|
31
|
-
## filter_season( clause, season_key )
|
32
|
-
## or better filter = SeasonFilter.new( clause )
|
33
|
-
## filter.skip? filter.include? ( season_sason_key )?
|
34
|
-
## fiteer.before?( season_key ) etc.
|
35
|
-
## find some good method names!!!!
|
36
|
-
if start
|
37
|
-
start_year = start[0..3].to_i
|
38
|
-
season_start_year = season_key[0..3].to_i
|
39
|
-
if season_start_year < start_year
|
40
|
-
puts "skip #{season_start_year} before #{start_year}"
|
41
|
-
next
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
datafiles.each do |datafile,j|
|
46
|
-
path = expand_path( datafile )
|
26
|
+
entries.each do |entry,j|
|
47
27
|
## note: assume datafile basename (without extension) is the league key
|
48
28
|
## e.g. eng.1, eng.3a, eng.3b, at.1, champs, world, etc.
|
49
|
-
league_key = File.basename(
|
29
|
+
league_key = File.basename( entry.name, File.extname( entry.name ) ) ## get basename WITHOUT extension
|
50
30
|
|
51
|
-
pp [
|
31
|
+
pp [entry.name, season_key, league_key]
|
52
32
|
|
53
|
-
event = CsvEventImporter.
|
54
|
-
|
33
|
+
event = CsvEventImporter.parse( entry.read, league: league_key,
|
34
|
+
season: season_key )
|
55
35
|
|
56
|
-
puts "added #{event.title}"
|
36
|
+
puts "added #{event.title} - from source >#{entry.name}<"
|
57
37
|
puts " #{event.teams.size} teams"
|
58
38
|
puts " #{event.rounds.size} rounds"
|
59
39
|
puts " #{event.games.size} games"
|
@@ -61,8 +41,37 @@ def import( start: nil )
|
|
61
41
|
end # each season
|
62
42
|
end # method import
|
63
43
|
|
64
|
-
end # class
|
44
|
+
end # class Package
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
############
|
49
|
+
# add convenience shortcut helper
|
50
|
+
def self.read_csv( path )
|
51
|
+
if File.directory?( path ) ## if directory assume "unzipped" package
|
52
|
+
DirPackage.new( path ).read_csv
|
53
|
+
elsif File.file?( path ) && File.extname( path ) == '.zip' ## check if file is a .zip (archive) file
|
54
|
+
ZipPackage.new( path ).read_csv
|
55
|
+
else ## no package; assume single (standalone) datafile
|
56
|
+
## assume single (free-standing) file
|
57
|
+
full_path = File.expand_path( path ) ## resolve/make path absolute
|
58
|
+
## 1) assume basename is the league key
|
59
|
+
## 2) assume last directory is the season key
|
60
|
+
league_key = File.basename( full_path, File.extname( full_path ) ) ## get basename WITHOUT extension
|
61
|
+
season_key = File.basename( File.dirname( full_path ) )
|
62
|
+
|
63
|
+
event = CsvEventImporter.read( full_path, league: league_key,
|
64
|
+
season: season_key )
|
65
|
+
|
66
|
+
puts "added #{event.title} - from source >#{path}<"
|
67
|
+
puts " #{event.teams.size} teams"
|
68
|
+
puts " #{event.rounds.size} rounds"
|
69
|
+
puts " #{event.games.size} games"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end # module SportDb
|
73
|
+
|
65
74
|
|
66
75
|
|
67
76
|
|
68
|
-
puts SportDb::Importers.banner # say hello
|
77
|
+
puts SportDb::Module::Importers.banner # say hello
|
@@ -1,6 +1,9 @@
|
|
1
1
|
|
2
2
|
## todo/fix: rename to CsvEventImporter or EventImporter !!! returns Event!!
|
3
|
-
|
3
|
+
## todo/fix/check: rename to CsvMatchReader and CsvMatchReader to CsvMatchParser - why? why not?
|
4
|
+
|
5
|
+
module SportDb
|
6
|
+
class CsvEventImporter
|
4
7
|
|
5
8
|
def self.read( path, league:, season:,
|
6
9
|
headers: nil )
|
@@ -24,8 +27,8 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
24
27
|
raise ArgumentError("string expected for season; got #{season.class.name}") unless season.is_a? String
|
25
28
|
|
26
29
|
## try mapping of league here - why? why not?
|
27
|
-
@league =
|
28
|
-
@season =
|
30
|
+
@league = Import.catalog.leagues.find!( league )
|
31
|
+
@season = Import::Season.new( season )
|
29
32
|
end
|
30
33
|
|
31
34
|
|
@@ -39,17 +42,17 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
39
42
|
opts = {}
|
40
43
|
opts[:headers] = @headers if @headers
|
41
44
|
|
42
|
-
matches =
|
45
|
+
matches = CsvMatchParser.parse( @txt, **opts )
|
43
46
|
|
44
|
-
matchlist =
|
47
|
+
matchlist = Import::Matchlist.new( matches )
|
45
48
|
|
46
49
|
team_names = matchlist.teams ## was: find_teams_in_matches_txt( matches_txt )
|
47
50
|
puts "#{team_names.size} teams:"
|
48
51
|
pp team_names
|
49
52
|
|
50
53
|
## note: allows duplicates - will return uniq struct recs in teams
|
51
|
-
teams =
|
52
|
-
|
54
|
+
teams = Import.catalog.teams.find_by!( name: team_names,
|
55
|
+
league: @league )
|
53
56
|
## build mapping - name => team struct record
|
54
57
|
team_mappings = team_names.zip( teams ).to_h
|
55
58
|
|
@@ -59,8 +62,8 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
59
62
|
#######
|
60
63
|
# start with database updates / sync here
|
61
64
|
|
62
|
-
event_rec =
|
63
|
-
|
65
|
+
event_rec = Sync::Event.find_or_create_by( league: @league,
|
66
|
+
season: @season )
|
64
67
|
|
65
68
|
## todo/fix:
|
66
69
|
## add check if event has teams
|
@@ -74,7 +77,7 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
74
77
|
|
75
78
|
# maps struct record "canonical" team name to active record db record!!
|
76
79
|
## note: use "canonical" team name as hash key for now (and NOT the object itself) - why? why not?
|
77
|
-
team_recs =
|
80
|
+
team_recs = Sync::Team.find_or_create( team_mappings.values.uniq )
|
78
81
|
|
79
82
|
## todo/fix/check:
|
80
83
|
## add check if event has teams
|
@@ -89,7 +92,7 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
89
92
|
|
90
93
|
|
91
94
|
## add catch-all/unclassified "dummy" round
|
92
|
-
round_rec =
|
95
|
+
round_rec = Model::Round.create!(
|
93
96
|
event_id: event_rec.id,
|
94
97
|
title: 'Matchday ??? / Missing / Catch-All', ## find a better name?
|
95
98
|
pos: 999,
|
@@ -98,14 +101,14 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
98
101
|
|
99
102
|
## add matches
|
100
103
|
matches.each do |match|
|
101
|
-
team1_rec =
|
102
|
-
team2_rec =
|
104
|
+
team1_rec = Sync::Team.cache[ team_mappings[match.team1].name ]
|
105
|
+
team2_rec = Sync::Team.cache[ team_mappings[match.team2].name ]
|
103
106
|
|
104
107
|
if match.date.nil?
|
105
108
|
puts "!!! WARN: skipping match - play date missing!!!!!"
|
106
109
|
pp match
|
107
110
|
else
|
108
|
-
rec =
|
111
|
+
rec = Model::Game.create!(
|
109
112
|
team1_id: team1_rec.id,
|
110
113
|
team2_id: team2_rec.id,
|
111
114
|
round_id: round_rec.id,
|
@@ -123,4 +126,4 @@ class CsvEventImporter ## todo/fix/check: rename to CsvMatchReader and CsvMat
|
|
123
126
|
end # method parse
|
124
127
|
|
125
128
|
end # class CsvEventImporter
|
126
|
-
|
129
|
+
end # module SportDb
|
@@ -1,13 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
|
4
|
-
|
5
4
|
module SportDb
|
5
|
+
module Module
|
6
6
|
module Importers
|
7
7
|
|
8
8
|
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
9
9
|
MINOR = 0
|
10
|
-
PATCH =
|
10
|
+
PATCH = 1
|
11
11
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
12
12
|
|
13
13
|
def self.version
|
@@ -23,4 +23,5 @@ module Importers
|
|
23
23
|
end
|
24
24
|
|
25
25
|
end # module Importers
|
26
|
+
end # module Module
|
26
27
|
end # module SportDb
|
data/test/helper.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
## note: use the local version of sportdb gems
|
2
2
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-formats/lib' ))
|
3
3
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-config/lib' ))
|
4
|
-
$LOAD_PATH.unshift( File.expand_path( '../sportdb-text/lib' ))
|
5
4
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-models/lib' ))
|
6
5
|
$LOAD_PATH.unshift( File.expand_path( '../sportdb-sync/lib' ))
|
7
6
|
|
data/test/test_club.rb
CHANGED
@@ -54,7 +54,7 @@ class TestClub < MiniTest::Test
|
|
54
54
|
score2i: 'HTAG'
|
55
55
|
}
|
56
56
|
|
57
|
-
matches =
|
57
|
+
matches = SportDb::CsvMatchParser.read( "#{SportDb::Test.data_dir}/england/2017-18/E0.csv",
|
58
58
|
headers: headers
|
59
59
|
)
|
60
60
|
|
@@ -117,7 +117,7 @@ class TestClub < MiniTest::Test
|
|
117
117
|
score2: 'AG',
|
118
118
|
}
|
119
119
|
|
120
|
-
matches =
|
120
|
+
matches = SportDb::CsvMatchParser.read( "#{SportDb::Test.data_dir}/austria/AUT.csv",
|
121
121
|
headers: headers,
|
122
122
|
filters: { 'Season' => '2016/2017' }
|
123
123
|
)
|
data/test/test_import.rb
CHANGED
@@ -34,7 +34,7 @@ class TestImport < MiniTest::Test
|
|
34
34
|
score2i: 'HTAG'
|
35
35
|
}
|
36
36
|
|
37
|
-
event_rec = CsvEventImporter.read( "#{SportDb::Test.data_dir}/england/2017-18/E0.csv",
|
37
|
+
event_rec = SportDb::CsvEventImporter.read( "#{SportDb::Test.data_dir}/england/2017-18/E0.csv",
|
38
38
|
headers: headers,
|
39
39
|
league: 'ENG', ## fetch English Premiere League
|
40
40
|
season: '2017/18'
|
data/test/test_version.rb
CHANGED
@@ -10,9 +10,11 @@ require 'helper'
|
|
10
10
|
class TestVersion < MiniTest::Test
|
11
11
|
|
12
12
|
def test_version
|
13
|
-
pp SportDb::Importers::VERSION
|
14
|
-
pp SportDb::Importers.banner
|
15
|
-
pp SportDb::Importers.root
|
13
|
+
pp SportDb::Module::Importers::VERSION
|
14
|
+
pp SportDb::Module::Importers.banner
|
15
|
+
pp SportDb::Module::Importers.root
|
16
|
+
|
17
|
+
pp SportDb::Module.constants
|
16
18
|
|
17
19
|
assert true
|
18
20
|
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-importers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-05-
|
11
|
+
date: 2020-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: sportdb-text
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 0.4.0
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 0.4.0
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: sportdb-sync
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|