sportdb-formats 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1188cc0edf55cd83ccdee10638662c64b302a44e
4
- data.tar.gz: 06b6b09902513298357a9f47deef83cdf6962b9e
3
+ metadata.gz: 29715e2e61cd99fe3520e861b1d84c4614055650
4
+ data.tar.gz: e8109a80c7f79926c271560fd63f8503a44fabd2
5
5
  SHA512:
6
- metadata.gz: 337fdb0f298625e847e40a29e47f2ee2e6c638e1ed8a4e72e340008b16de6d10ea5907923530594c0e4739de1a477dd4fe847f10d062372a64c5e4103ef3c06b
7
- data.tar.gz: 72c0281e3f32a74750f8eb494aafd26b25a3c5f497842f389ea714bcf430f41451346b9aec7aac005dba7b8e8ab283945aca58a49ef7ecc8df6f8116d87b6370
6
+ metadata.gz: 8cb97f1cd4ae2d56e3b81282a7f921f99b2dcd325bafcf739e9d4c8a8bf9139fa99652a1a2804cf33604949b36b15694e7da68d851cef6d91b9b7dd727144bcf
7
+ data.tar.gz: 5967f5b9558d963cd9a6974be5b53ad343b8862c84ae4640bb6c3bd6975f80ab1a687946aa2a91aeeeecb9e5908537f530eb6e837553fccfc2d92f43db082edc
data/Manifest.txt CHANGED
@@ -4,9 +4,14 @@ README.md
4
4
  Rakefile
5
5
  lib/sportdb/formats.rb
6
6
  lib/sportdb/formats/config.rb
7
+ lib/sportdb/formats/country/country_index.rb
8
+ lib/sportdb/formats/country/country_reader.rb
7
9
  lib/sportdb/formats/datafile.rb
8
10
  lib/sportdb/formats/datafile_package.rb
9
11
  lib/sportdb/formats/goals.rb
12
+ lib/sportdb/formats/league/league_index.rb
13
+ lib/sportdb/formats/league/league_outline_reader.rb
14
+ lib/sportdb/formats/league/league_reader.rb
10
15
  lib/sportdb/formats/match/conf_parser.rb
11
16
  lib/sportdb/formats/match/mapper.rb
12
17
  lib/sportdb/formats/match/mapper_teams.rb
@@ -29,13 +34,27 @@ lib/sportdb/formats/structs/season.rb
29
34
  lib/sportdb/formats/structs/standings.rb
30
35
  lib/sportdb/formats/structs/team.rb
31
36
  lib/sportdb/formats/structs/team_usage.rb
37
+ lib/sportdb/formats/team/club_index.rb
38
+ lib/sportdb/formats/team/club_reader.rb
39
+ lib/sportdb/formats/team/club_reader_props.rb
40
+ lib/sportdb/formats/team/national_team_index.rb
41
+ lib/sportdb/formats/team/team_index.rb
42
+ lib/sportdb/formats/team/wiki_reader.rb
32
43
  lib/sportdb/formats/version.rb
33
44
  test/helper.rb
45
+ test/test_club_index.rb
46
+ test/test_club_reader.rb
47
+ test/test_club_reader_props.rb
34
48
  test/test_clubs.rb
35
49
  test/test_conf.rb
50
+ test/test_country_index.rb
51
+ test/test_country_reader.rb
36
52
  test/test_csv_reader.rb
37
53
  test/test_datafile.rb
38
54
  test/test_goals.rb
55
+ test/test_league_index.rb
56
+ test/test_league_outline_reader.rb
57
+ test/test_league_reader.rb
39
58
  test/test_match.rb
40
59
  test/test_match_auto.rb
41
60
  test/test_match_auto_champs.rb
@@ -49,5 +68,7 @@ test/test_name_helper.rb
49
68
  test/test_outline_reader.rb
50
69
  test/test_package.rb
51
70
  test/test_package_match.rb
71
+ test/test_regex.rb
52
72
  test/test_scores.rb
53
73
  test/test_season.rb
74
+ test/test_wiki_reader.rb
@@ -69,6 +69,69 @@ require 'sportdb/formats/match/match_parser_auto_conf'
69
69
  require 'sportdb/formats/match/conf_parser'
70
70
 
71
71
 
72
+ require 'sportdb/formats/country/country_reader'
73
+ require 'sportdb/formats/country/country_index'
74
+
75
+
76
+ ## add convenience helper
77
+ module SportDb
78
+ module Import
79
+ class Country
80
+ def self.read( path ) CountryReader.read( path ); end
81
+ def self.parse( txt ) CountryReader.parse( txt ); end
82
+ end # class Country
83
+ end # module Import
84
+ end # module SportDb
85
+
86
+
87
+ require 'sportdb/formats/league/league_reader'
88
+ require 'sportdb/formats/league/league_index'
89
+ require 'sportdb/formats/league/league_outline_reader'
90
+
91
+ ##
92
+ ## add convenience helper / short-cuts
93
+ module SportDb
94
+ module Import
95
+ class League
96
+ def self.read( path ) LeagueReader.read( path ); end
97
+ def self.parse( txt ) LeagueReader.parse( txt ); end
98
+ end # class League
99
+ end # module Import
100
+ end # module SportDb
101
+
102
+
103
+ require 'sportdb/formats/team/club_reader'
104
+ require 'sportdb/formats/team/club_reader_props'
105
+ require 'sportdb/formats/team/club_index'
106
+ require 'sportdb/formats/team/wiki_reader'
107
+ require 'sportdb/formats/team/national_team_index'
108
+ require 'sportdb/formats/team/team_index'
109
+
110
+
111
+ ###
112
+ # add convenience helpers / shortcuts
113
+ module SportDb
114
+ module Import
115
+ class Club
116
+ def self.read( path ) ClubReader.read( path ); end
117
+ def self.parse( txt ) ClubReader.parse( txt ); end
118
+
119
+ def self.read_props( path ) ClubPropsReader.read( path ); end
120
+ def self.parse_props( txt ) ClubPropsReader.parse( txt ); end
121
+ ## todo/check: use ClubProps.read and ClubProps.parse convenience alternate shortcuts - why? why not?
122
+ end # class Club
123
+ end # module Import
124
+ end # module SportDb
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
72
135
  ## let's put test configuration in its own namespace / module
73
136
  module SportDb
74
137
  class Test ## todo/check: works with module too? use a module - why? why not?
@@ -0,0 +1,192 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+ ## built-in countries for (quick starter) auto-add
7
+ class CountryIndex
8
+
9
+ attr_reader :countries ## all country records
10
+
11
+ def initialize( recs )
12
+ @countries = []
13
+ @countries_by_code = {}
14
+ @countries_by_name = {}
15
+
16
+ add( recs )
17
+ end
18
+
19
+
20
+ ## helpers from country - use a helper module for includes (share with clubs etc.) - why? why not?
21
+ include NameHelper
22
+ ## incl. strip_year( name )
23
+ ## has_year?( name)
24
+ ## strip_lang( name )
25
+ ## normalize( name )
26
+
27
+
28
+ def add( recs )
29
+ ###########################################
30
+ ## auto-fill countries
31
+ ## pp recs
32
+ recs.each do |rec|
33
+ ## rec e.g. { key:'af', fifa:'AFG', name:'Afghanistan'}
34
+
35
+ @countries << rec
36
+
37
+ ## add codes lookups - key, fifa, ...
38
+ if @countries_by_code[ rec.key ]
39
+ puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
40
+ exit 1
41
+ else
42
+ @countries_by_code[ rec.key ] = rec
43
+ end
44
+
45
+ ## add fifa code (only) if different from key
46
+ if rec.key != rec.fifa.downcase
47
+ if @countries_by_code[ rec.fifa.downcase ]
48
+ puts "** !! ERROR !! country code (fifa) >#{rec.fifa.downcase}< already exits!!"
49
+ exit 1
50
+ else
51
+ @countries_by_code[ rec.fifa.downcase ] = rec
52
+ end
53
+ end
54
+
55
+
56
+ ## add all names (canonical name + alt names)
57
+ names = [rec.name] + rec.alt_names
58
+ more_names = []
59
+ ## check "hand-typed" names for year (auto-add)
60
+ ## check for year(s) e.g. (1887-1911), (-2013),
61
+ ## (1946-2001,2013-) etc.
62
+ names.each do |name|
63
+ if has_year?( name )
64
+ more_names << strip_year( name )
65
+ end
66
+ end
67
+
68
+ names += more_names
69
+ ## check for duplicates - simple check for now - fix/improve
70
+ ## todo/fix: (auto)remove duplicates - why? why not?
71
+ count = names.size
72
+ count_uniq = names.uniq.size
73
+ if count != count_uniq
74
+ puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
75
+ pp names
76
+ pp rec
77
+ exit 1
78
+ end
79
+
80
+ names.each_with_index do |name,i|
81
+ ## check lang codes e.g. [en], [fr], etc.
82
+ ## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
83
+ name = strip_lang( name )
84
+ norm = normalize( name )
85
+ old_rec = @countries_by_name[ norm ]
86
+ if old_rec
87
+ ## check if country name already is included or is new country rec
88
+ msg = "** !!! ERROR !!! - name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
89
+ puts msg
90
+ exit 1
91
+ else
92
+ @countries_by_name[ norm ] = rec
93
+ end
94
+ end
95
+
96
+ end ## each record
97
+ end # method add
98
+
99
+
100
+
101
+ ## fix/todo: add find_by (alias for find_by_name/find_by_code)
102
+ def find_by_code( code )
103
+ code = code.to_s.downcase ## allow symbols (and always downcase e.g. AUT to aut etc.)
104
+ @countries_by_code[ code ]
105
+ end
106
+
107
+ def find_by_name( name )
108
+ name = normalize( name.to_s ) ## allow symbols too (e.g. use to.s first)
109
+ @countries_by_name[ name ]
110
+ end
111
+
112
+ def []( key )
113
+ country = find_by_code( key )
114
+ country = find_by_name( key ) if country.nil? ## try lookup / find by (normalized) name
115
+ country
116
+ end
117
+ alias_method :find, :[]
118
+
119
+
120
+ ###
121
+ ## split/parse country line
122
+ ##
123
+ ## split on bullet (•)
124
+ ## split into name and code with regex - make code optional
125
+ ##
126
+ ## Examples:
127
+ ## Österreich • Austria (at)
128
+ ## Österreich • Austria
129
+ ## Austria
130
+ ## Deutschland (de) • Germany
131
+ ##
132
+ ## todo/check: support more formats - why? why not?
133
+ ## e.g. Austria, AUT (e.g. with comma - why? why not?)
134
+ def parse( line )
135
+ values = line.split( '•' ) ## use/support multi-lingual separator
136
+ country = nil
137
+ values.each do |value|
138
+ value = value.strip
139
+ ## check for trailing country code e.g. (at), (eng), etc.
140
+ if value =~ /[ ]+\((?<code>[a-z]{1,4})\)$/ ## e.g. Austria (at)
141
+ code = $~[:code]
142
+ name = value[0...(value.size-code.size-2)].strip ## note: add -2 for brackets
143
+ candidates = [ find_by_code( code ), find_by_name( name ) ]
144
+ if candidates[0].nil?
145
+ puts "** !!! ERROR !!! country - unknown code >#{code}< in line: #{line}"
146
+ pp line
147
+ exit 1
148
+ end
149
+ if candidates[1].nil?
150
+ puts "** !!! ERROR !!! country - unknown name >#{code}< in line: #{line}"
151
+ pp line
152
+ exit 1
153
+ end
154
+ if candidates[0] != candidates[1]
155
+ puts "** !!! ERROR !!! country - name and code do NOT match the same country:"
156
+ pp line
157
+ pp candidates
158
+ exit 1
159
+ end
160
+ if country && country != candidates[0]
161
+ puts "** !!! ERROR !!! country - names do NOT match the same country:"
162
+ pp line
163
+ pp country
164
+ pp candidates
165
+ exit 1
166
+ end
167
+ country = candidates[0]
168
+ else
169
+ ## just assume value is name or code
170
+ candidate = find( value )
171
+ if candidate.nil?
172
+ puts "** !!! ERROR !!! country - unknown name or code >#{value}< in line: #{line}"
173
+ pp line
174
+ exit 1
175
+ end
176
+ if country && country != candidate
177
+ puts "** !!! ERROR !!! country - names do NOT match the same country:"
178
+ pp line
179
+ pp country
180
+ pp candidate
181
+ exit 1
182
+ end
183
+ country = candidate
184
+ end
185
+ end
186
+ country
187
+ end # method parse
188
+ end # class CountryIndex
189
+
190
+
191
+ end # module Import
192
+ end # module SportDb
@@ -0,0 +1,122 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class CountryReader
9
+
10
+
11
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
+ parse( txt )
14
+ end
15
+
16
+ def self.parse( txt )
17
+ new( txt ).parse
18
+ end
19
+
20
+
21
+ def initialize( txt )
22
+ @txt = txt
23
+ end
24
+
25
+ def parse
26
+ countries = []
27
+ last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
+
29
+ OutlineReader.parse( @txt ).each do |node|
30
+
31
+ node_type = node[0]
32
+
33
+ if [:h1, :h2].include?( node_type )
34
+ ## skip headings (h1 and h2) for now
35
+ elsif node_type == :p ## paragraph
36
+ lines = node[1]
37
+ lines.each do |line|
38
+ if line.start_with?( '|' )
39
+ ## assume continuation with line of alternative names
40
+ ## note: skip leading pipe
41
+ values = line[1..-1].split( '|' ) # alternative country names - allow/use pipe(|)
42
+ ## strip and squish (white)spaces
43
+ # e.g. "East   Germany  (-1989)" => "East Germany (-1989)"
44
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
+ last_country.alt_names += values
46
+ else
47
+ ## assume "regular" line
48
+ ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
49
+ ## note: allow country codes of up to 4 letters (!!) e.g. Northern Cyprus
50
+ if line =~ /^([a-z]{2,4})[ ]+(.+)$/
51
+ key = $1
52
+ values = $2.split( ',' )
53
+ ## strip and squish (white)spaces
54
+ # e.g. "East   Germany  (-1989)" => "East Germany (-1989)"
55
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
56
+
57
+ ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
58
+ ## e.g. England › UK => England
59
+ ## Puerto Rico › US => Puerto Rico
60
+ geos = split_geo( values[0] )
61
+ name = geos[0] ## note: ignore all other geos for now
62
+
63
+ ## note: allow fifa country codes upto 4 (!!) e.g. Northern Cyprus
64
+ fifa = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
65
+ values[1]
66
+ else
67
+ if values[1]
68
+ puts "** !!! ERROR !!! wrong fifa code format >#{values[1]}<; expected three (or four)-letter all up-case"
69
+ else
70
+ puts "** !!! ERROR !!! missing fifa code for (canonical) country name"
71
+ end
72
+ exit 1
73
+ end
74
+
75
+ tags = if values[2] ## check if tags presents
76
+ split_tags( values[2] )
77
+ else
78
+ []
79
+ end
80
+
81
+ last_country = country = Country.new( key: key,
82
+ name: name,
83
+ fifa: fifa,
84
+ tags: tags )
85
+ countries << country
86
+ else
87
+ puts "** !! ERROR - missing key for (canonical) country name"
88
+ exit 1
89
+ end
90
+ end
91
+ end # each line
92
+ else
93
+ puts "** !! ERROR - unknown node type / (input) source line:"
94
+ pp node
95
+ exit 1
96
+ end
97
+ end # each node
98
+
99
+ countries
100
+ end # method parse
101
+
102
+
103
+
104
+ #######################################
105
+ ## helpers
106
+ def split_tags( str )
107
+ tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
108
+ tags = tags.map { |tag| tag.strip }
109
+ tags
110
+ end
111
+
112
+ def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
113
+ ## split into geo tree
114
+ geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
115
+ geos = geos.map { |geo| geo.strip } ## remove leading and trailing whitespace
116
+ geos
117
+ end
118
+
119
+ end # class CountryReader
120
+
121
+ end # module Import
122
+ end # module SportDb