sportdb-formats 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1188cc0edf55cd83ccdee10638662c64b302a44e
4
- data.tar.gz: 06b6b09902513298357a9f47deef83cdf6962b9e
3
+ metadata.gz: 29715e2e61cd99fe3520e861b1d84c4614055650
4
+ data.tar.gz: e8109a80c7f79926c271560fd63f8503a44fabd2
5
5
  SHA512:
6
- metadata.gz: 337fdb0f298625e847e40a29e47f2ee2e6c638e1ed8a4e72e340008b16de6d10ea5907923530594c0e4739de1a477dd4fe847f10d062372a64c5e4103ef3c06b
7
- data.tar.gz: 72c0281e3f32a74750f8eb494aafd26b25a3c5f497842f389ea714bcf430f41451346b9aec7aac005dba7b8e8ab283945aca58a49ef7ecc8df6f8116d87b6370
6
+ metadata.gz: 8cb97f1cd4ae2d56e3b81282a7f921f99b2dcd325bafcf739e9d4c8a8bf9139fa99652a1a2804cf33604949b36b15694e7da68d851cef6d91b9b7dd727144bcf
7
+ data.tar.gz: 5967f5b9558d963cd9a6974be5b53ad343b8862c84ae4640bb6c3bd6975f80ab1a687946aa2a91aeeeecb9e5908537f530eb6e837553fccfc2d92f43db082edc
data/Manifest.txt CHANGED
@@ -4,9 +4,14 @@ README.md
4
4
  Rakefile
5
5
  lib/sportdb/formats.rb
6
6
  lib/sportdb/formats/config.rb
7
+ lib/sportdb/formats/country/country_index.rb
8
+ lib/sportdb/formats/country/country_reader.rb
7
9
  lib/sportdb/formats/datafile.rb
8
10
  lib/sportdb/formats/datafile_package.rb
9
11
  lib/sportdb/formats/goals.rb
12
+ lib/sportdb/formats/league/league_index.rb
13
+ lib/sportdb/formats/league/league_outline_reader.rb
14
+ lib/sportdb/formats/league/league_reader.rb
10
15
  lib/sportdb/formats/match/conf_parser.rb
11
16
  lib/sportdb/formats/match/mapper.rb
12
17
  lib/sportdb/formats/match/mapper_teams.rb
@@ -29,13 +34,27 @@ lib/sportdb/formats/structs/season.rb
29
34
  lib/sportdb/formats/structs/standings.rb
30
35
  lib/sportdb/formats/structs/team.rb
31
36
  lib/sportdb/formats/structs/team_usage.rb
37
+ lib/sportdb/formats/team/club_index.rb
38
+ lib/sportdb/formats/team/club_reader.rb
39
+ lib/sportdb/formats/team/club_reader_props.rb
40
+ lib/sportdb/formats/team/national_team_index.rb
41
+ lib/sportdb/formats/team/team_index.rb
42
+ lib/sportdb/formats/team/wiki_reader.rb
32
43
  lib/sportdb/formats/version.rb
33
44
  test/helper.rb
45
+ test/test_club_index.rb
46
+ test/test_club_reader.rb
47
+ test/test_club_reader_props.rb
34
48
  test/test_clubs.rb
35
49
  test/test_conf.rb
50
+ test/test_country_index.rb
51
+ test/test_country_reader.rb
36
52
  test/test_csv_reader.rb
37
53
  test/test_datafile.rb
38
54
  test/test_goals.rb
55
+ test/test_league_index.rb
56
+ test/test_league_outline_reader.rb
57
+ test/test_league_reader.rb
39
58
  test/test_match.rb
40
59
  test/test_match_auto.rb
41
60
  test/test_match_auto_champs.rb
@@ -49,5 +68,7 @@ test/test_name_helper.rb
49
68
  test/test_outline_reader.rb
50
69
  test/test_package.rb
51
70
  test/test_package_match.rb
71
+ test/test_regex.rb
52
72
  test/test_scores.rb
53
73
  test/test_season.rb
74
+ test/test_wiki_reader.rb
@@ -69,6 +69,69 @@ require 'sportdb/formats/match/match_parser_auto_conf'
69
69
  require 'sportdb/formats/match/conf_parser'
70
70
 
71
71
 
72
+ require 'sportdb/formats/country/country_reader'
73
+ require 'sportdb/formats/country/country_index'
74
+
75
+
76
+ ## add convenience helper
77
module SportDb
  module Import
    ## Convenience shortcuts on Country - both simply forward to CountryReader.
    class Country
      class << self
        ## read the file at path via CountryReader.read
        def read( path )
          CountryReader.read( path )
        end

        ## parse the text via CountryReader.parse
        def parse( txt )
          CountryReader.parse( txt )
        end
      end
    end # class Country
  end # module Import
end # module SportDb
85
+
86
+
87
+ require 'sportdb/formats/league/league_reader'
88
+ require 'sportdb/formats/league/league_index'
89
+ require 'sportdb/formats/league/league_outline_reader'
90
+
91
+ ##
92
+ ## add convenience helper / short-cuts
93
module SportDb
  module Import
    ## Convenience shortcuts on League - both simply forward to LeagueReader.
    class League
      class << self
        ## read the file at path via LeagueReader.read
        def read( path )
          LeagueReader.read( path )
        end

        ## parse the text via LeagueReader.parse
        def parse( txt )
          LeagueReader.parse( txt )
        end
      end
    end # class League
  end # module Import
end # module SportDb
101
+
102
+
103
+ require 'sportdb/formats/team/club_reader'
104
+ require 'sportdb/formats/team/club_reader_props'
105
+ require 'sportdb/formats/team/club_index'
106
+ require 'sportdb/formats/team/wiki_reader'
107
+ require 'sportdb/formats/team/national_team_index'
108
+ require 'sportdb/formats/team/team_index'
109
+
110
+
111
+ ###
112
+ # add convenience helpers / shortcuts
113
module SportDb
  module Import
    ## Convenience shortcuts on Club - forward to ClubReader and
    ## (for the *_props variants) to ClubPropsReader.
    class Club
      class << self
        ## read the file at path via ClubReader.read
        def read( path )
          ClubReader.read( path )
        end

        ## parse the text via ClubReader.parse
        def parse( txt )
          ClubReader.parse( txt )
        end

        ## read the file at path via ClubPropsReader.read
        def read_props( path )
          ClubPropsReader.read( path )
        end

        ## parse the text via ClubPropsReader.parse
        def parse_props( txt )
          ClubPropsReader.parse( txt )
        end
        ## todo/check: use ClubProps.read and ClubProps.parse convenience alternate shortcuts - why? why not?
      end
    end # class Club
  end # module Import
end # module SportDb
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
72
135
  ## let's put test configuration in its own namespace / module
73
136
  module SportDb
74
137
  class Test ## todo/check: works with module too? use a module - why? why not?
@@ -0,0 +1,192 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+ ## built-in countries for (quick starter) auto-add
7
## Lookup index for country records - by code (key or fifa code)
## and by (normalized) name.
##
## built-in countries for (quick starter) auto-add
##
## note: records passed in must respond to key, fifa, name and alt_names
##       (the only accessors used in here).
## note: all input errors (duplicates, unknown codes/names, etc.)
##       get reported with puts/pp followed by exit 1.
class CountryIndex

  attr_reader :countries   ## all country records (in the order added)

  ## helpers from country - use a helper module for includes (share with clubs etc.) - why? why not?
  include NameHelper
  ## incl. strip_year( name )
  ##       has_year?( name)
  ##       strip_lang( name )
  ##       normalize( name )


  ## recs - list of country records to index (see add)
  def initialize( recs )
    @countries         = []
    @countries_by_code = {}
    @countries_by_name = {}

    add( recs )
  end


  ## Add the country records to the index.
  ##
  ## For every record:
  ##  - register the key and (if different from the key) the down-cased fifa code
  ##  - register the canonical name plus all alternative names
  ##    (incl. auto-added variants with the year stripped)
  ##    under the normalized name with the language marker stripped.
  ##
  ## Exits (with status 1) on a duplicate code or duplicate/conflicting name.
  def add( recs )
    recs.each do |rec|
      ## rec e.g. { key:'af', fifa:'AFG', name:'Afghanistan'}
      @countries << rec

      ## add codes lookups - key, fifa, ...
      if @countries_by_code[ rec.key ]
        puts "** !! ERROR !! country code (key) >#{rec.key}< already exists!!"
        exit 1
      end
      @countries_by_code[ rec.key ] = rec

      ## add fifa code (only) if different from key
      fifa = rec.fifa.downcase
      if rec.key != fifa
        if @countries_by_code[ fifa ]
          puts "** !! ERROR !! country code (fifa) >#{fifa}< already exists!!"
          exit 1
        end
        @countries_by_code[ fifa ] = rec
      end

      ## add all names (canonical name + alt names)
      names = [rec.name] + rec.alt_names
      ## check "hand-typed" names for year (auto-add)
      ##   check for year(s) e.g. (1887-1911), (-2013),
      ##                          (1946-2001,2013-) etc.
      names += names.select { |name| has_year?( name ) }
                    .map    { |name| strip_year( name ) }

      ## check for duplicates - simple check for now - fix/improve
      ## todo/fix: (auto)remove duplicates - why? why not?
      count      = names.size
      count_uniq = names.uniq.size
      if count != count_uniq
        puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
        pp names
        pp rec
        exit 1
      end

      names.each do |name|
        ## check lang codes e.g. [en], [fr], etc.
        ## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
        name    = strip_lang( name )
        norm    = normalize( name )
        old_rec = @countries_by_name[ norm ]
        if old_rec
          ## name already taken (by this or another country record)
          puts "** !!! ERROR !!! - name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
          exit 1
        end
        @countries_by_name[ norm ] = rec
      end
    end ## each record
  end # method add


  ## fix/todo: add find_by (alias for find_by_name/find_by_code)

  ## Lookup by code (key or fifa code); returns nil if not found.
  def find_by_code( code )
    code = code.to_s.downcase   ## allow symbols (and always downcase e.g. AUT to aut etc.)
    @countries_by_code[ code ]
  end

  ## Lookup by (normalized) name; returns nil if not found.
  def find_by_name( name )
    name = normalize( name.to_s )  ## allow symbols too (e.g. use to.s first)
    @countries_by_name[ name ]
  end

  ## Lookup by code first and (if nothing found) by name; returns nil if not found.
  def []( key )
    country = find_by_code( key )
    country = find_by_name( key )  if country.nil?   ## try lookup / find by (normalized) name
    country
  end
  alias_method :find, :[]


  ###
  ##  split/parse country line
  ##
  ##  split on bullet e.g.
  ##   split into name and code with regex - make code optional
  ##
  ##  Examples:
  ##    Österreich • Austria (at)
  ##    Österreich • Austria
  ##    Austria
  ##    Deutschland (de) • Germany
  ##
  ##  Returns the country record that all parts of the line resolve to
  ##  (nil if the line has no parts, e.g. for an empty string).
  ##  Exits (with status 1) if a part is unknown or if the parts
  ##  do not resolve to the same country.
  ##
  ##   todo/check: support more formats - why? why not?
  ##       e.g.  Austria, AUT  (e.g. with comma - why? why not?)
  def parse( line )
    country = nil
    line.split( '•' ).each do |value|    ## use/support multi-lingual separator
      value = value.strip
      ## check for trailing country code e.g. (at), (eng), etc.
      m = /[ ]+\((?<code>[a-z]{1,4})\)\z/.match( value )    ## e.g. Austria (at)
      if m
        code = m[:code]
        name = m.pre_match.strip    ## everything before the trailing code
        candidates = [ find_by_code( code ), find_by_name( name ) ]
        if candidates[0].nil?
          puts "** !!! ERROR !!! country - unknown code >#{code}< in line: #{line}"
          pp line
          exit 1
        end
        if candidates[1].nil?
          ## note: report the name (not the code) that could not be resolved
          puts "** !!! ERROR !!! country - unknown name >#{name}< in line: #{line}"
          pp line
          exit 1
        end
        if candidates[0] != candidates[1]
          puts "** !!! ERROR !!! country - name and code do NOT match the same country:"
          pp line
          pp candidates
          exit 1
        end
        if country && country != candidates[0]
          puts "** !!! ERROR !!! country - names do NOT match the same country:"
          pp line
          pp country
          pp candidates
          exit 1
        end
        country = candidates[0]
      else
        ## just assume value is name or code
        candidate = find( value )
        if candidate.nil?
          puts "** !!! ERROR !!! country - unknown name or code >#{value}< in line: #{line}"
          pp line
          exit 1
        end
        if country && country != candidate
          puts "** !!! ERROR !!! country - names do NOT match the same country:"
          pp line
          pp country
          pp candidate
          exit 1
        end
        country = candidate
      end
    end
    country
  end # method parse
end # class CountryIndex
189
+
190
+
191
+ end # module Import
192
+ end # module SportDb
@@ -0,0 +1,122 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
## Reads country records from a text in the countries "outline" format.
##
## Example:
##    at    Austria, AUT
##          | Österreich [de]
##    eng   England › UK, ENG
##
## note: all input errors get reported with puts/pp followed by exit 1.
class CountryReader

  ## Read the (utf-8) file at path and parse its content; see parse.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parse the text; returns a list of Country records.
  def self.parse( txt )
    new( txt ).parse
  end


  def initialize( txt )
    @txt = txt
  end

  ## Parse the text passed in on creation; returns a list of Country records.
  ##
  ## The text gets split into nodes by OutlineReader.parse -
  ## :h1/:h2 nodes get skipped and :p nodes are processed line-by-line:
  ##  - a line starting with a pipe (|) adds alternative names
  ##    to the country of the last "regular" line
  ##  - a "regular" line is <key> <name>, <fifa code>[, <tags>]
  ##
  ## Exits (with status 1) on any unknown node type or malformed line.
  def parse
    countries    = []
    last_country = nil    ## note/check/fix: use countries[-1] - why? why not?

    OutlineReader.parse( @txt ).each do |node|
      node_type = node[0]

      if [:h1, :h2].include?( node_type )
        ## skip headings for now
      elsif node_type == :p   ## paragraph
        lines = node[1]
        lines.each do |line|
          if line.start_with?( '|' )
            ## assume continuation with line of alternative names
            if last_country.nil?
              ## note: nothing to attach the names to -
              ##   report as input error (instead of failing on nil)
              puts "** !! ERROR - alternative names line without a preceding country line:"
              pp line
              exit 1
            end
            ##  note: skip leading pipe
            values = line[1..-1].split( '|' )   # alt names - allow/use pipe(|)
            ## strip and squish (white)spaces
            #   e.g. East Germany    (-1989)   => East Germany (-1989)
            values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
            last_country.alt_names += values
          else
            ## assume "regular" line
            ##  check if starts with id  (todo/check: use a more "strict"/better regex capture pattern!!!)
            ##   note: allow country codes upto 4 (!!) e.g. Northern Cyprus
            m = line.match( /^([a-z]{2,4})[ ]+(.+)$/ )
            if m.nil?
              puts "** !! ERROR - missing key for (canonical) country name"
              exit 1
            end

            key    = m[1]
            values = m[2].split( ',' )
            ## strip and squish (white)spaces
            #   e.g. East Germany    (-1989)   => East Germany (-1989)
            values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }

            ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
            ##    e.g. England › UK     => England
            ##         Puerto Rico › US => Puerto Rico
            geos = split_geo( values[0] )
            name = geos[0]    ## note: ignore all other geos for now

            ## note: allow fifa country codes upto 4 (!!) e.g. Northern Cyprus
            fifa = values[1]
            unless fifa && fifa =~ /^[A-Z]{3,4}$/    ## note: also check format
              if fifa
                puts "** !!! ERROR !!! wrong fifa code format >#{values[1]}<; expected three (or four)-letter all up-case"
              else
                puts "** !!! ERROR !!! missing fifa code for (canonical) country name"
              end
              exit 1
            end

            tags = values[2] ? split_tags( values[2] ) : []   ## check if tags present

            last_country = country = Country.new( key:  key,
                                                  name: name,
                                                  fifa: fifa,
                                                  tags: tags )
            countries << country
          end
        end  # each line
      else
        puts "** !! ERROR - unknown node type / (input) source line:"
        pp node
        exit 1
      end
    end  # each node

    countries
  end  # method parse



  #######################################
  ## helpers

  ## Split the tags string into a list of (stripped) tags.
  def split_tags( str )
    tags = str.split( /[|<>‹›]/ )   ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
    tags.map { |tag| tag.strip }
  end

  ## Split the name into a geo tree (list of stripped parts), e.g. England › UK => ["England", "UK"].
  def split_geo( str )    ## todo/check: rename to parse_geo(s) - why? why not?
    geos = str.split( /[<>‹›]/ )    ## note: allow > < or › ‹ for now
    geos.map { |geo| geo.strip }    ## remove leading and trailing whitespaces
  end

end # class CountryReader
120
+
121
+ end # module Import
122
+ end # module SportDb