sportdb-formats 2.0.2 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,142 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class CountryReader
9
-
10
-
11
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
- parse( txt )
14
- end
15
-
16
- def self.parse( txt )
17
- new( txt ).parse
18
- end
19
-
20
-
21
- def initialize( txt )
22
- @txt = txt
23
- end
24
-
25
- def parse
26
- countries = []
27
- last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
-
29
- OutlineReader.parse( @txt ).each do |node|
30
-
31
- node_type = node[0]
32
-
33
- if [:h1, :h2].include?( node_type )
34
- ## skip headings (and headings) for now too
35
- elsif node_type == :p ## paragraph
36
- lines = node[1]
37
- lines.each do |line|
38
- if line.start_with?( '|' )
39
- ## assume continuation with line of alternative names
40
- ## note: skip leading pipe
41
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
- ## strip and squish (white)spaces
43
- # e.g. East Germany (-1989) => East Germany (-1989)
44
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
- last_country.alt_names += values
46
- elsif line =~ /^-[ ]*(\d{4})
47
- [ ]+
48
- (.+)$
49
- /x ## check for historic lines e.g. -1989
50
- year = $1.to_i
51
- parts = $2.split( /=>|⇒/ )
52
- values = parts[0].split( ',' )
53
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
-
55
- name = values[0]
56
- code = values[1]
57
-
58
- last_country = country = Country.new( name: "#{name} (-#{year})",
59
- code: code )
60
- ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
-
62
- countries << country
63
- ## todo/fix: add reference to country today (in parts[1] !!!!)
64
- else
65
- ## assume "regular" line
66
- ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
- ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
- if line =~ /^([a-z]{2,4})
69
- [ ]+
70
- (.+)$/x
71
- key = $1
72
- values = $2.split( ',' )
73
- ## strip and squish (white)spaces
74
- # e.g. East Germany (-1989) => East Germany (-1989)
75
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
-
77
- ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
- ## e.g. England › UK => England
79
- ## Puerto Rico › US => Puerto Rico
80
- geos = split_geo( values[0] )
81
- name = geos[0] ## note: ignore all other geos for now
82
-
83
- ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
- code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
- values[1]
86
- else
87
- if values[1]
88
- puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
- else
90
- puts "** !!! ERROR !!! missing code for (canonical) country name"
91
- end
92
- exit 1
93
- end
94
-
95
- tags = if values[2] ## check if tags presents
96
- split_tags( values[2] )
97
- else
98
- []
99
- end
100
-
101
- last_country = country = Country.new( key: key,
102
- name: name,
103
- code: code,
104
- tags: tags )
105
- countries << country
106
- else
107
- puts "** !! ERROR - missing key for (canonical) country name"
108
- exit 1
109
- end
110
- end
111
- end # each line
112
- else
113
- puts "** !! ERROR - unknown node type / (input) source line:"
114
- pp node
115
- exit 1
116
- end
117
- end # each node
118
-
119
- countries
120
- end # method parse
121
-
122
-
123
-
124
- #######################################
125
- ## helpers
126
- def split_tags( str )
127
- tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
- tags = tags.map { |tag| tag.strip }
129
- tags
130
- end
131
-
132
- def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
- ## split into geo tree
134
- geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
- geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
- geos
137
- end
138
-
139
- end # class CountryReader
140
-
141
- end # module Import
142
- end # module SportDb
@@ -1,192 +0,0 @@
1
-
2
- module Sports
3
-
4
- ## "free-standing" goal event - for import/export in separate event / goal datafiles
5
- ## returned by CsvGoalParser and others
6
- class GoalEvent
7
-
8
- def self.build( row ) ## rename to parse or such - why? why not?
9
-
10
- ## split match_id
11
- team_str, more_str = row['Match'].split( '|' )
12
- team1_str, team2_str = team_str.split( ' - ' )
13
-
14
- more_str = more_str.strip
15
- team1_str = team1_str.strip
16
- team2_str = team2_str.strip
17
-
18
- # check if more_str is a date otherwise assume round
19
- date_fmt = if more_str =~ /^[A-Z]{3} [0-9]{1,2}$/i ## Apr 4
20
- '%b %d'
21
- elsif more_str =~ /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i ## Apr 4 2019
22
- '%b %d %Y'
23
- else
24
- nil
25
- end
26
-
27
- if date_fmt
28
- date = Date.strptime( more_str, date_fmt )
29
- round = nil
30
- else
31
- date = nil
32
- round = more_str
33
- end
34
-
35
-
36
- values = row['Score'].split('-')
37
- values = values.map { |value| value.strip }
38
- score1 = values[0].to_i
39
- score2 = values[1].to_i
40
-
41
- minute = nil
42
- offset = nil
43
- if m=%r{([0-9]+)
44
- (?:[ ]+
45
- \+([0-9]+)
46
- )?
47
- ['.]
48
- $}x.match( row['Minute'])
49
- minute = m[1].to_i
50
- offset = m[2] ? m[2].to_i : nil
51
- else
52
- puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
53
- exit 1
54
- end
55
-
56
- attributes = {
57
- team1: team1_str,
58
- team2: team2_str,
59
- date: date,
60
- round: round,
61
- score1: score1,
62
- score2: score2,
63
- minute: minute,
64
- offset: offset,
65
- player: row['Player'],
66
- owngoal: ['(og)', '(o.g.)'].include?( row['Extra']),
67
- penalty: ['(pen)', '(pen.)'].include?( row['Extra']),
68
- notes: (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes']
69
- }
70
-
71
- new( **attributes )
72
- end
73
-
74
-
75
- ## match id
76
- attr_reader :team1,
77
- :team2,
78
- :round, ## optional
79
- :date ## optional
80
-
81
- ## main attributes
82
- attr_reader :score1,
83
- :score2,
84
- :player,
85
- :minute,
86
- :offset,
87
- :owngoal,
88
- :penalty,
89
- :notes
90
-
91
-
92
- ## todo/check: or just use match.hash or such if match mapping known - why? why not?
93
- def match_id
94
- if round
95
- "#{@team1} - #{@team2} | #{@round}"
96
- else
97
- "#{@team1} - #{@team2} | #{@date}"
98
- end
99
- end
100
-
101
-
102
- def owngoal?() @owngoal==true; end
103
- def penalty?() @penalty==true; end
104
-
105
- def initialize( team1:,
106
- team2:,
107
- round: nil,
108
- date: nil,
109
- score1:,
110
- score2:,
111
- player:,
112
- minute:,
113
- offset: nil,
114
- owngoal: false,
115
- penalty: false,
116
- notes: nil
117
- )
118
- @team1 = team1
119
- @team2 = team2
120
- @round = round
121
- @date = date
122
-
123
- @score1 = score1
124
- @score2 = score2
125
- @player = player
126
- @minute = minute
127
- @offset = offset
128
- @owngoal = owngoal
129
- @penalty = penalty
130
- @notes = notes
131
- end
132
-
133
-
134
- ## note: lets you use normalize teams or such acts like a Match struct
135
- def update( **kwargs )
136
- ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
137
- @team1 = kwargs[:team1] if kwargs.has_key? :team1
138
- @team2 = kwargs[:team2] if kwargs.has_key? :team2
139
- end
140
- end # class GoalEvent
141
-
142
-
143
- ### extend "basic" goal struct with goal event build
144
- class Goal ### nested (non-freestanding) inside match (match is parent)
145
-
146
- def self.build( events ) ## check/todo - rename to build_from_event/row or such - why? why not?
147
- ## build an array of goal structs from (csv) recs
148
- recs = []
149
-
150
- last_score1 = 0
151
- last_score2 = 0
152
-
153
- events.each do |event|
154
-
155
- if last_score1+1 == event.score1 && last_score2 == event.score2
156
- team = 1
157
- elsif last_score2+1 == event.score2 && last_score1 == event.score1
158
- team = 2
159
- else
160
- puts "!! ERROR - unexpected score advance (one goal at a time expected):"
161
- puts " #{last_score1}-#{last_score2}=> #{event.score1}-#{event.score2}"
162
- exit 1
163
- end
164
-
165
- last_score1 = event.score1
166
- last_score2 = event.score2
167
-
168
-
169
- attributes = {
170
- score1: event.score1,
171
- score2: event.score2,
172
- team: team,
173
- minute: event.minute,
174
- offset: event.offset,
175
- player: event.player,
176
- owngoal: event.owngoal,
177
- penalty: event.penalty,
178
- notes: event.notes
179
- }
180
-
181
- recs << new( **attributes )
182
- end
183
-
184
- recs
185
- end
186
- end # class Goal
187
-
188
-
189
- end # module Sports
190
-
191
-
192
-
@@ -1,28 +0,0 @@
1
-
2
- module SportDb
3
- class CsvGoalParser
4
-
5
-
6
- def self.read( path )
7
- txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
- parse( txt )
9
- end
10
-
11
- def self.parse( txt )
12
- new( txt ).parse
13
- end
14
-
15
-
16
- def initialize( txt )
17
- @txt = txt
18
- end
19
-
20
- def parse
21
- rows = parse_csv( @txt )
22
- recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
- ## pp recs[0]
24
- recs
25
- end
26
-
27
- end # class CsvGoalParser
28
- end # module Sports