sportdb-quick 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -2
- data/README.md +20 -1
- data/Rakefile +1 -1
- data/bin/fbt +3 -3
- data/lib/sportdb/quick/csv/goal.rb +192 -0
- data/lib/sportdb/quick/csv/goal_parser_csv.rb +28 -0
- data/lib/sportdb/quick/csv/match_parser_csv.rb +490 -0
- data/lib/sportdb/quick/csv/match_status_parser.rb +90 -0
- data/lib/sportdb/quick/version.rb +2 -2
- data/lib/sportdb/quick.rb +37 -2
- metadata +8 -6
- data/lib/sportdb/quick/linter.rb +0 -149
- data/lib/sportdb/quick/outline_reader.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83d71fcb827a95a654d7294a4ac51e351b9e3990ad059c1efa60d75a0ead0cb1
|
4
|
+
data.tar.gz: 30bafb710ef313eac75519621d70906b74144c063af1d413b4f2a008cc177ca3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 33c746e0840990423eee328165d4c7bb6331db50a9e3e751845df28cfe5fd42356b4683906ad90c796e069819cea6048eaeec3fcfb787ff3b284336d833bd09d
|
7
|
+
data.tar.gz: 4ea46d2e6e8f7b7f979dc5a537534e303cdd13dcd83ee6ffb2ddff505834ad2d60347f9fa3cf9c8b6c2de7a75d661b9e314e59ad1130ab4b4a6077b9af0be1eb
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -4,10 +4,12 @@ README.md
|
|
4
4
|
Rakefile
|
5
5
|
bin/fbt
|
6
6
|
lib/sportdb/quick.rb
|
7
|
-
lib/sportdb/quick/
|
7
|
+
lib/sportdb/quick/csv/goal.rb
|
8
|
+
lib/sportdb/quick/csv/goal_parser_csv.rb
|
9
|
+
lib/sportdb/quick/csv/match_parser_csv.rb
|
10
|
+
lib/sportdb/quick/csv/match_status_parser.rb
|
8
11
|
lib/sportdb/quick/match_parser.rb
|
9
12
|
lib/sportdb/quick/opts.rb
|
10
|
-
lib/sportdb/quick/outline_reader.rb
|
11
13
|
lib/sportdb/quick/quick_league_outline_reader.rb
|
12
14
|
lib/sportdb/quick/quick_match_reader.rb
|
13
15
|
lib/sportdb/quick/version.rb
|
data/README.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
|
-
# sportdb-quick - football.txt (quick) match
|
1
|
+
# sportdb-quick - football.txt (quick) match readers and more
|
2
2
|
|
3
3
|
|
4
4
|
|
5
5
|
|
6
|
+
## Usage
|
7
|
+
|
8
|
+
|
9
|
+
``` ruby
|
10
|
+
require 'sportdb/quick'
|
11
|
+
|
12
|
+
|
13
|
+
# path = "./euro/2024--germany/euro.txt"
|
14
|
+
path = "./deutschland/2024-25/1-bundesliga.txt"
|
15
|
+
|
16
|
+
matches = SportDb::QuickMatchReader.read( path )
|
17
|
+
pp matches
|
18
|
+
|
19
|
+
# try json for matches
|
20
|
+
data = matches.map {|match| match.as_json }
|
21
|
+
pp data
|
22
|
+
```
|
23
|
+
|
24
|
+
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ Hoe.spec 'sportdb-quick' do
|
|
6
6
|
|
7
7
|
self.version = SportDb::Module::Quick::VERSION
|
8
8
|
|
9
|
-
self.summary = "sportdb-quick - football.txt (quick) match
|
9
|
+
self.summary = "sportdb-quick - football.txt (quick) match readers and more"
|
10
10
|
self.description = summary
|
11
11
|
|
12
12
|
self.urls = { home: 'https://github.com/sportdb/sport.db' }
|
data/bin/fbt
CHANGED
@@ -27,7 +27,7 @@ require 'optparse'
|
|
27
27
|
args = ARGV
|
28
28
|
opts = { debug: false,
|
29
29
|
metal: false,
|
30
|
-
quick:
|
30
|
+
quick: true }
|
31
31
|
|
32
32
|
parser = OptionParser.new do |parser|
|
33
33
|
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
@@ -92,9 +92,9 @@ if opts[:quick]
|
|
92
92
|
puts " #{data.size} match(es)"
|
93
93
|
end
|
94
94
|
else
|
95
|
-
SportDb::
|
95
|
+
SportDb::Parser::Linter.debug = true if opts[:debug]
|
96
96
|
|
97
|
-
linter = SportDb::
|
97
|
+
linter = SportDb::Parser::Linter.new
|
98
98
|
|
99
99
|
errors = []
|
100
100
|
|
@@ -0,0 +1,192 @@
|
|
1
|
+
|
2
|
+
module Sports
|
3
|
+
|
4
|
+
## "free-standing" goal event - for import/export in separate event / goal datafiles
|
5
|
+
## returned by CsvGoalParser and others
|
6
|
+
class GoalEvent

  ## Build a goal event from a (csv) row / hash.
  ##
  ##  reads the columns:
  ##    'Match'   - e.g. "Team 1 - Team 2 | Apr 4 2019" or "Team 1 - Team 2 | Round 3";
  ##                 the part after | is optional and is a date
  ##                 (Apr 4 or Apr 4 2019) or otherwise assumed to be the round
  ##    'Score'   - running score after the goal e.g. "2-1"
  ##    'Minute'  - e.g. 45' or 45. or 45+2' or 45 +2'
  ##    'Player'
  ##    'Extra'   - (og) / (o.g.) for own goal, (pen) / (pen.) for penalty
  ##    'Notes'   - optional
  ##
  ##  returns a new GoalEvent
  ##
  ##  note: prints an error and exits (exit 1) on an unsupported minute format
  def self.build( row )  ## rename to parse or such - why? why not?

    ## split match_id
    team_str, more_str   = row['Match'].split( '|' )
    team1_str, team2_str = team_str.split( ' - ' )

    ## note: more_str is nil if there is no | part at all; treat like empty
    more_str  = more_str.to_s.strip
    team1_str = team1_str.strip
    team2_str = team2_str.strip

    # check if more_str is a date otherwise assume round
    date_fmt = if more_str =~ /^[A-Z]{3} [0-9]{1,2}$/i   ## Apr 4
                 '%b %d'
               elsif more_str =~ /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i  ## Apr 4 2019
                 '%b %d %Y'
               else
                 nil
               end

    if date_fmt
      date  = Date.strptime( more_str, date_fmt )
      round = nil
    else
      date  = nil
      round = more_str.empty? ? nil : more_str
    end

    values = row['Score'].split( '-' ).map { |value| value.strip }
    score1 = values[0].to_i
    score2 = values[1].to_i

    minute = nil
    offset = nil
    ## note: space(s) before the +offset are optional
    ##         otherwise 45+2' ends up as minute 2 (and no offset)
    if m=%r{([0-9]+)
             (?:[ ]*
                 \+([0-9]+)
             )?
             ['.]
           $}x.match( row['Minute'])
      minute = m[1].to_i
      offset = m[2] ? m[2].to_i : nil
    else
      puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
      exit 1
    end

    attributes = {
      team1:   team1_str,
      team2:   team2_str,
      date:    date,
      round:   round,
      score1:  score1,
      score2:  score2,
      minute:  minute,
      offset:  offset,
      player:  row['Player'],
      owngoal: ['(og)', '(o.g.)'].include?( row['Extra']),
      penalty: ['(pen)', '(pen.)'].include?( row['Extra']),
      notes:   (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes']
    }

    new( **attributes )
  end


  ## match id
  attr_reader  :team1,
               :team2,
               :round,   ## optional
               :date     ## optional

  ## main attributes
  attr_reader  :score1,
               :score2,
               :player,
               :minute,
               :offset,
               :owngoal,
               :penalty,
               :notes


  ## returns the match id string - uses the round if present otherwise the date
  ## todo/check: or just use match.hash or such if match mapping known - why? why not?
  def match_id
    if round
      "#{@team1} - #{@team2} | #{@round}"
    else
      "#{@team1} - #{@team2} | #{@date}"
    end
  end


  def owngoal?() @owngoal==true; end
  def penalty?() @penalty==true; end

  def initialize( team1:,
                  team2:,
                  round:   nil,
                  date:    nil,
                  score1:,
                  score2:,
                  player:,
                  minute:,
                  offset:  nil,
                  owngoal: false,
                  penalty: false,
                  notes:   nil
                )
    @team1   = team1
    @team2   = team2
    @round   = round
    @date    = date

    @score1  = score1
    @score2  = score2
    @player  = player
    @minute  = minute
    @offset  = offset
    @owngoal = owngoal
    @penalty = penalty
    @notes   = notes
  end


  ## note: lets you replace (e.g. normalize) the team names; acts like a Match struct
  ##         only the keys :team1 and :team2 get used; all others get ignored
  def update( **kwargs )
    ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
    @team1 = kwargs[:team1]    if kwargs.has_key? :team1
    @team2 = kwargs[:team2]    if kwargs.has_key? :team2
  end
end  # class GoalEvent
|
141
|
+
|
142
|
+
|
143
|
+
### extend "basic" goal struct with goal event build
|
144
|
+
class Goal ### nested (non-freestanding) inside match (match is parent)
|
145
|
+
|
146
|
+
def self.build( events ) ## check/todo - rename to build_from_event/row or such - why? why not?
|
147
|
+
## build an array of goal structs from (csv) recs
|
148
|
+
recs = []
|
149
|
+
|
150
|
+
last_score1 = 0
|
151
|
+
last_score2 = 0
|
152
|
+
|
153
|
+
events.each do |event|
|
154
|
+
|
155
|
+
if last_score1+1 == event.score1 && last_score2 == event.score2
|
156
|
+
team = 1
|
157
|
+
elsif last_score2+1 == event.score2 && last_score1 == event.score1
|
158
|
+
team = 2
|
159
|
+
else
|
160
|
+
puts "!! ERROR - unexpected score advance (one goal at a time expected):"
|
161
|
+
puts " #{last_score1}-#{last_score2}=> #{event.score1}-#{event.score2}"
|
162
|
+
exit 1
|
163
|
+
end
|
164
|
+
|
165
|
+
last_score1 = event.score1
|
166
|
+
last_score2 = event.score2
|
167
|
+
|
168
|
+
|
169
|
+
attributes = {
|
170
|
+
score1: event.score1,
|
171
|
+
score2: event.score2,
|
172
|
+
team: team,
|
173
|
+
minute: event.minute,
|
174
|
+
offset: event.offset,
|
175
|
+
player: event.player,
|
176
|
+
owngoal: event.owngoal,
|
177
|
+
penalty: event.penalty,
|
178
|
+
notes: event.notes
|
179
|
+
}
|
180
|
+
|
181
|
+
recs << new( **attributes )
|
182
|
+
end
|
183
|
+
|
184
|
+
recs
|
185
|
+
end
|
186
|
+
end # class Goal
|
187
|
+
|
188
|
+
|
189
|
+
end # module Sports
|
190
|
+
|
191
|
+
|
192
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class CsvGoalParser
|
4
|
+
|
5
|
+
|
6
|
+
def self.read( path )
|
7
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
8
|
+
parse( txt )
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.parse( txt )
|
12
|
+
new( txt ).parse
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
def initialize( txt )
|
17
|
+
@txt = txt
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse
|
21
|
+
rows = parse_csv( @txt )
|
22
|
+
recs = rows.map { |row| Sports::GoalEvent.build( row ) }
|
23
|
+
## pp recs[0]
|
24
|
+
recs
|
25
|
+
end
|
26
|
+
|
27
|
+
end # class CsvGoalParser
|
28
|
+
end # module SportDb
|
@@ -0,0 +1,490 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class CsvMatchParser
|
4
|
+
|
5
|
+
#############
|
6
|
+
# helpers
|
7
|
+
def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
|
8
|
+
|
9
|
+
## check if headers incl. season; if yes, it has priority over the col mapping
|
10
|
+
## e.g. no need to specify twice (if using headers)
|
11
|
+
col = headers[:season] if headers && headers[:season]
|
12
|
+
|
13
|
+
seasons = Hash.new( 0 ) ## default value is 0
|
14
|
+
|
15
|
+
## todo/fix: yes, use CsvHash.foreach - why? why not?
|
16
|
+
## use read_csv with block to switch to foreach!!!!
|
17
|
+
rows = read_csv( path, sep: sep )
|
18
|
+
|
19
|
+
rows.each_with_index do |row,i|
|
20
|
+
puts "[#{i}] " + row.inspect if i < 2
|
21
|
+
|
22
|
+
season = row[ col ] ## column name defaults to 'Season'
|
23
|
+
seasons[ season ] += 1
|
24
|
+
end
|
25
|
+
|
26
|
+
pp seasons
|
27
|
+
|
28
|
+
## note: only return season keys/names (not hash with usage counter)
|
29
|
+
seasons.keys
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
##########
|
34
|
+
# main machinery
|
35
|
+
|
36
|
+
## todo/fix: use a generic "global" parse_csv method - why? why not?
|
37
|
+
## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
|
38
|
+
## ## note: do NOT symbolize keys - keep them as is!!!!!!
|
39
|
+
## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
|
40
|
+
## CsvHash.parse( text, sep: sep )
|
41
|
+
## end
|
42
|
+
|
43
|
+
def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
|
44
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
45
|
+
parse( txt, headers: headers,
|
46
|
+
filters: filters,
|
47
|
+
converters: converters,
|
48
|
+
sep: sep )
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
|
52
|
+
new( txt ).parse( headers: headers,
|
53
|
+
filters: filters,
|
54
|
+
converters: converters,
|
55
|
+
sep: sep )
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
def initialize( txt )
|
60
|
+
@txt = txt
|
61
|
+
end
|
62
|
+
|
63
|
+
## Parse the csv text (passed in on new) into an array of match structs.
##
##  headers:    optional hash mapping attributes (e.g. :team1, :team2, :date,
##                :score, etc.) to column names; if NOT present the mapping
##                gets auto-detected from the columns of the first row
##  filters:    optional hash (column name => value); rows where any
##                column differs get skipped
##  converters: optional proc or array of procs; every proc gets called with
##                the row and MUST return the (converted) row
##  sep:        optional separator; passed on as is to parse_csv
##
##  returns an array of Sports::Match (empty if no rows)
##
##  note: an unknown time, date or score format prints an error
##          and exits (exit 1)
def parse( headers: nil, filters: nil, converters: nil, sep: nil )

  headers_mapping = {}

  rows = parse_csv( @txt, sep: sep )

  return []   if rows.empty?    ## no rows / empty?


  ## fix/todo: use logger!!!!
  ##  pp csv

  if headers   ## use user supplied headers if present
    headers_mapping = headers_mapping.merge( headers )
  else

    ## note: returns an array of strings (header names)  - assume all rows have the same columns/fields!!!
    headers = rows[0].keys
    pp headers

    # note: greece 2001-02 etc. use HT  -  check CSV reader  row['HomeTeam'] may not be nil but an empty string?
    #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now

    if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
       ## assume our own football.csv format, see github.com/footballcsv
       headers_mapping[:team1]  = find_header( headers, ['Team 1'] )
       headers_mapping[:team2]  = find_header( headers, ['Team 2'] )
       headers_mapping[:date]   = find_header( headers, ['Date'] )
       headers_mapping[:time]   = find_header( headers, ['Time'] )

       ## check for all-in-one full time (ft) and half time (ht) scores?
       headers_mapping[:score]  = find_header( headers, ['FT'] )
       headers_mapping[:scorei] = find_header( headers, ['HT'] )

       headers_mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

       ## optional headers - note: find_header returns nil if header NOT found
       ##   note: keep the order (mapping gets printed below)
       { stage:    ['Stage'],
         group:    ['Group'],
         score_et: ['ET', 'AET'],           ## (after) extra time
         score_p:  ['P', 'PEN'],            ## penalties
         notes:    ['Notes', 'Comments'],
         league:   ['League'],
       }.each do |key, candidates|
          header = find_header( headers, candidates )
          headers_mapping[ key ] = header   if header
       end
    else
       ## else try footballdata.uk and others
       headers_mapping[:team1]  = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
       headers_mapping[:team2]  = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
       headers_mapping[:date]   = find_header( headers, ['Date'] )
       headers_mapping[:time]   = find_header( headers, ['Time'] )

       ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
       headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
       headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )

       ## check for half time scores ?
       ##  note: HT = Half Time
       headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
       headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
    end
  end

  pp headers_mapping

  ### todo/fix: check headers - how?
  ##  if present HomeTeam or HT required etc.
  ##   issue error/warn is not present
  ##
  ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
  ##    exit 1
  ##

  ## convert single proc shortcut to array with single converter
  converters = [converters]    if converters.is_a?( Proc )

  ## helper - returns the value of an optional (text) column
  ##   or nil if the column is NOT mapped or the value is missing / not available
  text_col = lambda do |row, key|
    header = headers_mapping[ key ]
    col    = header ? row[ header ] : nil
    if col.nil? || col.empty? || col == '-' || col == 'n/a'
      nil
    else
      col
    end
  end

  ## helper - returns the parsed all-in-one score (e.g. 2-1) of an optional column
  ##   or nil if the column is NOT mapped; exits on invalid score format
  score_col = lambda do |row, key, label|
    header = headers_mapping[ key ]
    if header
      col   = row[ header ]
      score = parse_score( col )
      unless score
        puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
        pp row
        exit 1
      end
      score
    else
      nil
    end
  end


  matches = []

  rows.each_with_index do |row,i|

    ## fix/todo: use logger!!!!
    ##  puts "[#{i}] " + row.inspect  if i < 2

    ## todo/fix: move to its own (helper) method - filter or such!!!!
    ##   filter MUST match if present e.g. row['Season'] == '2017/2018'
    next   if filters && filters.any? { |header, value| row[ header ] != value }

    ## note:
    ##   add converters after filters for now (why not before filters?)
    if converters   ## any converters defined?
      ## assumes array of procs
      converters.each do |converter|
        row = converter.call( row )
      end
    end


    team1 = row[ headers_mapping[ :team1 ]]
    team2 = row[ headers_mapping[ :team2 ]]

    ## check if data present - if not skip (might be empty row)
    ##  note:  (old classic) csv reader returns nil for empty fields
    ##         new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
    if (team1.nil? || team1.empty?) &&
       (team2.nil? || team2.empty?)
      puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
      pp row
      next
    end

    ## remove possible match played counters e.g. (4) (11) etc.
    team1 = team1.sub( /\(\d+\)/, '' ).strip
    team2 = team2.sub( /\(\d+\)/, '' ).strip


    col = row[ headers_mapping[ :time ]]

    if col.nil?
      time = nil
    else
      col = col.strip   # make sure not leading or trailing spaces left over

      if col.empty? ||
         col =~ /^-{1,}$/ ||      # e.g.  - or ---
         col =~ /^\?{1,}$/        # e.g. ? or ???
        ## note: allow missing / unknown time for match
        time = nil
      else
        if col =~ /^\d{1,2}:\d{2}$/
          time_fmt = '%H:%M'   # e.g. 17:00 or 3:00
        elsif col =~ /^\d{1,2}\.\d{2}$/
          time_fmt = '%H.%M'   # e.g. 17.00 or 3.00
        else
          puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
          ## todo/fix: add to errors/warns list - why? why not?
          exit 1
        end

        ## todo/check: use date object (keep string?) - why? why not?
        ##  todo/fix: yes!! use date object!!!! do NOT use string
        time = Time.strptime( col, time_fmt ).strftime( '%H:%M' )
      end
    end


    col = row[ headers_mapping[ :date ]]
    ## note: (old classic) csv reader returns nil for empty fields; treat like empty
    col = col.to_s.strip   # make sure not leading or trailing spaces left over

    if col.empty? ||
       col =~ /^-{1,}$/ ||      # e.g.  - or ---
       col =~ /^\?{1,}$/        # e.g. ? or ???
      ## note: allow missing / unknown date for match
      date = nil
    else
      ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
      col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
      col = col.sub( /\(\w+\)/, '' )  ## e.g. (Fri), (Fr) etc.
      col = col.strip   # make sure not leading or trailing spaces left over

      if col =~ /^\d{2}\/\d{2}\/\d{4}$/
        date_fmt = '%d/%m/%Y'   # e.g. 17/08/2002
      elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
        date_fmt = '%d/%m/%y'   # e.g. 17/08/02
      elsif col =~ /^\d{4}-\d{1,2}-\d{1,2}$/      ## "standard" / default date format
        date_fmt = '%Y-%m-%d'   # e.g. 1995-08-04
      elsif col =~ /^\d{1,2} \w{3} \d{4}$/
        date_fmt = '%d %b %Y'   # e.g. 8 Jul 2017
      elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
        date_fmt = '%a %b %d %Y'   # e.g. Sat Aug 7 1993
      else
        puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
        ## todo/fix: add to errors/warns list - why? why not?
        exit 1
      end

      ## todo/check: use date object (keep string?) - why? why not?
      ##  todo/fix: yes!! use date object!!!! do NOT use string
      date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
    end


    ##
    ## todo/fix: round might not always be just a simple integer number!!!
    ##             might be text such as Final | Leg 1 or such!!!!
    ## check for (optional) round / matchday
    ##   note: make round always a string for now!!!! e.g. "1", "2" too!!
    ##   note: allow missing round for match / defaults to nil
    round = text_col.call( row, :round )


    score1  = nil
    score2  = nil
    score1i = nil
    score2i = nil

    ## check for full time scores ?
    if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
      ft = [ row[ headers_mapping[ :score1 ]],
             row[ headers_mapping[ :score2 ]] ]

      ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
      score1 = ft[0].to_i  if ft[0] =~ /^\d{1,2}$/
      score2 = ft[1].to_i  if ft[1] =~ /^\d{1,2}$/
    end

    ## check for half time scores ?
    if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
      ht = [ row[ headers_mapping[ :score1i ]],
             row[ headers_mapping[ :score2i ]] ]

      ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
      score1i = ht[0].to_i  if ht[0] =~ /^\d{1,2}$/
      score2i = ht[1].to_i  if ht[1] =~ /^\d{1,2}$/
    end

    ## check for all-in-one full time (ft) and half time (ht) scores?
    score = score_col.call( row, :score, 'ft' )
    score1,  score2  = score[0], score[1]    if score

    score = score_col.call( row, :scorei, 'ht' )
    score1i, score2i = score[0], score[1]    if score

    ####
    ## try optional score - extra time (et) and penalties (p/pen)
    score1et = nil
    score2et = nil
    score1p  = nil
    score2p  = nil

    score = score_col.call( row, :score_et, 'et' )
    score1et, score2et = score[0], score[1]   if score

    score = score_col.call( row, :score_p, 'p' )
    score1p,  score2p  = score[0], score[1]   if score


    ## try some optional headings / columns
    ##  note: allow missing stage for match / defaults to "regular"
    ##  note: an explicit unknown stage (?) gets passed along as is ('?') for now
    ##          todo/check: use unknown and NOT ?  - why? why not?
    stage = text_col.call( row, :stage )
    group = text_col.call( row, :group )

    ## check for optional (match) status in notes / comments
    ##   e.g. AWARDED, CANCELLED, POSTPONED, etc.
    col    = text_col.call( row, :notes )
    status = col ? StatusParser.parse( col ) : nil   # note: returns nil if no (match) status found


    league = nil
    league = row[ headers_mapping[ :league ]]   if headers_mapping[ :league ]


    ## puts 'match attributes:'
    attributes = {
      date:     date,
      time:     time,
      team1:    team1,     team2:    team2,
      score1:   score1,    score2:   score2,
      score1i:  score1i,   score2i:  score2i,
      score1et: score1et,  score2et: score2et,
      score1p:  score1p,   score2p:  score2p,
      round:    round,
      stage:    stage,
      group:    group,
      status:   status,
      league:   league
    }
    ## pp attributes

    match = Sports::Match.new( **attributes )
    matches << match
  end

  ## pp matches
  matches
end
|
444
|
+
|
445
|
+
|
446
|
+
private
|
447
|
+
|
448
|
+
## Find the first candidate (header name) that is present in headers.
##
##  headers:    array of header names (strings)
##  candidates: array of header names to try - in order of priority
##
##  returns the matching candidate or nil if no matching header found
def find_header( headers, candidates )
  candidates.find { |candidate| headers.include?( candidate ) }
end
|
456
|
+
|
457
|
+
########
|
458
|
+
# more helpers
|
459
|
+
#
|
460
|
+
|
461
|
+
## Parse an all-in-one score e.g. 2-1 or 2:1 into [score1, score2].
##
##  returns  [score1, score2] (integers) for a valid score
##           [nil, nil]       for a missing / unknown score
##                              (nil, empty, ?, - or n/a)
##           nil              if invalid / unparseable format!!!
def parse_score( str )
  ## todo/check: remove nil case - possible? - why? why not?
  return [nil,nil]   if str.nil?

  ## remove (optional single) note/footnote/endnote markers
  ##    e.g. (*) or (a), (b),
  ##      or [*], [A], [1], etc.
  ##   - allow (1) or maybe (*1) in the future - why? why not?
  str = str.sub( /\( [a-z*] \)
                    |
                  \[ [1-9a-z*] \]
                 /ix, '' ).strip

  if str.empty? || str == '?' || str == '-' || str == 'n/a'
    [nil,nil]
  ### todo/check: use regex with named capture groups here - why? why not?
  ##   note: \A and \z match the whole string; ^ and $ only match per line
  ##           and let multi-line text slip through
  elsif str =~ /\A\d{1,2}[:-]\d{1,2}\z/   ## sanity check scores format
    score = str.split( /[:-]/ )
    [score[0].to_i, score[1].to_i]
  else
    nil  ## note: returns nil if invalid / unparseable format!!!
  end
end # method parse_score
|
485
|
+
|
486
|
+
|
487
|
+
|
488
|
+
end # class CsvMatchParser
|
489
|
+
end # module SportDb
|
490
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#####################
|
2
|
+
# helpers for parsing & finding match status e.g.
|
3
|
+
# - cancelled / canceled
|
4
|
+
# - awarded
|
5
|
+
# - abandoned
|
6
|
+
# - replay
|
7
|
+
# etc.
|
8
|
+
|
9
|
+
|
10
|
+
module SportDb
|
11
|
+
|
12
|
+
|
13
|
+
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
+
|
15
|
+
class Status
|
16
|
+
# note: use a class as an "enum"-like namespace for now - why? why not?
|
17
|
+
# move class into Match e.g. Match::Status - why? why not?
|
18
|
+
CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
|
19
|
+
AWARDED = 'AWARDED'
|
20
|
+
POSTPONED = 'POSTPONED'
|
21
|
+
ABANDONED = 'ABANDONED'
|
22
|
+
REPLAY = 'REPLAY'
|
23
|
+
end # class Status
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
class StatusParser
|
28
|
+
|
29
|
+
def self.parse( str )
|
30
|
+
## note: returns nil if no match found
|
31
|
+
## note: english usage - cancelled (in UK), canceled (in US)
|
32
|
+
if str =~ /^(cancelled|
|
33
|
+
canceled|
|
34
|
+
can\.
|
35
|
+
)/xi
|
36
|
+
Status::CANCELLED
|
37
|
+
elsif str =~ /^(awarded|
|
38
|
+
awd\.
|
39
|
+
)/xi
|
40
|
+
Status::AWARDED
|
41
|
+
elsif str =~ /^(postponed
|
42
|
+
)/xi
|
43
|
+
Status::POSTPONED
|
44
|
+
elsif str =~ /^(abandoned|
|
45
|
+
abd\.
|
46
|
+
)/xi
|
47
|
+
Status::ABANDONED
|
48
|
+
elsif str =~ /^(replay
|
49
|
+
)/xi
|
50
|
+
Status::REPLAY
|
51
|
+
else
|
52
|
+
# no match
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
RUN_RE = /\[
|
59
|
+
(?<text>[^\]]+)
|
60
|
+
\]
|
61
|
+
/x
|
62
|
+
def self.find!( line )
|
63
|
+
## for now check all "protected" text run blocks e.g. []
|
64
|
+
## puts "line: >#{line}<"
|
65
|
+
|
66
|
+
status = nil
|
67
|
+
|
68
|
+
str = line
|
69
|
+
while m = str.match( RUN_RE )
|
70
|
+
str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
|
71
|
+
|
72
|
+
## check for status match
|
73
|
+
match_str = m[0] ## keep a copy of the match string (for later sub)
|
74
|
+
text = m[:text].strip
|
75
|
+
## puts " text: >#{text}<"
|
76
|
+
|
77
|
+
status = parse( text )
|
78
|
+
|
79
|
+
if status
|
80
|
+
line.sub!( match_str, "[STATUS.#{status}]" )
|
81
|
+
break
|
82
|
+
end
|
83
|
+
end # while match
|
84
|
+
|
85
|
+
status
|
86
|
+
end # method find!
|
87
|
+
end # class StatusParser
|
88
|
+
|
89
|
+
end # module SportDb
|
90
|
+
|
data/lib/sportdb/quick.rb
CHANGED
@@ -17,8 +17,10 @@ end
|
|
17
17
|
## our own code
|
18
18
|
require_relative 'quick/version'
|
19
19
|
require_relative 'quick/opts'
|
20
|
-
|
21
|
-
require_relative 'quick/
|
20
|
+
|
21
|
+
# require_relative 'quick/linter'
|
22
|
+
# require_relative 'quick/outline_reader'
|
23
|
+
|
22
24
|
|
23
25
|
require_relative 'quick/match_parser'
|
24
26
|
|
@@ -26,6 +28,39 @@ require_relative 'quick/quick_league_outline_reader'
|
|
26
28
|
require_relative 'quick/quick_match_reader'
|
27
29
|
|
28
30
|
|
31
|
+
|
32
|
+
|
33
|
+
###
|
34
|
+
# csv (tabular dataset) support / machinery
|
35
|
+
require_relative 'quick/csv/match_status_parser'
|
36
|
+
require_relative 'quick/csv/goal'
|
37
|
+
require_relative 'quick/csv/goal_parser_csv'
|
38
|
+
require_relative 'quick/csv/match_parser_csv'
|
39
|
+
|
40
|
+
|
41
|
+
### add convenience shortcut helpers
|
42
|
+
module Sports
|
43
|
+
class Match
|
44
|
+
def self.read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
|
45
|
+
SportDb::CsvMatchParser.read( path,
|
46
|
+
headers: headers,
|
47
|
+
filters: filters,
|
48
|
+
converters: converters,
|
49
|
+
sep: sep )
|
50
|
+
end
|
51
|
+
|
52
|
+
def self.parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
|
53
|
+
SportDb::CsvMatchParser.parse( txt,
|
54
|
+
headers: headers,
|
55
|
+
filters: filters,
|
56
|
+
converters: converters,
|
57
|
+
sep: sep )
|
58
|
+
end
|
59
|
+
end # class Match
|
60
|
+
end # module Sports
|
61
|
+
|
62
|
+
|
63
|
+
|
29
64
|
puts SportDb::Module::Quick.banner # say hello
|
30
65
|
|
31
66
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-quick
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-parser
|
@@ -86,7 +86,7 @@ dependencies:
|
|
86
86
|
- - "~>"
|
87
87
|
- !ruby/object:Gem::Version
|
88
88
|
version: '4.1'
|
89
|
-
description: sportdb-quick - football.txt (quick) match
|
89
|
+
description: sportdb-quick - football.txt (quick) match readers and more
|
90
90
|
email: gerald.bauer@gmail.com
|
91
91
|
executables:
|
92
92
|
- fbt
|
@@ -102,10 +102,12 @@ files:
|
|
102
102
|
- Rakefile
|
103
103
|
- bin/fbt
|
104
104
|
- lib/sportdb/quick.rb
|
105
|
-
- lib/sportdb/quick/linter.rb
|
105
|
+
- lib/sportdb/quick/csv/goal.rb
|
106
|
+
- lib/sportdb/quick/csv/goal_parser_csv.rb
|
107
|
+
- lib/sportdb/quick/csv/match_parser_csv.rb
|
108
|
+
- lib/sportdb/quick/csv/match_status_parser.rb
|
106
109
|
- lib/sportdb/quick/match_parser.rb
|
107
110
|
- lib/sportdb/quick/opts.rb
|
108
|
-
- lib/sportdb/quick/outline_reader.rb
|
109
111
|
- lib/sportdb/quick/quick_league_outline_reader.rb
|
110
112
|
- lib/sportdb/quick/quick_match_reader.rb
|
111
113
|
- lib/sportdb/quick/version.rb
|
@@ -133,5 +135,5 @@ requirements: []
|
|
133
135
|
rubygems_version: 3.4.10
|
134
136
|
signing_key:
|
135
137
|
specification_version: 4
|
136
|
-
summary: sportdb-quick - football.txt (quick) match
|
138
|
+
summary: sportdb-quick - football.txt (quick) match readers and more
|
137
139
|
test_files: []
|
data/lib/sportdb/quick/linter.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module Quick
|
4
|
-
|
5
|
-
###
|
6
|
-
## note - Linter for now nested inside Parser - keep? why? why not?
|
7
|
-
class Linter
|
8
|
-
|
9
|
-
def self.debug=(value) @@debug = value; end
|
10
|
-
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
11
|
-
def debug?() self.class.debug?; end
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
attr_reader :errors
|
16
|
-
|
17
|
-
def initialize
|
18
|
-
@errors = []
|
19
|
-
@parser = Parser.new ## use own parser instance (not shared) - why? why not?
|
20
|
-
end
|
21
|
-
|
22
|
-
|
23
|
-
def errors?() @errors.size > 0; end
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
28
|
-
## make sure mon feb 12 18:10 will not match
|
29
|
-
## allow 1. FC Köln etc.
|
30
|
-
## Mainz 05:
|
31
|
-
## limit to 30 chars max
|
32
|
-
## only allow chars incl. intl but (NOT ()[]/;)
|
33
|
-
##
|
34
|
-
## Group A:
|
35
|
-
## Group B: - remove colon
|
36
|
-
## or lookup first
|
37
|
-
|
38
|
-
ATTRIB_RE = %r{^
|
39
|
-
[ ]*? # slurp leading spaces
|
40
|
-
(?<key>[^:|\]\[()\/; -]
|
41
|
-
[^:|\]\[()\/;]{0,30}
|
42
|
-
)
|
43
|
-
[ ]*? # slurp trailing spaces
|
44
|
-
:[ ]+
|
45
|
-
(?<value>.+)
|
46
|
-
[ ]*? # slurp trailing spaces
|
47
|
-
$
|
48
|
-
}ix
|
49
|
-
|
50
|
-
|
51
|
-
#########
|
52
|
-
## parse - false (default) - tokenize (only)
|
53
|
-
## - true - tokenize & parse
|
54
|
-
def read( path, parse: false )
|
55
|
-
## note: every (new) read call - resets errors list to empty
|
56
|
-
@errors = []
|
57
|
-
|
58
|
-
nodes = OutlineReader.read( path )
|
59
|
-
|
60
|
-
## process nodes
|
61
|
-
h1 = nil
|
62
|
-
orphans = 0 ## track paragraphs with no heading
|
63
|
-
|
64
|
-
attrib_found = false
|
65
|
-
|
66
|
-
|
67
|
-
nodes.each do |node|
|
68
|
-
type = node[0]
|
69
|
-
|
70
|
-
if type == :h1
|
71
|
-
h1 = node[1] ## get heading text
|
72
|
-
puts
|
73
|
-
puts " = Heading 1 >#{node[1]}<"
|
74
|
-
elsif type == :p
|
75
|
-
|
76
|
-
if h1.nil?
|
77
|
-
orphans += 1 ## only warn once
|
78
|
-
puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
79
|
-
next
|
80
|
-
end
|
81
|
-
|
82
|
-
lines = node[1]
|
83
|
-
|
84
|
-
tree = []
|
85
|
-
lines.each_with_index do |line,i|
|
86
|
-
|
87
|
-
if debug?
|
88
|
-
puts
|
89
|
-
puts "line >#{line}<"
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
## skip new (experimental attrib syntax)
|
94
|
-
if attrib_found == false &&
|
95
|
-
ATTRIB_RE.match?( line )
|
96
|
-
## note: check attrib regex AFTER group def e.g.:
|
97
|
-
## Group A:
|
98
|
-
## Group B: etc.
|
99
|
-
## todo/fix - change Group A: to Group A etc.
|
100
|
-
## Group B: to Group B
|
101
|
-
attrib_found = true
|
102
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
103
|
-
next
|
104
|
-
end
|
105
|
-
|
106
|
-
if attrib_found
|
107
|
-
## check if line ends with dot
|
108
|
-
## if not slurp up lines to the next dot!!!
|
109
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
110
|
-
attrib_found = false if line.end_with?( '.' )
|
111
|
-
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
112
|
-
next
|
113
|
-
end
|
114
|
-
|
115
|
-
t, error_messages = if parse
|
116
|
-
@parser.parse_with_errors( line )
|
117
|
-
else
|
118
|
-
@parser.tokenize_with_errors( line )
|
119
|
-
end
|
120
|
-
|
121
|
-
|
122
|
-
if error_messages.size > 0
|
123
|
-
## add to "global" error list
|
124
|
-
## make a triplet tuple (file / msg / line text)
|
125
|
-
error_messages.each do |msg|
|
126
|
-
@errors << [ path,
|
127
|
-
msg,
|
128
|
-
line
|
129
|
-
]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
pp t if debug?
|
134
|
-
|
135
|
-
tree << t
|
136
|
-
end
|
137
|
-
|
138
|
-
## pp tree
|
139
|
-
else
|
140
|
-
pp node
|
141
|
-
raise ArgumentError, "unsupported (node) type >#{type}<"
|
142
|
-
end
|
143
|
-
end # each node
|
144
|
-
end # read
|
145
|
-
end # class Linter
|
146
|
-
|
147
|
-
|
148
|
-
end # module Quick
|
149
|
-
end # module SportDb
|
@@ -1,97 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
class OutlineReader
|
6
|
-
|
7
|
-
def self.debug=(value) @@debug = value; end
|
8
|
-
def self.debug?() @@debug ||= false; end
|
9
|
-
def debug?() self.class.debug?; end
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
14
|
-
txt = File.open( path, 'r:utf-8' ) {|f| f.read }
|
15
|
-
parse( txt )
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.parse( txt )
|
19
|
-
new( txt ).parse
|
20
|
-
end
|
21
|
-
|
22
|
-
def initialize( txt )
|
23
|
-
@txt = txt
|
24
|
-
end
|
25
|
-
|
26
|
-
## note: skip "decorative" only heading e.g. ========
|
27
|
-
## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
|
28
|
-
HEADING_BLANK_RE = %r{\A
|
29
|
-
={1,}
|
30
|
-
\z}x
|
31
|
-
|
32
|
-
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
33
|
-
HEADING_RE = %r{\A
|
34
|
-
(?<marker>={1,}) ## 1. leading ======
|
35
|
-
[ ]*
|
36
|
-
(?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
|
37
|
-
[ ]*
|
38
|
-
=* ## 3. (optional) trailing ====
|
39
|
-
\z}x
|
40
|
-
|
41
|
-
def parse
|
42
|
-
outline=[] ## outline structure
|
43
|
-
start_para = true ## start new para(graph) on new text line?
|
44
|
-
|
45
|
-
@txt.each_line do |line|
|
46
|
-
line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
|
47
|
-
|
48
|
-
if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
|
49
|
-
start_para = true
|
50
|
-
next
|
51
|
-
end
|
52
|
-
|
53
|
-
break if line == '__END__'
|
54
|
-
|
55
|
-
next if line.start_with?( '#' ) ## skip comments too
|
56
|
-
## strip inline (until end-of-line) comments too
|
57
|
-
## e.g Eupen | KAS Eupen ## [de]
|
58
|
-
## => Eupen | KAS Eupen
|
59
|
-
## e.g bq Bonaire, BOE # CONCACAF
|
60
|
-
## => bq Bonaire, BOE
|
61
|
-
line = line.sub( /#.*/, '' ).strip
|
62
|
-
pp line if debug?
|
63
|
-
|
64
|
-
## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
|
65
|
-
next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
|
66
|
-
|
67
|
-
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
68
|
-
if m=HEADING_RE.match( line )
|
69
|
-
start_para = true
|
70
|
-
|
71
|
-
heading_marker = m[:marker]
|
72
|
-
heading_level = heading_marker.length ## count number of = for heading level
|
73
|
-
heading = m[:text].strip
|
74
|
-
|
75
|
-
puts "heading #{heading_level} >#{heading}<" if debug?
|
76
|
-
outline << [:"h#{heading_level}", heading]
|
77
|
-
else ## assume it's a (plain/regular) text line
|
78
|
-
if start_para
|
79
|
-
outline << [:p, [line]]
|
80
|
-
start_para = false
|
81
|
-
else
|
82
|
-
node = outline[-1] ## get last entry
|
83
|
-
if node[0] == :p ## assert it's a p(aragraph) node!!!
|
84
|
-
node[1] << line ## add line to p(aragraph)
|
85
|
-
else
|
86
|
-
puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
|
87
|
-
pp node
|
88
|
-
exit 1
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
outline
|
94
|
-
end # method parse
|
95
|
-
end # class OutlineReader
|
96
|
-
|
97
|
-
end # module SportDb
|