sportdb-quick 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -2
- data/README.md +20 -1
- data/Rakefile +1 -1
- data/bin/fbt +6 -6
- data/lib/sportdb/quick/csv/goal.rb +192 -0
- data/lib/sportdb/quick/csv/goal_parser_csv.rb +28 -0
- data/lib/sportdb/quick/csv/match_parser_csv.rb +490 -0
- data/lib/sportdb/quick/csv/match_status_parser.rb +90 -0
- data/lib/sportdb/quick/quick_league_outline_reader.rb +9 -0
- data/lib/sportdb/quick/version.rb +1 -1
- data/lib/sportdb/quick.rb +37 -2
- metadata +8 -6
- data/lib/sportdb/quick/linter.rb +0 -149
- data/lib/sportdb/quick/outline_reader.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ee93b658f2f6fce94a131931ea37908a2edbfa19a182ec20072447b57ffc847
|
4
|
+
data.tar.gz: f5fa5ab664d71ea9a2ab8c62e1bd30d42fa678e0b2a7564124623dc5d7cd2963
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0df0b970600ee24a88d09ddffec1caa8ef94cfa75561569638da954357cec2d97ffc7ea566f03e42ec814a5998c99da79f861c54a863a439f1778a3f0426ab0
|
7
|
+
data.tar.gz: 01e1ad205c4d271a68136784a17b8f1b7987bdfa5e9b4ba5f395b42f6b488a53747525ecd7bea1e2c188012219529d7611e7d35f49ec4e5e7d499cc791e4e80a
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -4,10 +4,12 @@ README.md
|
|
4
4
|
Rakefile
|
5
5
|
bin/fbt
|
6
6
|
lib/sportdb/quick.rb
|
7
|
-
lib/sportdb/quick/
|
7
|
+
lib/sportdb/quick/csv/goal.rb
|
8
|
+
lib/sportdb/quick/csv/goal_parser_csv.rb
|
9
|
+
lib/sportdb/quick/csv/match_parser_csv.rb
|
10
|
+
lib/sportdb/quick/csv/match_status_parser.rb
|
8
11
|
lib/sportdb/quick/match_parser.rb
|
9
12
|
lib/sportdb/quick/opts.rb
|
10
|
-
lib/sportdb/quick/outline_reader.rb
|
11
13
|
lib/sportdb/quick/quick_league_outline_reader.rb
|
12
14
|
lib/sportdb/quick/quick_match_reader.rb
|
13
15
|
lib/sportdb/quick/version.rb
|
data/README.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
|
-
# sportdb-quick - football.txt (quick) match
|
1
|
+
# sportdb-quick - football.txt (quick) match readers and more
|
2
2
|
|
3
3
|
|
4
4
|
|
5
5
|
|
6
|
+
## Usage
|
7
|
+
|
8
|
+
|
9
|
+
``` ruby
|
10
|
+
require 'sportdb/quick'
|
11
|
+
|
12
|
+
|
13
|
+
# path = "./euro/2024--germany/euro.txt"
|
14
|
+
path = "./deutschland/2024-25/1-bundesliga.txt"
|
15
|
+
|
16
|
+
matches = SportDb::QuickMatchReader.read( path )
|
17
|
+
pp matches
|
18
|
+
|
19
|
+
# try json for matches
|
20
|
+
data = matches.map {|match| match.as_json }
|
21
|
+
pp data
|
22
|
+
```
|
23
|
+
|
24
|
+
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ Hoe.spec 'sportdb-quick' do
|
|
6
6
|
|
7
7
|
self.version = SportDb::Module::Quick::VERSION
|
8
8
|
|
9
|
-
self.summary = "sportdb-quick - football.txt (quick) match
|
9
|
+
self.summary = "sportdb-quick - football.txt (quick) match readers and more"
|
10
10
|
self.description = summary
|
11
11
|
|
12
12
|
self.urls = { home: 'https://github.com/sportdb/sport.db' }
|
data/bin/fbt
CHANGED
@@ -27,7 +27,7 @@ require 'optparse'
|
|
27
27
|
args = ARGV
|
28
28
|
opts = { debug: false,
|
29
29
|
metal: false,
|
30
|
-
quick:
|
30
|
+
quick: true }
|
31
31
|
|
32
32
|
parser = OptionParser.new do |parser|
|
33
33
|
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
@@ -39,19 +39,19 @@ require 'optparse'
|
|
39
39
|
|
40
40
|
parser.on( "--verbose", "--debug",
|
41
41
|
"turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
|
42
|
-
opts[:debug] =
|
42
|
+
opts[:debug] = true
|
43
43
|
end
|
44
44
|
|
45
45
|
parser.on( "--metal",
|
46
46
|
"turn off typed parse tree; show to the metal tokens"+
|
47
47
|
" (default: #{opts[:metal]})" ) do |metal|
|
48
|
-
opts[:metal] =
|
48
|
+
opts[:metal] = true
|
49
49
|
end
|
50
50
|
|
51
51
|
parser.on( "--quick",
|
52
52
|
"use quick match reader; output matches in json"+
|
53
53
|
" (default: #{opts[:quick]})" ) do |quick|
|
54
|
-
opts[:quick] =
|
54
|
+
opts[:quick] = true
|
55
55
|
end
|
56
56
|
end
|
57
57
|
parser.parse!( args )
|
@@ -92,9 +92,9 @@ if opts[:quick]
|
|
92
92
|
puts " #{data.size} match(es)"
|
93
93
|
end
|
94
94
|
else
|
95
|
-
SportDb::
|
95
|
+
SportDb::Parser::Linter.debug = true if opts[:debug]
|
96
96
|
|
97
|
-
linter = SportDb::
|
97
|
+
linter = SportDb::Parser::Linter.new
|
98
98
|
|
99
99
|
errors = []
|
100
100
|
|
@@ -0,0 +1,192 @@
|
|
1
|
+
|
2
|
+
module Sports
|
3
|
+
|
4
|
+
## "free-standing" goal event - for import/export in separate event / goal datafiles
|
5
|
+
## returned by CsvGoalParser and others
|
6
|
+
class GoalEvent

  ## Build a goal event from a (csv) row.
  ##
  ## The row gets read via [] using the column names
  ##   Match, Score, Minute, Player, Extra and Notes - e.g.
  ##     Match:  Team 1 - Team 2 | Round 7   or   Team 1 - Team 2 | Apr 4 2019
  ##     Score:  2-1
  ##     Minute: 45'  or  45.  or  90 +3'  or  90+3'
  ##     Extra:  (og), (o.g.), (pen) or (pen.)
  ##
  ## raises ArgumentError if the match (id) cannot be split
  ##   into two teams plus a round or date part.
  ## note: prints an error and exits (1) on unsupported minute formats.
  def self.build( row )  ## rename to parse or such - why? why not?

    ## split match_id
    team_str, more_str   = row['Match'].to_s.split( '|' )
    team1_str, team2_str = team_str.to_s.split( ' - ' )

    ## fail early with a readable message
    ##   (instead of a NoMethodError on nil further down)
    if more_str.nil? || team2_str.nil?
      raise ArgumentError, "unsupported match (id) format >#{row['Match']}<"
    end

    more_str  = more_str.strip
    team1_str = team1_str.strip
    team2_str = team2_str.strip

    # check if more_str is a date otherwise assume round
    date_fmt = if more_str =~ /^[A-Z]{3} [0-9]{1,2}$/i   ## Apr 4
                 '%b %d'
               elsif more_str =~ /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i  ## Apr 4 2019
                 '%b %d %Y'
               end

    if date_fmt
      date  = Date.strptime( more_str, date_fmt )
      round = nil
    else
      date  = nil
      round = more_str
    end

    score1, score2 = row['Score'].split( '-' ).map { |value| value.strip.to_i }

    ## note: anchor at the start and make the space before the offset optional;
    ##         otherwise 90+3' gets picked up as minute 3 (without offset)
    m = %r{\A
             [ ]*
             ([0-9]+)
             (?:[ ]*
                  \+([0-9]+)
             )?
             ['.]
           $}x.match( row['Minute'] )

    unless m
      puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
      exit 1
    end

    minute = m[1].to_i
    offset = m[2] ? m[2].to_i : nil

    notes = row['Notes']
    notes = nil   if notes.nil? || notes.empty?

    new( team1:   team1_str,
         team2:   team2_str,
         date:    date,
         round:   round,
         score1:  score1,
         score2:  score2,
         minute:  minute,
         offset:  offset,
         player:  row['Player'],
         owngoal: ['(og)', '(o.g.)'].include?( row['Extra'] ),
         penalty: ['(pen)', '(pen.)'].include?( row['Extra'] ),
         notes:   notes )
  end


  ## match id
  attr_reader :team1,
              :team2,
              :round,   ## optional
              :date     ## optional

  ## main attributes
  attr_reader :score1,
              :score2,
              :player,
              :minute,
              :offset,
              :owngoal,
              :penalty,
              :notes


  ## todo/check: or just use match.hash or such if match mapping known - why? why not?
  ## returns the match id as text - uses the round if present, otherwise the date
  def match_id
    if round
      "#{@team1} - #{@team2} | #{@round}"
    else
      "#{@team1} - #{@team2} | #{@date}"
    end
  end


  def owngoal?() @owngoal==true; end
  def penalty?() @penalty==true; end

  def initialize( team1:,
                  team2:,
                  round:   nil,
                  date:    nil,
                  score1:,
                  score2:,
                  player:,
                  minute:,
                  offset:  nil,
                  owngoal: false,
                  penalty: false,
                  notes:   nil
                )
    @team1   = team1
    @team2   = team2
    @round   = round
    @date    = date

    @score1  = score1
    @score2  = score2
    @player  = player
    @minute  = minute
    @offset  = offset
    @owngoal = owngoal
    @penalty = penalty
    @notes   = notes
  end


  ## note: lets you use normalize teams or such acts like a Match struct
  ##   only team1 and team2 get updated (if passed in); all other keys get ignored
  def update( **kwargs )
    ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
    @team1 = kwargs[:team1]    if kwargs.key?( :team1 )
    @team2 = kwargs[:team2]    if kwargs.key?( :team2 )
  end
end # class GoalEvent
|
141
|
+
|
142
|
+
|
143
|
+
### extend "basic" goal struct with goal event build
|
144
|
+
class Goal  ### nested (non-freestanding) inside match (match is parent)

  ## Build an array of goal structs from (csv) goal event recs.
  ##
  ## The events get walked in order; every event must advance
  ##   the running score by exactly one goal - the side that advanced
  ##   decides the team (1 or 2).
  ##
  ## returns an array of goals (one per event).
  ## note: prints an error and exits (1) if the score does NOT advance by one goal.
  def self.build( events )  ## check/todo - rename to build_from_event/row or such - why? why not?
    last_score1 = 0
    last_score2 = 0

    events.map do |event|
      team = if last_score1+1 == event.score1 && last_score2 == event.score2
               1
             elsif last_score2+1 == event.score2 && last_score1 == event.score1
               2
             else
               puts "!! ERROR - unexpected score advance (one goal at a time expected):"
               puts " #{last_score1}-#{last_score2}=> #{event.score1}-#{event.score2}"
               exit 1
             end

      last_score1 = event.score1
      last_score2 = event.score2

      new( score1:  event.score1,
           score2:  event.score2,
           team:    team,
           minute:  event.minute,
           offset:  event.offset,
           player:  event.player,
           owngoal: event.owngoal,
           penalty: event.penalty,
           notes:   event.notes )
    end
  end
end # class Goal
|
187
|
+
|
188
|
+
|
189
|
+
end # module Sports
|
190
|
+
|
191
|
+
|
192
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class CsvGoalParser

  ## Read the (csv) datafile in path and parse it into goal events.
  ## note: make sure to use (assume) utf-8;
  ##        a leading byte order mark (bom) gets stripped -
  ##        otherwise the bom ends up in the first header name
  def self.read( path )
    txt = File.open( path, 'r:bom|utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parse the (csv) text into goal events.
  def self.parse( txt )
    new( txt ).parse
  end


  def initialize( txt )
    @txt = txt
  end

  ## returns an array with one Sports::GoalEvent per (csv) row
  def parse
    rows = parse_csv( @txt )
    rows.map { |row| Sports::GoalEvent.build( row ) }
  end

end # class CsvGoalParser
|
28
|
+
end # module SportDb
|
@@ -0,0 +1,490 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class CsvMatchParser

  #############
  # helpers

  ## Collect the season names used in the (csv) datafile in path.
  ##
  ##   col     - name of the season column (defaults to 'Season')
  ##   sep     - passed on to read_csv
  ##   headers - (optional) headers mapping; if it incl. a :season entry
  ##               it has priority over col e.g. no need to specify twice
  ##
  ## note: prints the first two rows and the usage counters (debug output)
  ##       and returns the season keys/names only (not the hash with usage counter)
  def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
    col = headers[:season]   if headers && headers[:season]

    seasons = Hash.new( 0 )   ## default value is 0

    ## todo/fix: yes, use CsvHash.foreach - why? why not?
    ##             use read_csv with block to switch to foreach!!!!
    rows = read_csv( path, sep: sep )

    rows.each_with_index do |row,i|
      puts "[#{i}] " + row.inspect    if i < 2

      seasons[ row[ col ] ] += 1
    end

    pp seasons

    seasons.keys
  end


  ##########
  # main machinery

  ## todo/fix: use a generic "global" parse_csv method - why? why not?

  ## Read the (csv) datafile in path and parse it into matches; see #parse for the options.
  ## note: make sure to use (assume) utf-8;
  ##        a leading byte order mark (bom) gets stripped -
  ##        otherwise the bom ends up in the first header name
  def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
    txt = File.open( path, 'r:bom|utf-8' ) { |f| f.read }
    parse( txt, headers:    headers,
                filters:    filters,
                converters: converters,
                sep:        sep )
  end

  ## Parse the (csv) text into matches; see #parse for the options.
  def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
    new( txt ).parse( headers:    headers,
                      filters:    filters,
                      converters: converters,
                      sep:        sep )
  end


  def initialize( txt )
    @txt = txt
  end

  ## Parse the text into an array of Sports::Match records.
  ##
  ##   headers    - (optional) user supplied mapping e.g. { team1: 'Home', ... };
  ##                  if missing the mapping gets auto-detected from the first row
  ##   filters    - (optional) header/value pairs that MUST all match
  ##                  e.g. { 'Season' => '2017/2018' }; other rows get skipped
  ##   converters - (optional) proc or array of procs; every proc gets called
  ##                  with the row and returns the (converted) row
  ##   sep        - passed on to parse_csv
  ##
  ## note: converters get applied after the filters for now (why not before filters?)
  ## note: prints an error and exits (1) on unknown time, date or score formats
  def parse( headers: nil, filters: nil, converters: nil, sep: nil )
    rows = parse_csv( @txt, sep: sep )

    return []   if rows.empty?     ## no rows / empty?

    ## fix/todo: use logger!!!!
    headers_mapping = if headers   ## use user supplied headers if present
                        {}.merge( headers )
                      else
                        ## note: assume all rows have the same columns/fields!!!
                        detect_headers( rows[0].keys )
                      end

    pp headers_mapping

    ### todo/fix: check headers - how?
    ##    if present HomeTeam or HT required etc.
    ##     issue error/warn if not present

    ## convert single proc shortcut to array with single converter
    converters = [converters]    if converters.is_a?( Proc )

    matches = []

    rows.each_with_index do |row,i|

      ## filter MUST match if present e.g. row['Season'] == '2017/2018'
      next   if filters && !filters_match?( row, filters )

      ## assumes array of procs
      converters.each { |converter| row = converter.call( row ) }    if converters

      team1 = row[ headers_mapping[ :team1 ]]
      team2 = row[ headers_mapping[ :team2 ]]

      ## check if data present - if not skip (might be empty row)
      ##  note:  (old classic) csv reader returns nil for empty fields
      ##          new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
      if (team1.nil? || team1.empty?) &&
         (team2.nil? || team2.empty?)
        puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
        pp row
        next
      end

      ## remove possible match played counters e.g. (4) (11) etc.
      team1 = team1.sub( /\(\d+\)/, '' ).strip
      team2 = team2.sub( /\(\d+\)/, '' ).strip

      time = parse_time( row[ headers_mapping[ :time ]] )
      date = parse_date( row[ headers_mapping[ :date ]] )

      ##
      ##  todo/fix: round might not always be just a simple integer number!!!
      ##             might be text such as Final | Leg 1 or such!!!!
      ##  note: make round always a string for now!!!! e.g. "1", "2" too!!
      ##        allow missing round for match / defaults to nil
      round = nil
      round = blank_to_nil( row[ headers_mapping[ :round ]] )   if headers_mapping[ :round ]

      score1  = nil
      score2  = nil
      score1i = nil
      score2i = nil

      ## check for full time scores ?
      ##   todo/fix: issue warning if not ? or - (and just empty string) why? why not
      if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
        score1 = parse_goals( row[ headers_mapping[ :score1 ]] )
        score2 = parse_goals( row[ headers_mapping[ :score2 ]] )
      end

      ## check for half time scores ?
      if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
        score1i = parse_goals( row[ headers_mapping[ :score1i ]] )
        score2i = parse_goals( row[ headers_mapping[ :score2i ]] )
      end

      ## check for all-in-one full time (ft) and half time (ht) scores?
      if headers_mapping[ :score ]
        score1, score2   = parse_score!( row, headers_mapping[ :score ], 'ft' )
      end

      if headers_mapping[ :scorei ]
        score1i, score2i = parse_score!( row, headers_mapping[ :scorei ], 'ht' )
      end

      ####
      ## try optional score - extra time (et) and penalties (p/pen)
      score1et = nil
      score2et = nil
      score1p  = nil
      score2p  = nil

      if headers_mapping[ :score_et ]
        score1et, score2et = parse_score!( row, headers_mapping[ :score_et ], 'et' )
      end

      if headers_mapping[ :score_p ]
        score1p, score2p   = parse_score!( row, headers_mapping[ :score_p ], 'p' )
      end

      ## try some optional headings / columns
      ##   note: allow missing stage for match / defaults to nil;
      ##         an explicit unknown (?) gets kept as is ('?')
      stage = nil
      stage = blank_to_nil( row[ headers_mapping[ :stage ]] )   if headers_mapping[ :stage ]

      group = nil
      group = blank_to_nil( row[ headers_mapping[ :group ]] )   if headers_mapping[ :group ]

      ## check for optional (match) status in notes / comments
      status = nil    ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
      if headers_mapping[ :notes ]
        col    = blank_to_nil( row[ headers_mapping[ :notes ]] )
        status = StatusParser.parse( col )   if col   # note: returns nil if no (match) status found
      end

      league = nil
      league = row[ headers_mapping[ :league ]]   if headers_mapping[ :league ]

      matches << Sports::Match.new( date:     date,
                                    time:     time,
                                    team1:    team1,    team2:    team2,
                                    score1:   score1,   score2:   score2,
                                    score1i:  score1i,  score2i:  score2i,
                                    score1et: score1et, score2et: score2et,
                                    score1p:  score1p,  score2p:  score2p,
                                    round:    round,
                                    stage:    stage,
                                    group:    group,
                                    status:   status,
                                    league:   league )
    end

    matches
  end


  private

  ## returns the first candidate that is included in headers (or nil if none is)
  def find_header( headers, candidates )
    candidates.find { |candidate| headers.include?( candidate ) }
  end

  ## auto-detect the headers mapping from the header names (array of strings);
  ##   note: prints the header names (debug output)
  def detect_headers( headers )
    pp headers

    mapping = {}

    # note: greece 2001-02 etc. use HT  -  check CVS reader  row['HomeTeam'] may not be nil but an empty string?
    #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now

    if find_header( headers, ['Team 1'] ) && find_header( headers, ['Team 2'] )
      ## assume our own football.csv format, see github.com/footballcsv
      mapping[:team1] = find_header( headers, ['Team 1'] )
      mapping[:team2] = find_header( headers, ['Team 2'] )
      mapping[:date]  = find_header( headers, ['Date'] )
      mapping[:time]  = find_header( headers, ['Time'] )

      ## check for all-in-one full time (ft) and half time (ht) scores?
      mapping[:score]  = find_header( headers, ['FT'] )
      mapping[:scorei] = find_header( headers, ['HT'] )

      mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

      ## optional headers - note: find_header returns nil if header NOT found
      { stage:    ['Stage'],
        group:    ['Group'],
        score_et: ['ET', 'AET'],         ## (after) extra time
        score_p:  ['P', 'PEN'],          ## penalties
        notes:    ['Notes', 'Comments'],
        league:   ['League']
      }.each do |key, candidates|
        header = find_header( headers, candidates )
        mapping[key] = header   if header
      end
    else
      ## else try footballdata.uk and others
      mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
      mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
      mapping[:date]  = find_header( headers, ['Date'] )
      mapping[:time]  = find_header( headers, ['Time'] )

      ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
      mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
      mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )

      ## check for half time scores ?
      ##  note: HT = Half Time
      mapping[:score1i] = find_header( headers, ['HTHG'] )
      mapping[:score2i] = find_header( headers, ['HTAG'] )
    end

    mapping
  end

  ## returns true if the row matches ALL header/value pairs in filters
  def filters_match?( row, filters )
    filters.all? { |header, value| row[ header ] == value }
  end

  ## returns nil for missing values (nil, empty, - or n/a) otherwise the value as is
  def blank_to_nil( col )
    (col.nil? || col.empty? || col == '-' || col == 'n/a') ? nil : col
  end

  ## returns the goals as an integer if the value is a one or two digit number (otherwise nil)
  def parse_goals( col )
    col.to_i   if col =~ /^\d{1,2}$/
  end

  ## returns the time as a string in the format hh:mm (or nil if missing / unknown);
  ##   note: prints an error and exits (1) on unknown time formats
  def parse_time( col )
    return nil   if col.nil?

    col = col.strip   # make sure not leading or trailing spaces left over

    ## note: allow missing / unknown time for match
    return nil   if col.empty? ||
                    col =~ /^-{1,}$/ ||    # e.g.  - or ---
                    col =~ /^\?{1,}$/      # e.g. ? or ???

    time_fmt = if col =~ /^\d{1,2}:\d{2}$/
                 '%H:%M'   # e.g. 17:00 or 3:00
               elsif col =~ /^\d{1,2}\.\d{2}$/
                 '%H.%M'   # e.g. 17.00 or 3.00
               else
                 puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
                 ## todo/fix: add to errors/warns list - why? why not?
                 exit 1
               end

    ## todo/check: use time object (keep string?) - why? why not?
    Time.strptime( col, time_fmt ).strftime( '%H:%M' )
  end

  ## returns the date as a string in the format yyyy-mm-dd (or nil if missing / unknown);
  ##   note: prints an error and exits (1) on unknown date formats
  def parse_date( col )
    return nil   if col.nil?

    col = col.strip   # make sure not leading or trailing spaces left over

    ## note: allow missing / unknown date for match
    return nil   if col.empty? ||
                    col =~ /^-{1,}$/ ||    # e.g.  - or ---
                    col =~ /^\?{1,}$/      # e.g. ? or ???

    ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
    col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
    col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
    col = col.strip   # make sure not leading or trailing spaces left over

    date_fmt = if col =~ /^\d{2}\/\d{2}\/\d{4}$/
                 '%d/%m/%Y'     # e.g. 17/08/2002
               elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
                 '%d/%m/%y'     # e.g. 17/08/02
               elsif col =~ /^\d{4}-\d{1,2}-\d{1,2}$/    ## "standard" / default date format
                 '%Y-%m-%d'     # e.g. 1995-08-04
               elsif col =~ /^\d{1,2} \w{3} \d{4}$/
                 '%d %b %Y'     # e.g. 8 Jul 2017
               elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
                 '%a %b %d %Y'  # e.g. Sat Aug 7 1993
               else
                 puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
                 ## todo/fix: add to errors/warns list - why? why not?
                 exit 1
               end

    ## todo/check: use date object (keep string?) - why? why not?
    Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
  end

  ## parse the all-in-one score in the column header of the row;
  ##   label (ft, ht, et or p) is used for the error message only.
  ##   returns a pair e.g. [2,1] or [nil,nil] if missing;
  ##   note: prints an error plus the row and exits (1) on invalid formats
  def parse_score!( row, header, label )
    col   = row[ header ]
    score = parse_score( col )

    unless score
      puts "!! ERROR - invalid score (#{label}) format >#{col}<:"
      pp row
      exit 1
    end

    score
  end

  ########
  # more helpers
  #

  ## returns a pair of integers e.g. [2,1] for 2-1 or 2:1,
  ##         [nil,nil] if missing (nil, empty, ?, - or n/a) or
  ##         nil if the format is invalid / unparseable
  def parse_score( str )
    return [nil,nil]   if str.nil?   ## todo/check: remove nil case - possible? - why? why not?

    ## remove (optional single) note/footnote/endnote markers
    ##  e.g. (*) or (a), (b),
    ##    or [*], [A], [1], etc.
    ##   - allow (1) or maybe (*1) in the future - why? why not?
    str = str.sub( /\( [a-z*] \)
                     |
                    \[ [1-9a-z*] \]
                   /ix, '' ).strip

    if str.empty? || str == '?' || str == '-' || str == 'n/a'
      [nil,nil]
    ### todo/check: use regex with named capture groups here - why? why not?
    elsif str =~ /^\d{1,2}[:-]\d{1,2}$/   ## sanity check scores format
      score = str.split( /[:-]/ )
      [score[0].to_i, score[1].to_i]
    else
      nil  ## note: returns nil if invalid / unparseable format!!!
    end
  end # method parse_score

end # class CsvMatchParser
|
489
|
+
end # module SportDb
|
490
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#####################
|
2
|
+
# helpers for parsing & finding match status e.g.
|
3
|
+
# - cancelled / canceled
|
4
|
+
# - awarded
|
5
|
+
# - abandoned
|
6
|
+
# - replay
|
7
|
+
# etc.
|
8
|
+
|
9
|
+
|
10
|
+
module SportDb
|
11
|
+
|
12
|
+
|
13
|
+
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
+
|
15
|
+
class Status
  # note: use a class as an "enum"-like namespace for now - why? why not?
  #  move class into Match e.g. Match::Status  - why? why not?
  #
  # note: the (shared) status strings are frozen
  #        to guard against changes in place by accident
  CANCELLED = 'CANCELLED'.freeze   # canceled (US spelling), cancelled (UK spelling) - what to use?
  AWARDED   = 'AWARDED'.freeze
  POSTPONED = 'POSTPONED'.freeze
  ABANDONED = 'ABANDONED'.freeze
  REPLAY    = 'REPLAY'.freeze
end # class Status
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
class StatusParser

  ## Check if the text starts with a (match) status.
  ##
  ## returns one of the Status constants or nil if no match found
  ##   (incl. for nil or other non-string values).
  ## note: english usage - cancelled (in UK), canceled (in US);
  ##        the check is case-insensitive
  def self.parse( str )
    case str
    when /^(cancelled|canceled|can\.)/i  then Status::CANCELLED
    when /^(awarded|awd\.)/i             then Status::AWARDED
    when /^(postponed)/i                 then Status::POSTPONED
    when /^(abandoned|abd\.)/i           then Status::ABANDONED
    when /^(replay)/i                    then Status::REPLAY
    else                                 nil   # no match
    end
  end


  ## "protected" text run block e.g. [cancelled] or [awarded 3-0]
  RUN_RE = /\[
               (?<text>[^\]]+)
            \]
           /x

  ## Find the first text run block e.g. [] in line that holds a (match) status.
  ##
  ## note: changes line in place - the text run gets replaced
  ##         with [STATUS.<status>] e.g. [STATUS.CANCELLED]
  ## returns the status or nil if no status found (line is left as is)
  def self.find!( line )
    ## for now check all "protected" text run blocks e.g. []
    rest = line
    while (m = rest.match( RUN_RE ))
      status = parse( m[:text].strip )

      if status
        ## note: m[0] is the complete text run incl. the brackets
        line.sub!( m[0], "[STATUS.#{status}]" )
        return status
      end

      rest = m.post_match   ## keep on processing rest of line (a.k.a. post match string)
    end

    nil
  end # method find!
end # class StatusParser
|
88
|
+
|
89
|
+
end # module SportDb
|
90
|
+
|
@@ -41,6 +41,15 @@ class QuickLeagueOutlineReader
|
|
41
41
|
pp heading
|
42
42
|
exit 1
|
43
43
|
end
|
44
|
+
elsif node[0] == :h2
|
45
|
+
## todo/check - make sure parsed h1 first
|
46
|
+
heading = node[1]
|
47
|
+
## reuse league, season from h1
|
48
|
+
secs << { league: secs[-1][:league],
|
49
|
+
season: secs[-1][:season],
|
50
|
+
stage: heading,
|
51
|
+
lines: []
|
52
|
+
}
|
44
53
|
elsif node[0] == :p ## paragraph with (text) lines
|
45
54
|
lines = node[1]
|
46
55
|
## note: skip lines if no heading seen
|
data/lib/sportdb/quick.rb
CHANGED
@@ -17,8 +17,10 @@ end
|
|
17
17
|
## our own code
|
18
18
|
require_relative 'quick/version'
|
19
19
|
require_relative 'quick/opts'
|
20
|
-
|
21
|
-
require_relative 'quick/
|
20
|
+
|
21
|
+
# require_relative 'quick/linter'
|
22
|
+
# require_relative 'quick/outline_reader'
|
23
|
+
|
22
24
|
|
23
25
|
require_relative 'quick/match_parser'
|
24
26
|
|
@@ -26,6 +28,39 @@ require_relative 'quick/quick_league_outline_reader'
|
|
26
28
|
require_relative 'quick/quick_match_reader'
|
27
29
|
|
28
30
|
|
31
|
+
|
32
|
+
|
33
|
+
###
|
34
|
+
# csv (tabular dataset) support / machinery
|
35
|
+
require_relative 'quick/csv/match_status_parser'
|
36
|
+
require_relative 'quick/csv/goal'
|
37
|
+
require_relative 'quick/csv/goal_parser_csv'
|
38
|
+
require_relative 'quick/csv/match_parser_csv'
|
39
|
+
|
40
|
+
|
41
|
+
### add convenience shortcut helpers
|
42
|
+
module Sports
|
43
|
+
class Match
|
44
|
+
def self.read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
|
45
|
+
SportDb::CsvMatchParser.read( path,
|
46
|
+
headers: headers,
|
47
|
+
filters: filters,
|
48
|
+
converters: converters,
|
49
|
+
sep: sep )
|
50
|
+
end
|
51
|
+
|
52
|
+
def self.parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
|
53
|
+
SportDb::CsvMatchParser.parse( txt,
|
54
|
+
headers: headers,
|
55
|
+
filters: filters,
|
56
|
+
converters: converters,
|
57
|
+
sep: sep )
|
58
|
+
end
|
59
|
+
end # class Match
|
60
|
+
end # module Sports
|
61
|
+
|
62
|
+
|
63
|
+
|
29
64
|
puts SportDb::Module::Quick.banner # say hello
|
30
65
|
|
31
66
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-quick
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-parser
|
@@ -86,7 +86,7 @@ dependencies:
|
|
86
86
|
- - "~>"
|
87
87
|
- !ruby/object:Gem::Version
|
88
88
|
version: '4.1'
|
89
|
-
description: sportdb-quick - football.txt (quick) match
|
89
|
+
description: sportdb-quick - football.txt (quick) match readers and more
|
90
90
|
email: gerald.bauer@gmail.com
|
91
91
|
executables:
|
92
92
|
- fbt
|
@@ -102,10 +102,12 @@ files:
|
|
102
102
|
- Rakefile
|
103
103
|
- bin/fbt
|
104
104
|
- lib/sportdb/quick.rb
|
105
|
-
- lib/sportdb/quick/linter.rb
|
105
|
+
- lib/sportdb/quick/csv/goal.rb
|
106
|
+
- lib/sportdb/quick/csv/goal_parser_csv.rb
|
107
|
+
- lib/sportdb/quick/csv/match_parser_csv.rb
|
108
|
+
- lib/sportdb/quick/csv/match_status_parser.rb
|
106
109
|
- lib/sportdb/quick/match_parser.rb
|
107
110
|
- lib/sportdb/quick/opts.rb
|
108
|
-
- lib/sportdb/quick/outline_reader.rb
|
109
111
|
- lib/sportdb/quick/quick_league_outline_reader.rb
|
110
112
|
- lib/sportdb/quick/quick_match_reader.rb
|
111
113
|
- lib/sportdb/quick/version.rb
|
@@ -133,5 +135,5 @@ requirements: []
|
|
133
135
|
rubygems_version: 3.4.10
|
134
136
|
signing_key:
|
135
137
|
specification_version: 4
|
136
|
-
summary: sportdb-quick - football.txt (quick) match
|
138
|
+
summary: sportdb-quick - football.txt (quick) match readers and more
|
137
139
|
test_files: []
|
data/lib/sportdb/quick/linter.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module Quick
|
4
|
-
|
5
|
-
###
|
6
|
-
## note - Linter for now nested inside Parser - keep? why? why not?
|
7
|
-
class Linter
|
8
|
-
|
9
|
-
def self.debug=(value) @@debug = value; end
|
10
|
-
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
11
|
-
def debug?() self.class.debug?; end
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
attr_reader :errors
|
16
|
-
|
17
|
-
def initialize
|
18
|
-
@errors = []
|
19
|
-
@parser = Parser.new ## use own parser instance (not shared) - why? why not?
|
20
|
-
end
|
21
|
-
|
22
|
-
|
23
|
-
def errors?() @errors.size > 0; end
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
28
|
-
## make sure mon feb 12 18:10 will not match
|
29
|
-
## allow 1. FC Köln etc.
|
30
|
-
## Mainz 05:
|
31
|
-
## limit to 30 chars max
|
32
|
-
## only allow chars incl. intl buut (NOT ()[]/;)
|
33
|
-
##
|
34
|
-
## Group A:
|
35
|
-
## Group B: - remove colon
|
36
|
-
## or lookup first
|
37
|
-
|
38
|
-
ATTRIB_RE = %r{^
|
39
|
-
[ ]*? # slurp leading spaces
|
40
|
-
(?<key>[^:|\]\[()\/; -]
|
41
|
-
[^:|\]\[()\/;]{0,30}
|
42
|
-
)
|
43
|
-
[ ]*? # slurp trailing spaces
|
44
|
-
:[ ]+
|
45
|
-
(?<value>.+)
|
46
|
-
[ ]*? # slurp trailing spaces
|
47
|
-
$
|
48
|
-
}ix
|
49
|
-
|
50
|
-
|
51
|
-
#########
|
52
|
-
## parse - false (default) - tokenize (only)
|
53
|
-
## - true - tokenize & parse
|
54
|
-
def read( path, parse: false )
|
55
|
-
## note: every (new) read call - resets errors list to empty
|
56
|
-
@errors = []
|
57
|
-
|
58
|
-
nodes = OutlineReader.read( path )
|
59
|
-
|
60
|
-
## process nodes
|
61
|
-
h1 = nil
|
62
|
-
orphans = 0 ## track paragraphs's with no heading
|
63
|
-
|
64
|
-
attrib_found = false
|
65
|
-
|
66
|
-
|
67
|
-
nodes.each do |node|
|
68
|
-
type = node[0]
|
69
|
-
|
70
|
-
if type == :h1
|
71
|
-
h1 = node[1] ## get heading text
|
72
|
-
puts
|
73
|
-
puts " = Heading 1 >#{node[1]}<"
|
74
|
-
elsif type == :p
|
75
|
-
|
76
|
-
if h1.nil?
|
77
|
-
orphans += 1 ## only warn once
|
78
|
-
puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
79
|
-
next
|
80
|
-
end
|
81
|
-
|
82
|
-
lines = node[1]
|
83
|
-
|
84
|
-
tree = []
|
85
|
-
lines.each_with_index do |line,i|
|
86
|
-
|
87
|
-
if debug?
|
88
|
-
puts
|
89
|
-
puts "line >#{line}<"
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
## skip new (experimental attrib syntax)
|
94
|
-
if attrib_found == false &&
|
95
|
-
ATTRIB_RE.match?( line )
|
96
|
-
## note: check attrib regex AFTER group def e.g.:
|
97
|
-
## Group A:
|
98
|
-
## Group B: etc.
|
99
|
-
## todo/fix - change Group A: to Group A etc.
|
100
|
-
## Group B: to Group B
|
101
|
-
attrib_found = true
|
102
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
103
|
-
next
|
104
|
-
end
|
105
|
-
|
106
|
-
if attrib_found
|
107
|
-
## check if line ends with dot
|
108
|
-
## if not slurp up lines to the next do!!!
|
109
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
110
|
-
attrib_found = false if line.end_with?( '.' )
|
111
|
-
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
112
|
-
next
|
113
|
-
end
|
114
|
-
|
115
|
-
t, error_messages = if parse
|
116
|
-
@parser.parse_with_errors( line )
|
117
|
-
else
|
118
|
-
@parser.tokenize_with_errors( line )
|
119
|
-
end
|
120
|
-
|
121
|
-
|
122
|
-
if error_messages.size > 0
|
123
|
-
## add to "global" error list
|
124
|
-
## make a triplet tuple (file / msg / line text)
|
125
|
-
error_messages.each do |msg|
|
126
|
-
@errors << [ path,
|
127
|
-
msg,
|
128
|
-
line
|
129
|
-
]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
pp t if debug?
|
134
|
-
|
135
|
-
tree << t
|
136
|
-
end
|
137
|
-
|
138
|
-
## pp tree
|
139
|
-
else
|
140
|
-
pp node
|
141
|
-
raise ArgumentError, "unsupported (node) type >#{type}<"
|
142
|
-
end
|
143
|
-
end # each node
|
144
|
-
end # read
|
145
|
-
end # class Linter
|
146
|
-
|
147
|
-
|
148
|
-
end # module Quick
|
149
|
-
end # module SportDb
|
@@ -1,97 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
class OutlineReader
|
6
|
-
|
7
|
-
def self.debug=(value) @@debug = value; end
|
8
|
-
def self.debug?() @@debug ||= false; end
|
9
|
-
def debug?() self.class.debug?; end
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
14
|
-
txt = File.open( path, 'r:utf-8' ) {|f| f.read }
|
15
|
-
parse( txt )
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.parse( txt )
|
19
|
-
new( txt ).parse
|
20
|
-
end
|
21
|
-
|
22
|
-
def initialize( txt )
|
23
|
-
@txt = txt
|
24
|
-
end
|
25
|
-
|
26
|
-
## note: skip "decorative" only heading e.g. ========
|
27
|
-
## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
|
28
|
-
HEADING_BLANK_RE = %r{\A
|
29
|
-
={1,}
|
30
|
-
\z}x
|
31
|
-
|
32
|
-
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
33
|
-
HEADING_RE = %r{\A
|
34
|
-
(?<marker>={1,}) ## 1. leading ======
|
35
|
-
[ ]*
|
36
|
-
(?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
|
37
|
-
[ ]*
|
38
|
-
=* ## 3. (optional) trailing ====
|
39
|
-
\z}x
|
40
|
-
|
41
|
-
def parse
|
42
|
-
outline=[] ## outline structure
|
43
|
-
start_para = true ## start new para(graph) on new text line?
|
44
|
-
|
45
|
-
@txt.each_line do |line|
|
46
|
-
line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
|
47
|
-
|
48
|
-
if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
|
49
|
-
start_para = true
|
50
|
-
next
|
51
|
-
end
|
52
|
-
|
53
|
-
break if line == '__END__'
|
54
|
-
|
55
|
-
next if line.start_with?( '#' ) ## skip comments too
|
56
|
-
## strip inline (until end-of-line) comments too
|
57
|
-
## e.g Eupen | KAS Eupen ## [de]
|
58
|
-
## => Eupen | KAS Eupen
|
59
|
-
## e.g bq Bonaire, BOE # CONCACAF
|
60
|
-
## => bq Bonaire, BOE
|
61
|
-
line = line.sub( /#.*/, '' ).strip
|
62
|
-
pp line if debug?
|
63
|
-
|
64
|
-
## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
|
65
|
-
next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
|
66
|
-
|
67
|
-
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
68
|
-
if m=HEADING_RE.match( line )
|
69
|
-
start_para = true
|
70
|
-
|
71
|
-
heading_marker = m[:marker]
|
72
|
-
heading_level = heading_marker.length ## count number of = for heading level
|
73
|
-
heading = m[:text].strip
|
74
|
-
|
75
|
-
puts "heading #{heading_level} >#{heading}<" if debug?
|
76
|
-
outline << [:"h#{heading_level}", heading]
|
77
|
-
else ## assume it's a (plain/regular) text line
|
78
|
-
if start_para
|
79
|
-
outline << [:p, [line]]
|
80
|
-
start_para = false
|
81
|
-
else
|
82
|
-
node = outline[-1] ## get last entry
|
83
|
-
if node[0] == :p ## assert it's a p(aragraph) node!!!
|
84
|
-
node[1] << line ## add line to p(aragraph)
|
85
|
-
else
|
86
|
-
puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
|
87
|
-
pp node
|
88
|
-
exit 1
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
outline
|
94
|
-
end # method read
|
95
|
-
end # class OutlineReader
|
96
|
-
|
97
|
-
end # module SportDb
|