sportdb-parser 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/bin/fbtok +1 -55
- data/lib/sportdb/parser/fbtok/main.rb +139 -0
- data/lib/sportdb/parser/lang.rb +21 -13
- data/lib/sportdb/parser/opts.rb +78 -0
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +9 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c4d7c1c55b5122f11fc1fdd863e2b8a5cc929b9ae9c57bcc0322400516ab4b8
|
4
|
+
data.tar.gz: f5fcae93a1010e1d74fd4f453cb6a201f8f6e11de1ad0d0c9a572c9bb00bd540
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93cb2dc95acf74b1a26088afe3162f0f87a25e949d3f81cd6408c35ec9c42d7309a7c5f59a4c29870146b72e77ed6d5bc3e14c754abf2925b1564b55cafe0ac3
|
7
|
+
data.tar.gz: 6de22cc1e40f8e423786cef7386ae890533a627b6877f6f17c73d47e6956b2de181c6f3d357a731be4476781c363abaab0de9b21c83a4f88f8b058f8a3a7d005
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -4,8 +4,10 @@ README.md
|
|
4
4
|
Rakefile
|
5
5
|
bin/fbtok
|
6
6
|
lib/sportdb/parser.rb
|
7
|
+
lib/sportdb/parser/fbtok/main.rb
|
7
8
|
lib/sportdb/parser/lang.rb
|
8
9
|
lib/sportdb/parser/linter.rb
|
10
|
+
lib/sportdb/parser/opts.rb
|
9
11
|
lib/sportdb/parser/outline_reader.rb
|
10
12
|
lib/sportdb/parser/parser.rb
|
11
13
|
lib/sportdb/parser/token-date.rb
|
data/bin/fbtok
CHANGED
@@ -6,61 +6,7 @@
|
|
6
6
|
require 'sportdb/parser'
|
7
7
|
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
args=ARGV
|
13
|
-
|
14
|
-
|
15
|
-
opts = {
|
16
|
-
debug: true,
|
17
|
-
metal: false,
|
18
|
-
}
|
19
|
-
|
20
|
-
parser = OptionParser.new do |parser|
|
21
|
-
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
22
|
-
|
23
|
-
parser.on( "--verbose", "--debug",
|
24
|
-
"turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
|
25
|
-
opts[:debug] = true
|
26
|
-
end
|
27
|
-
|
28
|
-
parser.on( "--metal",
|
29
|
-
"turn off typed parse tree; show to the metal tokens"+
|
30
|
-
" (default: #{opts[:metal]})" ) do |metal|
|
31
|
-
opts[:metal] = true
|
32
|
-
end
|
33
|
-
end
|
34
|
-
parser.parse!( args )
|
35
|
-
|
36
|
-
puts "OPTS:"
|
37
|
-
p opts
|
38
|
-
puts "ARGV:"
|
39
|
-
p args
|
40
|
-
|
41
|
-
|
42
|
-
SportDb::Parser::Linter.debug = true if opts[:debug]
|
43
|
-
|
44
|
-
linter = SportDb::Parser::Linter.new
|
45
|
-
errors = []
|
46
|
-
|
47
|
-
paths = args
|
48
|
-
paths.each_with_index do |path,i|
|
49
|
-
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
50
|
-
linter.read( path, parse: !opts[:metal] )
|
51
|
-
|
52
|
-
errors += linter.errors if linter.errors?
|
53
|
-
end
|
54
|
-
|
55
|
-
if errors.size > 0
|
56
|
-
puts
|
57
|
-
pp errors
|
58
|
-
puts
|
59
|
-
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
60
|
-
else
|
61
|
-
puts
|
62
|
-
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
63
|
-
end
|
9
|
+
Fbtok.main( ARGV )
|
64
10
|
|
65
11
|
|
66
12
|
puts "bye"
|
@@ -0,0 +1,139 @@
|
|
1
|
+
|
2
|
+
##
# Command-line front end ("fbtok") for the football.txt linter.
#
# Reads one or more datafiles with SportDb::Parser::Linter, prints
# the parse errors per dataset and - if the --file option is used -
# a summary report over all datasets.
module Fbtok

  ## datafiles read if no args get passed in (and no --file option)
  DEFAULT_PATHS = [
    '../../../openfootball/euro/2021--europe/euro.txt',
    '../../../openfootball/euro/2024--germany/euro.txt',
  ].freeze


  ##
  # Run the command-line tool.
  #
  # args - array of command-line arguments (default: ARGV);
  #        note - recognized options get removed from args in place
  #        (via OptionParser#parse!); the remaining args are
  #        datafiles or directories
  #
  # Prints progress and results to stdout; returns nil.
  def self.main( args=ARGV )
    opts = parse_options( args )

    puts "OPTS:"
    p opts
    puts "ARGV:"
    p args

    ## todo/check - use packs or projects or such
    ##                instead of specs - why? why not?
    specs = collect_specs( args, opts )

    SportDb::Parser::Linter.debug = true    if opts[:debug]

    linter = SportDb::Parser::Linter.new

    specs.each do |paths, rec|
      ## add errors to rec via rec['errors'] to allow
      ##   for further processing/reporting
      rec['errors'] = lint_paths( linter, paths, parse: !opts[:metal] )
    end

    ###
    ## generate a report if --file option used
    if opts[:file]
      puts
      puts "SUMMARY:"
      puts build_report( specs )

      # maybe write out in the future?
      # basedir  = File.dirname( opts[:file] )
      # basename = File.basename( opts[:file], File.extname( opts[:file] ))
    end
  end


  ## parse (and remove) the options from args; returns the options hash
  def self.parse_options( args )
    opts = {
      debug: true,
      metal: false,
      file:  nil,
    }

    cli = OptionParser.new do |o|
      o.banner = "Usage: #{$PROGRAM_NAME} [options]"

      o.on( "-q", "--quiet",
            "less debug output/messages - default is (#{!opts[:debug]})" ) do
        opts[:debug] = false
      end
      ## note - --verbose / --debug switch is turned off for now
      ##          (debug is on by default; use --quiet to turn off)
      # o.on( "--verbose", "--debug",
      #       "turn on verbose / debug output (default: #{opts[:debug]})" ) do
      #   opts[:debug] = true
      # end

      o.on( "--metal",
            "turn off typed parse tree; show to the metal tokens"+
            " (default: #{opts[:metal]})" ) do
        opts[:metal] = true
      end

      o.on( "-f FILE", "--file FILE",
            "read datafiles (pathspecs) via .csv file") do |file|
        opts[:file] = file
      end
    end
    cli.parse!( args )

    opts
  end

  ## build the list of [paths, rec] pairs (datasets) to check -
  ##   one pair per record in the --file .csv or
  ##   a single pair for the passed in args (or the default paths)
  def self.collect_specs( args, opts )
    if opts[:file]
      recs = read_csv( opts[:file] )
      pp recs
      ## note - make pathspecs relative to passed in file arg!!!
      basedir = File.dirname( opts[:file] )
      recs.map do |rec|
        [SportDb::Parser::Opts.find( rec['path'], dir: basedir ), rec]
      end
    else
      paths = if args.empty?
                DEFAULT_PATHS
              else
                ## check for directories
                ##   and auto-expand
                SportDb::Parser::Opts.expand_args( args )
              end
      [[paths, {}]]
    end
  end

  ## read all paths with the linter and print the result;
  ##   returns the collected errors (empty array if none)
  def self.lint_paths( linter, paths, parse: )
    errors = []

    paths.each_with_index do |path,j|
      puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
      linter.read( path, parse: parse )

      errors += linter.errors    if linter.errors?
    end

    puts
    if errors.empty?
      puts "OK no parse errors found in #{paths.size} datafile(s)"
    else
      pp errors
      puts
      puts "!! #{errors.size} parse error(s) in #{paths.size} datafile(s)"
    end

    errors
  end

  ## build the summary report text - one line per dataset
  ##   followed by the pretty printed errors (if any)
  def self.build_report( specs )
    buf = String.new

    buf << "# fbtok summary report - #{specs.size} dataset(s)\n\n"

    specs.each do |paths, rec|
      errors = rec['errors']

      buf << if errors.empty?
               " OK "
             else
               "!! #{errors.size} ERROR(S) "
             end
      buf << "%-20s" % rec['path']
      buf << " - #{paths.size} datafile(s)"
      buf << "\n"

      unless errors.empty?
        buf << errors.pretty_inspect
        buf << "\n"
      end
    end

    buf
  end

  private_class_method :parse_options, :collect_specs,
                       :lint_paths, :build_report

end   # module Fbtok
|
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -26,11 +26,13 @@ end
|
|
26
26
|
|
27
27
|
|
28
28
|
ROUND_RE = %r{^(
|
29
|
-
|
30
29
|
## add special case for group play-off rounds!
|
31
30
|
## group 2 play-off (e.g. worldcup 1954, 1958)
|
32
|
-
|
33
|
-
|
31
|
+
##
|
32
|
+
### note - allow Group ("stand-alone") as "generic" round for now
|
33
|
+
## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
|
34
|
+
(?: Group [ ] [A-Z0-9]+ [ ] Play-?offs? |
|
35
|
+
Group
|
34
36
|
)
|
35
37
|
|
|
36
38
|
# round - note - requires number e.g. round 1,2, etc.
|
@@ -44,29 +46,30 @@ ROUND_RE = %r{^(
|
|
44
46
|
)
|
45
47
|
|
|
46
48
|
## starting with qual(ification)
|
47
|
-
##
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
|
50
|
+
## or
|
51
|
+
## Playoff Round 1
|
52
|
+
## Play-in Round 1
|
53
|
+
(?: (?: Qual \. |
|
54
|
+
Play-?off |
|
55
|
+
Play-?in
|
56
|
+
)
|
57
|
+
[ ] Round [ ] [1-9][0-9]* )
|
52
58
|
|
|
53
59
|
## 1. Round / 2. Round / 3. Round / etc.
|
54
|
-
## Play-off Round
|
55
60
|
## First Round
|
61
|
+
## Play-off Round
|
56
62
|
## Final Round (e.g. Worldcup 1950)
|
57
63
|
(?:
|
58
64
|
(?: [1-9][0-9]* \. |
|
59
|
-
Play-?off |
|
60
65
|
1st | First |
|
61
66
|
2nd | Second |
|
67
|
+
Play-?off |
|
62
68
|
Final
|
63
69
|
)
|
64
70
|
[ ] Round
|
65
71
|
)
|
66
72
|
|
|
67
|
-
## Playoff Round 1
|
68
|
-
(?: Play-?off [ ] Round [ ] [1-9][0-9]* )
|
69
|
-
|
|
70
73
|
## starting with preliminary
|
71
74
|
# e.g. Preliminary round
|
72
75
|
(?: Preliminary [ ]
|
@@ -135,6 +138,11 @@ ROUND_RE = %r{^(
|
|
135
138
|
)
|
136
139
|
[ ] Replays?
|
137
140
|
)
|
141
|
+
|
|
142
|
+
## more
|
143
|
+
(?:
|
144
|
+
Reclassification
|
145
|
+
)
|
138
146
|
)$}ix
|
139
147
|
|
140
148
|
|
@@ -0,0 +1,78 @@
|
|
1
|
+
|
2
|
+
module SportDb
class Parser


###
##  note - Opts Helpers for now nested inside Parser - keep here? why? why not?
##
##  Helpers for turning command-line args or pathspecs
##    into lists of match datafiles (.txt).
class Opts

  ## season directory name e.g. 2024-25 or 2024 or 2021--europe
  SEASON_RE = %r{ (?:
                     \d{4}-\d{2}
                   | \d{4}(--[a-z0-9_-]+)?
                  )
                }x
  SEASON = SEASON_RE.source.freeze    ## "inline" helper for embedding in other regexes - keep? why? why not?


  ## note: if pattern includes directory add here
  ##     (otherwise move to more "generic" datafile) - why? why not?
  ##
  ## note: anchored with \A and \z (whole string) instead of ^ and $ (per line)
  MATCH_RE = %r{ (?: \A|/ )      # beginning of string or beginning of path (/)
                   #{SEASON}
                   /[a-z0-9_-]+\.txt\z  ## txt e.g /1-premierleague.txt
               }x


  ##
  # Find all match datafiles (see MATCH_RE) below a directory.
  #
  # path - directory to search (recursive)
  # dir  - optional base directory used to expand a relative path
  #
  # Returns an array of absolute (expanded) datafile paths;
  #   empty if nothing is found.
  def self.find( path, dir: nil )
    ## check - rename dir
    ##          use root_dir or work_dir or cd or such - why? why not?

    ## note: normalize path via File.expand_path
    root = if dir
             File.expand_path( path, File.expand_path( dir ))
           else
             File.expand_path( path )
           end

    ## check all txt files
    ## note: incl. files starting with dot (.)) as candidates
    ##     (normally excluded with just *)
    ## note: pass in the directory via base (and NOT as part of the pattern);
    ##     otherwise glob special chars in the directory name
    ##     e.g. [], {}, * or ? get (mis)read as pattern
    candidates = Dir.glob( '**/{*,.*}.txt', base: root )
                    .map { |relpath| File.join( root, relpath ) }

    candidates.select { |candidate| MATCH_RE.match?( candidate ) }
  end


  ##
  # Expand command-line args into datafile paths.
  #
  # Every arg that is an existing directory gets replaced by
  #   the match datafiles found in it (printed to stdout);
  #   all other args are assumed to be files and passed along as is.
  #
  # Returns a (new) array of paths.
  def self.expand_args( args )
    args.flat_map do |arg|
      ## check if directory
      if Dir.exist?( arg )
        datafiles = find( arg )
        puts
        puts " found #{datafiles.size} match txt datafiles in #{arg}"
        pp datafiles
        datafiles
      else
        ## assume it's a file
        [arg]
      end
    end
  end
end  # class Opts


end   # class Parser
end   # module SportDb
|
data/lib/sportdb/parser.rb
CHANGED
@@ -3,6 +3,10 @@ require 'cocos'
|
|
3
3
|
require 'season/formats' # e.g. Season() support machinery
|
4
4
|
|
5
5
|
|
6
|
+
## more stdlibs
|
7
|
+
require 'optparse' ## check - already auto-required in cocos? keep? why? why not?
|
8
|
+
|
9
|
+
|
6
10
|
|
7
11
|
####
|
8
12
|
# try a (simple) tokenizer/parser with regex
|
@@ -28,7 +32,12 @@ require_relative 'parser/parser'
|
|
28
32
|
## todo/check - move outline reader upstream to cocos - why? why not?
|
29
33
|
## use read_outline(), parse_outline() - why? why not?
|
30
34
|
require_relative 'parser/outline_reader'
|
35
|
+
|
36
|
+
|
37
|
+
require_relative 'parser/opts'
|
31
38
|
require_relative 'parser/linter'
|
39
|
+
require_relative 'parser/fbtok/main'
|
40
|
+
|
32
41
|
|
33
42
|
###
|
34
43
|
# make parser api (easily) available - why? why not?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -88,8 +88,10 @@ files:
|
|
88
88
|
- Rakefile
|
89
89
|
- bin/fbtok
|
90
90
|
- lib/sportdb/parser.rb
|
91
|
+
- lib/sportdb/parser/fbtok/main.rb
|
91
92
|
- lib/sportdb/parser/lang.rb
|
92
93
|
- lib/sportdb/parser/linter.rb
|
94
|
+
- lib/sportdb/parser/opts.rb
|
93
95
|
- lib/sportdb/parser/outline_reader.rb
|
94
96
|
- lib/sportdb/parser/parser.rb
|
95
97
|
- lib/sportdb/parser/token-date.rb
|