sportdb-parser 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/bin/fbtok +1 -55
- data/lib/sportdb/parser/fbtok/main.rb +139 -0
- data/lib/sportdb/parser/lang.rb +3 -0
- data/lib/sportdb/parser/linter.rb +8 -1
- data/lib/sportdb/parser/opts.rb +78 -0
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +9 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c2deeaae15626c5ed7118035667bf689af0426f3580b9ac50abe48ad310aaf1c
|
4
|
+
data.tar.gz: a3cdff8f2ed1dfb7bf3fa116a0d30928de27f25ecbaf63e75cbcdc90fc60fe0d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 865a3b6844014660fddce03d3236a5a1e3dec003282681c8eb4b6c7ad83e2e91e936d5dd4030188de7578d11258707c72543a1e7b198440d5e9486743e648d9b
|
7
|
+
data.tar.gz: f1449389d729388b1fa29086ad7a89d74690351cd5ead9d561a8f7f5ebbc38dec09487612000055343728094bda859200ecaf71dfef7fd42fb2ecec16a078e98
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -4,8 +4,10 @@ README.md
|
|
4
4
|
Rakefile
|
5
5
|
bin/fbtok
|
6
6
|
lib/sportdb/parser.rb
|
7
|
+
lib/sportdb/parser/fbtok/main.rb
|
7
8
|
lib/sportdb/parser/lang.rb
|
8
9
|
lib/sportdb/parser/linter.rb
|
10
|
+
lib/sportdb/parser/opts.rb
|
9
11
|
lib/sportdb/parser/outline_reader.rb
|
10
12
|
lib/sportdb/parser/parser.rb
|
11
13
|
lib/sportdb/parser/token-date.rb
|
data/bin/fbtok
CHANGED
@@ -6,61 +6,7 @@
|
|
6
6
|
require 'sportdb/parser'
|
7
7
|
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
args=ARGV
|
13
|
-
|
14
|
-
|
15
|
-
opts = {
|
16
|
-
debug: true,
|
17
|
-
metal: false,
|
18
|
-
}
|
19
|
-
|
20
|
-
parser = OptionParser.new do |parser|
|
21
|
-
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
22
|
-
|
23
|
-
parser.on( "--verbose", "--debug",
|
24
|
-
"turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
|
25
|
-
opts[:debug] = debug
|
26
|
-
end
|
27
|
-
|
28
|
-
parser.on( "--metal",
|
29
|
-
"turn off typed parse tree; show to the metal tokens"+
|
30
|
-
" (default: #{opts[:metal]})" ) do |metal|
|
31
|
-
opts[:metal] = metal
|
32
|
-
end
|
33
|
-
end
|
34
|
-
parser.parse!( args )
|
35
|
-
|
36
|
-
puts "OPTS:"
|
37
|
-
p opts
|
38
|
-
puts "ARGV:"
|
39
|
-
p args
|
40
|
-
|
41
|
-
|
42
|
-
SportDb::Parser::Linter.debug = true if opts[:debug]
|
43
|
-
|
44
|
-
linter = SportDb::Parser::Linter.new
|
45
|
-
errors = []
|
46
|
-
|
47
|
-
paths = args
|
48
|
-
paths.each_with_index do |path,i|
|
49
|
-
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
50
|
-
linter.read( path, parse: !opts[:metal] )
|
51
|
-
|
52
|
-
errors += linter.errors if linter.errors?
|
53
|
-
end
|
54
|
-
|
55
|
-
if errors.size > 0
|
56
|
-
puts
|
57
|
-
pp errors
|
58
|
-
puts
|
59
|
-
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
60
|
-
else
|
61
|
-
puts
|
62
|
-
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
63
|
-
end
|
9
|
+
Fbtok.main( ARGV )
|
64
10
|
|
65
11
|
|
66
12
|
puts "bye"
|
@@ -0,0 +1,139 @@
|
|
1
|
+
|
2
|
+
module Fbtok

  ## Command-line entry point of the fbtok tool.
  ##
  ##  args - command-line arguments (defaults to ARGV);
  ##         note: option switches get removed in place by OptionParser#parse!
  ##
  ## Flow:
  ##   1) parse the options (-q/--quiet, --metal, -f/--file FILE)
  ##   2) collect the datasets ("specs") - every spec is a pair of
  ##        [paths, rec], that is, datafile paths plus a (hash) record
  ##   3) read every datafile with the linter and
  ##        print the errors found per dataset
  ##   4) print a summary report if the --file option is used
  ##
  ## Progress and results are printed to stdout.
  def self.main( args=ARGV )
    opts = parse_options( args )

    puts "OPTS:"
    p opts
    puts "ARGV:"
    p args


    ## todo/check - use packs or projects or such
    ##                instead of specs - why? why not?
    specs = []
    if opts[:file]
      recs = read_csv( opts[:file] )
      pp recs
      ## note - make pathspecs relative to passed in file arg!!!
      basedir = File.dirname( opts[:file] )
      recs.each do |rec|
        paths = SportDb::Parser::Opts.find( rec['path'], dir: basedir )
        specs << [paths, rec]
      end
    else
      paths = if args.empty?
                ## no args passed in - fall back to built-in sample datafiles
                [
                  '../../../openfootball/euro/2021--europe/euro.txt',
                  '../../../openfootball/euro/2024--germany/euro.txt',
                ]
              else
                ## check for directories
                ##   and auto-expand
                SportDb::Parser::Opts.expand_args( args )
              end
      specs << [paths, {}]
    end


    SportDb::Parser::Linter.debug = true    if opts[:debug]

    linter = SportDb::Parser::Linter.new


    specs.each do |paths, rec|
      errors = []

      paths.each_with_index do |path,j|
        puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
        linter.read( path, parse: !opts[:metal] )

        errors += linter.errors    if linter.errors?
      end

      if errors.empty?
        puts
        puts "OK no parse errors found in #{paths.size} datafile(s)"
      else
        puts
        pp errors
        puts
        puts "!! #{errors.size} parse error(s) in #{paths.size} datafile(s)"
      end

      ## add errors to rec via rec['errors'] to allow
      ##   for further processing/reporting
      rec['errors'] = errors
    end


    ###
    ## generate a report if --file option used
    if opts[:file]
      puts
      puts "SUMMARY:"
      puts summary_report( specs )

      # maybe write out in the future?
      # basedir  = File.dirname( opts[:file] )
      # basename = File.basename( opts[:file], File.extname( opts[:file] ))
    end
  end # method self.main


  ## Parse the option switches in args (removing them in place)
  ##   and return the options hash with the keys :debug, :metal and :file.
  ##
  ## note - the former --verbose / --debug switch is disabled for now;
  ##          debug output is on by default and gets turned off with -q/--quiet
  def self.parse_options( args )
    opts = {
      debug: true,
      metal: false,
      file:  nil,
    }

    parser = OptionParser.new do |cmd|
      cmd.banner = "Usage: #{$PROGRAM_NAME} [options]"

      cmd.on( "-q", "--quiet",
              "less debug output/messages - default is (#{!opts[:debug]})" ) do
        opts[:debug] = false
      end

      cmd.on( "--metal",
              "turn off typed parse tree; show to the metal tokens"+
              " (default: #{opts[:metal]})" ) do
        opts[:metal] = true
      end

      cmd.on( "-f FILE", "--file FILE",
              "read datafiles (pathspecs) via .csv file") do |file|
        opts[:file] = file
      end
    end
    parser.parse!( args )

    opts
  end
  private_class_method :parse_options


  ## Build the text of the summary report - one status line per dataset
  ##   followed by the pretty-printed errors (if any).
  ##
  ##  specs - array of [paths, rec] pairs; every rec must hold
  ##            the errors collected for the dataset in rec['errors']
  def self.summary_report( specs )
    buf = String.new

    buf << "# fbtok summary report - #{specs.size} dataset(s)\n\n"

    specs.each do |paths, rec|
      errors = rec['errors']

      buf << (errors.empty? ? " OK " : "!! #{errors.size} ERROR(S) ")
      buf << "%-20s" % rec['path']
      buf << " - #{paths.size} datafile(s)"
      buf << "\n"

      unless errors.empty?
        buf << errors.pretty_inspect
        buf << "\n"
      end
    end

    buf
  end
  private_class_method :summary_report

end # module Fbtok
|
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -59,6 +59,7 @@ def read( path, parse: false )
|
|
59
59
|
|
60
60
|
## process nodes
|
61
61
|
h1 = nil
|
62
|
+
h2 = nil
|
62
63
|
orphans = 0 ## track paragraphs's with no heading
|
63
64
|
|
64
65
|
attrib_found = false
|
@@ -69,8 +70,14 @@ def read( path, parse: false )
|
|
69
70
|
|
70
71
|
if type == :h1
|
71
72
|
h1 = node[1] ## get heading text
|
72
|
-
puts
|
73
73
|
puts " = Heading 1 >#{node[1]}<"
|
74
|
+
elsif type == :h2
|
75
|
+
if h1.nil?
|
76
|
+
puts "!! WARN - no heading for subheading; skipping parse"
|
77
|
+
next
|
78
|
+
end
|
79
|
+
h2 = node[1] ## get heading text
|
80
|
+
puts " == Heading 2 >#{node[1]}<"
|
74
81
|
elsif type == :p
|
75
82
|
|
76
83
|
if h1.nil?
|
@@ -0,0 +1,78 @@
|
|
1
|
+
|
2
|
+
module SportDb
class Parser


###
## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
##
## Helpers for turning command-line path arguments
##   into lists of match datafiles (.txt).
class Opts

  ## season directory name e.g. 2023-24 or 2024 or 2024--germany
  SEASON_RE = %r{ (?:
                     \d{4}-\d{2}
                   | \d{4}(--[a-z0-9_-]+)?
                  )
                }x
  SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?


  ## note: if pattern includes directory add here
  ##     (otherwise move to more "generic" datafile) - why? why not?
  ##
  ## matches a .txt datafile located directly inside a season directory;
  ##   anchored to the whole string with \A and \z
  ##   (and NOT per line with ^ and $)
  MATCH_RE = %r{ (?: \A|/ )      # start of string or start of a path segment (/)
                   #{SEASON}
                   /[a-z0-9_-]+\.txt\z  ## txt e.g /1-premierleague.txt
              }x


  ## Find all match datafiles below the directory path (recursive).
  ##
  ##  path - directory to search
  ##  dir  - optional base directory; if present path gets resolved
  ##           relative to dir (otherwise relative to the working directory)
  ##
  ## Returns an array of absolute paths of all .txt files matching MATCH_RE.
  def self.find( path, dir: nil )
    ## check - rename dir
    ##          use root_dir or work_dir or cd or such - why? why not?

    ## note: normalize path - use File.expand_path ??
    ##    change all backslash to slash for now
    ## path = path.gsub( "\\", '/' )
    root = if dir
             File.expand_path( path, File.expand_path( dir ))
           else
             File.expand_path( path )
           end

    ## check all txt files
    ## note: incl. files starting with dot (.)) as candidates
    ##     (normally excluded with just *)
    ## note: pass the directory in via base: (and NOT inside the pattern)
    ##     to keep glob metachars in directory names literal
    candidates = Dir.glob( '**/{*,.*}.txt', base: root )
                    .map { |relpath| File.join( root, relpath ) }
    ## pp candidates

    candidates.select { |candidate| MATCH_RE.match?( candidate ) }
  end


  ## Expand the command-line args into a flat list of datafile paths.
  ##
  ## Every arg that is an existing directory gets replaced by
  ##   the match datafiles found inside (see find);
  ##   all other args are assumed to be files and passed through as is.
  def self.expand_args( args )
    args.flat_map do |arg|
      ## check if directory
      if Dir.exist?( arg )
        datafiles = find( arg )
        puts
        puts " found #{datafiles.size} match txt datafiles in #{arg}"
        pp datafiles
        datafiles
      else
        ## assume it's a file
        [arg]
      end
    end
  end
end  # class Opts


end   # class Parser
end   # module SportDb
|
data/lib/sportdb/parser.rb
CHANGED
@@ -3,6 +3,10 @@ require 'cocos'
|
|
3
3
|
require 'season/formats' # e.g. Season() support machinery
|
4
4
|
|
5
5
|
|
6
|
+
## more stdlibs
|
7
|
+
require 'optparse' ## check - already auto-required in cocos? keep? why? why not?
|
8
|
+
|
9
|
+
|
6
10
|
|
7
11
|
####
|
8
12
|
# try a (simple) tokenizer/parser with regex
|
@@ -28,7 +32,12 @@ require_relative 'parser/parser'
|
|
28
32
|
## todo/check - move outline reader upstream to cocos - why? why not?
|
29
33
|
## use read_outline(), parse_outline() - why? why not?
|
30
34
|
require_relative 'parser/outline_reader'
|
35
|
+
|
36
|
+
|
37
|
+
require_relative 'parser/opts'
|
31
38
|
require_relative 'parser/linter'
|
39
|
+
require_relative 'parser/fbtok/main'
|
40
|
+
|
32
41
|
|
33
42
|
###
|
34
43
|
# make parser api (easily) available - why? why not?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -88,8 +88,10 @@ files:
|
|
88
88
|
- Rakefile
|
89
89
|
- bin/fbtok
|
90
90
|
- lib/sportdb/parser.rb
|
91
|
+
- lib/sportdb/parser/fbtok/main.rb
|
91
92
|
- lib/sportdb/parser/lang.rb
|
92
93
|
- lib/sportdb/parser/linter.rb
|
94
|
+
- lib/sportdb/parser/opts.rb
|
93
95
|
- lib/sportdb/parser/outline_reader.rb
|
94
96
|
- lib/sportdb/parser/parser.rb
|
95
97
|
- lib/sportdb/parser/token-date.rb
|