fbtok 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +9 -0
- data/README.md +29 -0
- data/Rakefile +28 -0
- data/bin/fbtok +13 -0
- data/lib/fbtok/fbtok.rb +141 -0
- data/lib/fbtok/linter.rb +156 -0
- data/lib/fbtok/opts.rb +81 -0
- data/lib/fbtok.rb +9 -0
- metadata +105 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 434d60833636dd403ce4671279c847aa446295236685f264fa31b4791d9cdf82
|
4
|
+
data.tar.gz: 55729c6c2100163e05af42f53121958f7b9442ee15e07af2bfc28a7a94cdb75f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e0694f22d437e9d614070acf083db4c4b522d99fe20c0c53370c6dd59d73107cc9ab7f94b26a049c4f3031f2acbfef2ff64ad0eb447dd6c2a54c34293becc717
|
7
|
+
data.tar.gz: fbe918cfe469447bcc1b77e9bb12a17e6430e6dc1bf5e287637cc0cb8379f606582c9adbfb1e3150609887db26fc7f75506f7faa64ae992f4e7a5487b7d0c16e
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# fbtok - football.txt lint tools incl. tokenizer, parser & more
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
* home :: [github.com/sportdb/footty](https://github.com/sportdb/footty)
|
6
|
+
* bugs :: [github.com/sportdb/footty/issues](https://github.com/sportdb/footty/issues)
|
7
|
+
* gem :: [rubygems.org/gems/fbtok](https://rubygems.org/gems/fbtok)
|
8
|
+
* rdoc :: [rubydoc.info/gems/fbtok](http://rubydoc.info/gems/fbtok)
|
9
|
+
|
10
|
+
|
11
|
+
## Step 0 - Installation Via Gems
|
12
|
+
|
13
|
+
To install the command-line tool via gems (ruby's package manager) use:
|
14
|
+
|
15
|
+
```
|
16
|
+
$ gem install fbtok
|
17
|
+
```
|
18
|
+
|
19
|
+
|
20
|
+
## Usage
|
21
|
+
|
22
|
+
...
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
## Questions? Comments?
|
27
|
+
|
28
|
+
Yes, you can. More than welcome.
|
29
|
+
See [Help & Support »](https://github.com/openfootball/help)
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
|
3
|
+
|
4
|
+
Hoe.spec 'fbtok' do
|
5
|
+
self.version = '0.0.1'
|
6
|
+
|
7
|
+
self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
|
8
|
+
self.description = summary
|
9
|
+
|
10
|
+
self.urls = { home: 'https://github.com/sportdb/footty' }
|
11
|
+
|
12
|
+
self.author = 'Gerald Bauer'
|
13
|
+
self.email = 'gerald.bauer@gmail.com'
|
14
|
+
|
15
|
+
# switch extension to .markdown for github formatting
|
16
|
+
self.readme_file = 'README.md'
|
17
|
+
self.history_file = 'CHANGELOG.md'
|
18
|
+
|
19
|
+
self.licenses = ['Public Domain']
|
20
|
+
|
21
|
+
self.extra_deps = [
|
22
|
+
['sportdb-parser', '>= 0.3.9'],
|
23
|
+
]
|
24
|
+
|
25
|
+
self.spec_extras = {
|
26
|
+
required_ruby_version: '>= 3.1.0'
|
27
|
+
}
|
28
|
+
end
|
data/bin/fbtok
ADDED
data/lib/fbtok/fbtok.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
|
2
|
+
module Fbtok

  ##
  ## command-line entry point for the fbtok lint tool
  ##
  ##  args - command-line arguments (defaults to ARGV);
  ##         note: recognized option switches get removed in-place
  ##               (via OptionParser#parse!) - what is left are the paths
  ##
  ##  options:
  ##    -q, --quiet         - turn off debug output
  ##        --verbose,
  ##        --debug         - turn on debug output (default)
  ##        --metal         - tokenize only (no parse tree)
  ##    -f, --file FILE     - read datasets (pathspecs) from a .csv file
  ##                          and print a summary report at the end
  ##
  ##  returns nil (all results get printed to stdout)
  def self.main( args=ARGV )

    opts = {
      debug: true,
      metal: false,
      file:  nil,
    }

    ## note: block param named cmd (NOT parser) to avoid shadowing
    ##         the outer parser local
    parser = OptionParser.new do |cmd|
      cmd.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"

      cmd.on( "-q", "--quiet",
              "less debug output/messages - default is (#{!opts[:debug]})" ) do
        opts[:debug] = false
      end
      cmd.on( "--verbose", "--debug",
              "turn on verbose / debug output (default: #{opts[:debug]})" ) do
        opts[:debug] = true
      end

      cmd.on( "--metal",
              "turn off typed parse tree; show to the metal tokens"+
              " (default: #{opts[:metal]})" ) do
        opts[:metal] = true
      end

      cmd.on( "-f FILE", "--file FILE",
              "read datafiles (pathspecs) via .csv file") do |file|
        opts[:file] = file
        ## note: for batch (massive) processing auto-set debug (verbose output) to false (as default)
        opts[:debug] = false
      end
    end
    parser.parse!( args )

    puts "OPTS:"
    p opts
    puts "ARGV:"
    p args


    ## todo/check - use packs or projects or such
    ##                instead of specs - why? why not?
    ##
    ## every spec is a pair of [paths, rec]
    ##   where rec is the csv record (or an empty hash if no --file used)
    specs = []
    if opts[:file]
      recs = read_csv( opts[:file] )
      pp recs
      ## note - make pathspecs relative to passed in file arg!!!
      basedir = File.dirname( opts[:file] )
      recs.each do |rec|
        paths = SportDb::Parser::Opts.find( rec['path'], dir: basedir )
        specs << [paths, rec]
      end
    else
      paths = if args.empty?
                 ## no args passed in - fall back to built-in sample datafiles
                 [
                   '../../../openfootball/euro/2021--europe/euro.txt',
                   '../../../openfootball/euro/2024--germany/euro.txt',
                 ]
              else
                 ## check for directories
                 ##   and auto-expand
                 SportDb::Parser::Opts.expand_args( args )
              end
      specs << [paths, {}]
    end


    SportDb::Parser::Linter.debug = true    if opts[:debug]

    linter = SportDb::Parser::Linter.new


    specs.each do |paths, rec|
      errors = []

      paths.each_with_index do |path,j|
        puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
        linter.read( path, parse: !opts[:metal] )

        ## note: linter resets its errors on every read;
        ##         collect (copy over) errors per dataset here
        errors.concat( linter.errors )   if linter.errors?
      end

      if errors.size > 0
        puts
        pp errors
        puts
        puts "!! #{errors.size} parse error(s) in #{paths.size} datafile(s)"
      else
        puts
        puts "OK no parse errors found in #{paths.size} datafile(s)"
      end

      ## add errors to rec via rec['errors'] to allow
      ##   for further processing/reporting
      rec['errors'] = errors
    end


    ###
    ## generate a report if --file option used
    if opts[:file]

      buf = String.new

      buf << "# fbtok summary report - #{specs.size} dataset(s)\n\n"

      specs.each do |paths, rec|
        errors = rec['errors']

        if errors.size > 0
          buf << "!! #{errors.size} ERROR(S) "
        else
          buf << " OK "
        end
        buf << "%-20s" % rec['path']
        buf << " - #{paths.size} datafile(s)"
        buf << "\n"

        if errors.size > 0
          buf << errors.pretty_inspect
          buf << "\n"
        end
      end

      puts
      puts "SUMMARY:"
      puts buf

      # maybe write out in the future?
      # basedir = File.dirname( opts[:file] )
      # basename = File.basename( opts[:file], File.extname( opts[:file] ))
    end

  end  # method self.main
end  # module Fbtok
|
data/lib/fbtok/linter.rb
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
|
2
|
+
module SportDb
class Parser

###
## note - Linter for now nested inside Parser - keep? why? why not?
##
##  reads a datafile (via OutlineReader), runs every text line
##   through the tokenizer (or tokenizer & parser)
##   and collects all error messages in errors
class Linter

  ## class-wide debug flag (shared by all linters)
  def self.debug=(value) @@debug = value; end
  def self.debug?() @@debug ||= false; end  ## note: default is FALSE
  def debug?()  self.class.debug?; end


  ## list of [path, msg, line] triplets from the last read call
  attr_reader :errors

  def initialize
    @errors = []
    @parser = Parser.new   ## use own parser instance (not shared) - why? why not?
  end


  ## true if the last read call recorded one (or more) errors
  def errors?() @errors.size > 0; end


  ## note: colon (:) MUST be followed by one (or more) spaces
  ##      make sure mon feb 12 18:10 will not match
  ##        allow 1. FC Köln etc.
  ##               Mainz 05:
  ##           limit to 30 chars max
  ##          only allow  chars incl. intl but (NOT ()[]/;)
  ##
  ##   Group A:
  ##   Group B:   - remove colon
  ##    or lookup first

  ATTRIB_RE = %r{^
                   [ ]*?     # slurp leading spaces
                (?<key>[^:|\]\[()\/; -]
                       [^:|\]\[()\/;]{0,30}
                 )
                   [ ]*?     # slurp trailing spaces
                   :[ ]+
                (?<value>.+)
                    [ ]*?   # slurp trailing spaces
                   $
                }ix


  #########
  ## read (and lint) the datafile in path
  ##
  ##  parse - false (default) - tokenize (only)
  ##        - true            - tokenize & parse
  ##
  ##  raises ArgumentError on unsupported outline node types
  def read( path, parse: false )
    ## note: every (new) read call - resets errors list to empty
    @errors = []

    nodes = OutlineReader.read( path )

    ##  process nodes
    h1 = nil
    orphans = 0    ## track paragraphs with no heading

    ## note: attrib (continuation) state is kept across paragraphs
    attrib_found = false


    nodes.each do |node|
      type = node[0]

      case type
      when :h1
        h1 = node[1]  ## get heading text
        puts " = Heading 1 >#{node[1]}<"
      when :h2
        if h1.nil?
          puts "!! WARN - no heading for subheading; skipping parse"
          next
        end
        puts " == Heading 2 >#{node[1]}<"
      when :p

        if h1.nil?
          orphans += 1    ## note: warns on every orphan (with running count)
          puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
          next
        end

        lines = node[1]

        lines.each do |line|

          if debug?
            puts
            puts "line >#{line}<"
          end


          ## skip new (experimental attrib syntax)
          if attrib_found == false &&
               ATTRIB_RE.match?( line )
            ## note: check attrib regex AFTER group def e.g.:
            ##         Group A:
            ##         Group B:  etc.
            ##     todo/fix - change Group A: to Group A etc.
            ##                       Group B: to Group B
            ##
            ## fix: an attribute that already ends with a dot
            ##       is complete on this line; only slurp up
            ##       continuation lines if the closing dot is missing
            ##       (otherwise the lines that follow get skipped unlinted)
            attrib_found = !line.end_with?( '.' )
            ## logger.debug "skipping key/value line - >#{line}<"
            next
          end

          if attrib_found
            ## check if line ends with dot
            ##  if not slurp up lines to the next dot!!!
            attrib_found = false   if line.end_with?( '.' )
            # logger.debug "skipping key/value line (cont.) - >#{line}<"
            next
          end

          t, error_messages = if parse
                                 @parser.parse_with_errors( line )
                              else
                                 @parser.tokenize_with_errors( line )
                              end


          ## add to "global" error list
          ##   make a triplet tuple (file / msg / line text)
          error_messages.each do |msg|
            @errors << [ path,
                         msg,
                         line
                       ]
          end

          pp t   if debug?
        end
      else
        pp node
        raise ArgumentError, "unsupported (node) type >#{type}<"
      end
    end  # each node
  end  # read
end  # class Linter


end   # class Parser
end   # module SportDb
|
data/lib/fbtok/opts.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
module SportDb
class Parser


###
## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
##
##  helpers to turn command-line args / pathspecs
##    into a list of (match) datafile paths
class Opts

    ## season directory e.g. 2024-25 or 2024 or 2024--germany
    SEASON_RE = %r{ (?:
                       \d{4}-\d{2}
                     | \d{4}(--[a-z0-9_-]+)?
                    )
                  }x
    SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?


    ## note: if pattern includes directory add here
    ##     (otherwise move to more "generic" datafile) - why? why not?
    ##   update - note include/allow dot (.) too
    ##     e.g. 2024-25/at.1.txt
    ##        change to at_1 or uefa_cl or such - why? why not?
    MATCH_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
                      #{SEASON}
                        /[a-z0-9_.-]+\.txt$  ## txt e.g /1-premierleague.txt
                  }x


    ##
    ## find all (match) datafiles below the directory path
    ##
    ##  path - directory to search (recursively)
    ##  dir  - optional base directory; if present
    ##           path gets resolved relative to dir
    ##
    ##  returns an array of absolute paths
    ##    matching <season>/<name>.txt (see MATCH_RE)
    def self.find( path, dir: nil )
      ## check - rename dir
      ##          use root_dir or work_dir or cd or such - why? why not?

      ## note: normalize path - use File.expand_path ??
      ##    change all backslash to slash for now
      ## path = path.gsub( "\\", '/' )
      root = if dir
               File.expand_path( path, File.expand_path( dir ))
             else
               File.expand_path( path )
             end

      ## check all txt files
      ## note: incl. files starting with dot (.)) as candidates
      ##     (normally excluded with just *)
      ##
      ## note: pass in root via base (and NOT interpolated into the pattern);
      ##        otherwise glob metachars (e.g. []{}*?) in directory names
      ##        get interpreted as pattern syntax and matches go missing
      candidates = Dir.glob( '**/{*,.*}.txt', base: root )

      candidates.map    { |rel| File.join( root, rel ) }
                .select { |candidate| MATCH_RE.match?( candidate ) }
    end


    ##
    ## expand command-line args into datafile paths
    ##   - directories get replaced by all (match) datafiles found inside
    ##   - everything else is assumed to be a file and passed along as is
    ##
    ##  returns a (flat) array of paths
    def self.expand_args( args )
      args.flat_map do |arg|
        ## check if directory
        if Dir.exist?( arg )
          datafiles = find( arg )
          puts
          puts " found #{datafiles.size} match txt datafiles in #{arg}"
          pp datafiles
          datafiles
        else
          ## assume it's a file
          [arg]
        end
      end
    end
end  # class Opts


end   # class Parser
end   # module SportDb
|
data/lib/fbtok.rb
ADDED
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fbtok
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-01-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sportdb-parser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.3.9
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.3.9
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rdoc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4.0'
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '7'
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '4.0'
|
44
|
+
- - "<"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '7'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: hoe
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '4.2'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - "~>"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '4.2'
|
61
|
+
description: fbtok - football.txt lint tools incl. tokenizer, parser & more
|
62
|
+
email: gerald.bauer@gmail.com
|
63
|
+
executables:
|
64
|
+
- fbtok
|
65
|
+
extensions: []
|
66
|
+
extra_rdoc_files:
|
67
|
+
- CHANGELOG.md
|
68
|
+
- Manifest.txt
|
69
|
+
- README.md
|
70
|
+
files:
|
71
|
+
- CHANGELOG.md
|
72
|
+
- Manifest.txt
|
73
|
+
- README.md
|
74
|
+
- Rakefile
|
75
|
+
- bin/fbtok
|
76
|
+
- lib/fbtok.rb
|
77
|
+
- lib/fbtok/fbtok.rb
|
78
|
+
- lib/fbtok/linter.rb
|
79
|
+
- lib/fbtok/opts.rb
|
80
|
+
homepage: https://github.com/sportdb/footty
|
81
|
+
licenses:
|
82
|
+
- Public Domain
|
83
|
+
metadata: {}
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options:
|
86
|
+
- "--main"
|
87
|
+
- README.md
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 3.1.0
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
requirements: []
|
101
|
+
rubygems_version: 3.5.22
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: fbtok - football.txt lint tools incl. tokenizer, parser & more
|
105
|
+
test_files: []
|