fbtok 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -0
- data/Rakefile +2 -2
- data/bin/fbtok +6 -4
- data/bin/fbtree +227 -0
- data/bin/fbx +2 -2
- data/lib/fbtok/linter.rb +23 -8
- data/lib/fbtok/opts.rb +2 -1
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0db5fb0d9cbda9ad626f4debbf27751f591c90684455e2e3cabc66162a02e3c6
|
4
|
+
data.tar.gz: cc4d1d160356785e54b99fb7033e078af9edf63cd60a469ab8bb2ad685603b7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d688b40a4c97971eab87594a83ffe7966b245370944ac140950f0e935076d25c4317dfcb526ac0f22470c1027a7bc488812f37f07f6d1d413edb8e7c538c3f2b
|
7
|
+
data.tar.gz: beefaf087fe66d2ab3ecaae23cc786184e0558b3dd7bcc44e4e131cf70fca200a3f723060c42a2921d5cbef1f2b6513dc567dc693a7dd03bc2adcd8b7ad3a6e2
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
|
|
2
2
|
|
3
3
|
|
4
4
|
Hoe.spec 'fbtok' do
|
5
|
-
self.version = '0.1.1'
|
5
|
+
self.version = '0.2.0'
|
6
6
|
|
7
7
|
self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
|
8
8
|
self.description = summary
|
@@ -19,9 +19,9 @@ Hoe.spec 'fbtok' do
|
|
19
19
|
self.licenses = ['Public Domain']
|
20
20
|
|
21
21
|
self.extra_deps = [
|
22
|
-
# ['sportdb-parser', '>= 0.2.2'],
|
23
22
|
# ['sportdb-structs', '>= 0.5.0'],
|
24
23
|
# ['logutils', '>= 0.6.1'],
|
24
|
+
['sportdb-parser', '>= 0.5.0'],
|
25
25
|
['sportdb-formats', '>= 2.1.2'],
|
26
26
|
]
|
27
27
|
|
data/bin/fbtok
CHANGED
@@ -10,7 +10,6 @@ args = ARGV
|
|
10
10
|
|
11
11
|
opts = {
|
12
12
|
debug: true,
|
13
|
-
metal: false,
|
14
13
|
file: nil,
|
15
14
|
}
|
16
15
|
|
@@ -27,11 +26,14 @@ parser = OptionParser.new do |parser|
|
|
27
26
|
opts[:debug] = true
|
28
27
|
end
|
29
28
|
|
29
|
+
=begin
|
30
30
|
parser.on( "--metal",
|
31
31
|
"turn off typed parse tree; show to the metal tokens"+
|
32
32
|
" (default: #{opts[:metal]})" ) do |metal|
|
33
33
|
opts[:metal] = true
|
34
34
|
end
|
35
|
+
=end
|
36
|
+
|
35
37
|
|
36
38
|
parser.on( "-f FILE", "--file FILE",
|
37
39
|
"read datafiles (pathspecs) via .csv file") do |file|
|
@@ -85,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
|
|
85
87
|
|
86
88
|
paths.each_with_index do |path,j|
|
87
89
|
puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
|
88
|
-
linter.read( path
|
90
|
+
linter.read( path )
|
89
91
|
|
90
92
|
errors += linter.errors if linter.errors?
|
91
93
|
end
|
@@ -94,10 +96,10 @@ specs.each_with_index do |(paths, rec),i|
|
|
94
96
|
puts
|
95
97
|
pp errors
|
96
98
|
puts
|
97
|
-
puts "!! #{errors.size}
|
99
|
+
puts "!! #{errors.size} tokenize error(s) in #{paths.size} datafiles(s)"
|
98
100
|
else
|
99
101
|
puts
|
100
|
-
puts "OK no
|
102
|
+
puts "OK no tokenize errors found in #{paths.size} datafile(s)"
|
101
103
|
end
|
102
104
|
|
103
105
|
## add errors to rec via rec['errors'] to allow
|
data/bin/fbtree
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/fbtree
|
5
|
+
|
6
|
+
require 'fbtok'
|
7
|
+
|
8
|
+
|
9
|
+
###
|
10
|
+
## note - Linter for now nested inside Parser - keep? why? why not?
|
11
|
+
class RaccLinter
|
12
|
+
|
13
|
+
def self.debug=(value) @@debug = value; end
|
14
|
+
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
15
|
+
def debug?() self.class.debug?; end
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
attr_reader :errors
|
20
|
+
|
21
|
+
def initialize
|
22
|
+
@errors = []
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
def errors?() @errors.size > 0; end
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
## note: colon (:) MUST be followed by one (or more) spaces
|
31
|
+
## make sure mon feb 12 18:10 will not match
|
32
|
+
## allow 1. FC Köln etc.
|
33
|
+
## Mainz 05:
|
34
|
+
## limit to 30 chars max
|
35
|
+
## only allow chars incl. intl buut (NOT ()[]/;)
|
36
|
+
##
|
37
|
+
## Group A:
|
38
|
+
## Group B: - remove colon
|
39
|
+
## or lookup first
|
40
|
+
|
41
|
+
ATTRIB_RE = %r{^
|
42
|
+
[ ]*? # slurp leading spaces
|
43
|
+
(?<key>[^:|\]\[()\/; -]
|
44
|
+
[^:|\]\[()\/;]{0,30}
|
45
|
+
)
|
46
|
+
[ ]*? # slurp trailing spaces
|
47
|
+
:[ ]+
|
48
|
+
(?<value>.+)
|
49
|
+
[ ]*? # slurp trailing spaces
|
50
|
+
$
|
51
|
+
}ix
|
52
|
+
|
53
|
+
|
54
|
+
#########
|
55
|
+
## parse - false (default) - tokenize (only)
|
56
|
+
## - true - tokenize & parse
|
57
|
+
def read( path )
|
58
|
+
## note: every (new) read call - resets errors list to empty
|
59
|
+
@errors = []
|
60
|
+
|
61
|
+
nodes = SportDb::OutlineReader.read( path )
|
62
|
+
|
63
|
+
## process nodes
|
64
|
+
h1 = nil
|
65
|
+
h2 = nil
|
66
|
+
orphans = 0 ## track paragraphs's with no heading
|
67
|
+
|
68
|
+
attrib_found = false
|
69
|
+
|
70
|
+
|
71
|
+
nodes.each do |node|
|
72
|
+
type = node[0]
|
73
|
+
|
74
|
+
if type == :h1
|
75
|
+
h1 = node[1] ## get heading text
|
76
|
+
puts " = Heading 1 >#{node[1]}<"
|
77
|
+
elsif type == :h2
|
78
|
+
if h1.nil?
|
79
|
+
puts "!! WARN - no heading for subheading; skipping parse"
|
80
|
+
next
|
81
|
+
end
|
82
|
+
h2 = node[1] ## get heading text
|
83
|
+
puts " == Heading 2 >#{node[1]}<"
|
84
|
+
elsif type == :p
|
85
|
+
|
86
|
+
if h1.nil?
|
87
|
+
orphans += 1 ## only warn once
|
88
|
+
puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
89
|
+
next
|
90
|
+
end
|
91
|
+
|
92
|
+
lines = node[1]
|
93
|
+
|
94
|
+
txt = []
|
95
|
+
lines.each_with_index do |line,i|
|
96
|
+
|
97
|
+
if debug?
|
98
|
+
puts
|
99
|
+
puts "line >#{line}<"
|
100
|
+
end
|
101
|
+
|
102
|
+
|
103
|
+
## skip new (experimental attrib syntax)
|
104
|
+
if attrib_found == false &&
|
105
|
+
ATTRIB_RE.match?( line )
|
106
|
+
## note: check attrib regex AFTER group def e.g.:
|
107
|
+
## Group A:
|
108
|
+
## Group B: etc.
|
109
|
+
## todo/fix - change Group A: to Group A etc.
|
110
|
+
## Group B: to Group B
|
111
|
+
attrib_found = true
|
112
|
+
## logger.debug "skipping key/value line - >#{line}<"
|
113
|
+
next
|
114
|
+
end
|
115
|
+
|
116
|
+
if attrib_found
|
117
|
+
## check if line ends with dot
|
118
|
+
## if not slurp up lines to the next do!!!
|
119
|
+
## logger.debug "skipping key/value line - >#{line}<"
|
120
|
+
attrib_found = false if line.end_with?( '.' )
|
121
|
+
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
122
|
+
next
|
123
|
+
end
|
124
|
+
|
125
|
+
txt << line
|
126
|
+
txt << "\n"
|
127
|
+
end
|
128
|
+
|
129
|
+
## flatten
|
130
|
+
txt = txt.join
|
131
|
+
pp txt if debug?
|
132
|
+
|
133
|
+
parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
|
134
|
+
tree = parser.parse
|
135
|
+
pp tree
|
136
|
+
else
|
137
|
+
pp node
|
138
|
+
raise ArgumentError, "unsupported (node) type >#{type}<"
|
139
|
+
end
|
140
|
+
end # each node
|
141
|
+
end # read
|
142
|
+
end # class RaccLinter
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
###############################################
|
148
|
+
# start with code
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
args = ARGV
|
153
|
+
|
154
|
+
|
155
|
+
opts = {
|
156
|
+
debug: true,
|
157
|
+
}
|
158
|
+
|
159
|
+
parser = OptionParser.new do |parser|
|
160
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"
|
161
|
+
|
162
|
+
|
163
|
+
parser.on( "-q", "--quiet",
|
164
|
+
"less debug output/messages - default is (#{!opts[:debug]})" ) do |debug|
|
165
|
+
opts[:debug] = false
|
166
|
+
end
|
167
|
+
parser.on( "--verbose", "--debug",
|
168
|
+
"turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
|
169
|
+
opts[:debug] = true
|
170
|
+
end
|
171
|
+
end
|
172
|
+
parser.parse!( args )
|
173
|
+
|
174
|
+
puts "OPTS:"
|
175
|
+
p opts
|
176
|
+
puts "ARGV:"
|
177
|
+
p args
|
178
|
+
|
179
|
+
|
180
|
+
## todo/check - use packs or projects or such
|
181
|
+
## instead of specs - why? why not?
|
182
|
+
paths = if args.empty?
|
183
|
+
[
|
184
|
+
'../../../openfootball/euro/2021--europe/euro.txt',
|
185
|
+
'../../../openfootball/euro/2024--germany/euro.txt',
|
186
|
+
]
|
187
|
+
else
|
188
|
+
## check for directories
|
189
|
+
## and auto-expand
|
190
|
+
SportDb::Parser::Opts.expand_args( args )
|
191
|
+
end
|
192
|
+
|
193
|
+
|
194
|
+
|
195
|
+
|
196
|
+
errors = []
|
197
|
+
linter = RaccLinter.new
|
198
|
+
|
199
|
+
paths.each_with_index do |path,i|
|
200
|
+
|
201
|
+
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
202
|
+
|
203
|
+
linter.read( path )
|
204
|
+
end
|
205
|
+
|
206
|
+
puts "bye"
|
207
|
+
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
__END__
|
212
|
+
|
213
|
+
if errors.size > 0
|
214
|
+
puts
|
215
|
+
pp errors
|
216
|
+
puts
|
217
|
+
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
218
|
+
else
|
219
|
+
puts
|
220
|
+
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
221
|
+
end
|
222
|
+
|
223
|
+
## add errors to rec via rec['errors'] to allow
|
224
|
+
## for further processing/reporting
|
225
|
+
rec['errors'] = errors
|
226
|
+
end
|
227
|
+
|
data/bin/fbx
CHANGED
@@ -88,12 +88,12 @@ paths.each_with_index do |path,i|
|
|
88
88
|
## pp secs
|
89
89
|
|
90
90
|
secs.each_with_index do |sec,j| ## sec(tion)s
|
91
|
-
season = sec[:season]
|
91
|
+
season = Season.parse( sec[:season] ) ## convert (str) to season obj!!!
|
92
92
|
league = sec[:league]
|
93
93
|
stage = sec[:stage]
|
94
94
|
lines = sec[:lines]
|
95
95
|
|
96
|
-
puts " section #{j+1}/#{secs.size} - #{league
|
96
|
+
puts " section #{j+1}/#{secs.size} - #{league} #{season}, #{stage} - #{lines.size} line(s)"
|
97
97
|
|
98
98
|
next if opts[:outline]
|
99
99
|
|
data/lib/fbtok/linter.rb
CHANGED
@@ -51,7 +51,7 @@ def errors?() @errors.size > 0; end
|
|
51
51
|
#########
|
52
52
|
## parse - false (default) - tokenize (only)
|
53
53
|
## - true - tokenize & parse
|
54
|
-
def read( path, parse: false )
|
54
|
+
def read( path )
|
55
55
|
## note: every (new) read call - resets errors list to empty
|
56
56
|
@errors = []
|
57
57
|
|
@@ -119,13 +119,8 @@ def read( path, parse: false )
|
|
119
119
|
next
|
120
120
|
end
|
121
121
|
|
122
|
-
t, error_messages =
|
123
|
-
|
124
|
-
else
|
125
|
-
@parser.tokenize_with_errors( line )
|
126
|
-
end
|
127
|
-
|
128
|
-
|
122
|
+
t, error_messages = @parser.tokenize_with_errors( line )
|
123
|
+
|
129
124
|
if error_messages.size > 0
|
130
125
|
## add to "global" error list
|
131
126
|
## make a triplet tuple (file / msg / line text)
|
@@ -137,6 +132,26 @@ def read( path, parse: false )
|
|
137
132
|
end
|
138
133
|
end
|
139
134
|
|
135
|
+
## post-process tokens
|
136
|
+
## - check for round, group, etc.
|
137
|
+
t = t.map do |tok|
|
138
|
+
#############
|
139
|
+
## pass 1
|
140
|
+
## replace all texts with keyword matches (e.g. group, round, leg, etc.)
|
141
|
+
if tok[0] == :TEXT
|
142
|
+
text = tok[1]
|
143
|
+
if @parser.is_group?( text )
|
144
|
+
[:GROUP, text]
|
145
|
+
elsif @parser.is_round?( text ) || @parser.is_leg?( text )
|
146
|
+
[:ROUND, text]
|
147
|
+
else
|
148
|
+
tok ## pass through as-is (1:1)
|
149
|
+
end
|
150
|
+
else
|
151
|
+
tok
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
140
155
|
pp t if debug?
|
141
156
|
|
142
157
|
tree << t
|
data/lib/fbtok/opts.rb
CHANGED
@@ -18,11 +18,12 @@ class Opts
|
|
18
18
|
## note: if pattern includes directory add here
|
19
19
|
## (otherwise move to more "generic" datafile) - why? why not?
|
20
20
|
## update - note include/allow dot (.) too
|
21
|
+
## BUT NOT as first character!!! (e.g. exclude .confg.txt !!!)
|
21
22
|
## e.g. 2024-25/at.1.txt
|
22
23
|
## change to at_1 or uefa_cl or such - why? why not?
|
23
24
|
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
24
25
|
#{SEASON}
|
25
|
-
/[a-z0-9_.-]
|
26
|
+
/[a-z0-9][a-z0-9_.-]*\.txt$ ## txt e.g /1-premierleague.txt
|
26
27
|
}x
|
27
28
|
|
28
29
|
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbtok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sportdb-parser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.5.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.5.0
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: sportdb-formats
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -64,6 +78,7 @@ executables:
|
|
64
78
|
- fbchk
|
65
79
|
- fbt
|
66
80
|
- fbtok
|
81
|
+
- fbtree
|
67
82
|
- fbx
|
68
83
|
extensions: []
|
69
84
|
extra_rdoc_files:
|
@@ -78,6 +93,7 @@ files:
|
|
78
93
|
- bin/fbchk
|
79
94
|
- bin/fbt
|
80
95
|
- bin/fbtok
|
96
|
+
- bin/fbtree
|
81
97
|
- bin/fbx
|
82
98
|
- lib/fbtok.rb
|
83
99
|
- lib/fbtok/linter.rb
|