fbtok 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -0
- data/Rakefile +2 -2
- data/bin/fbtok +6 -4
- data/bin/fbtree +227 -0
- data/lib/fbtok/linter.rb +23 -8
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0db5fb0d9cbda9ad626f4debbf27751f591c90684455e2e3cabc66162a02e3c6
|
4
|
+
data.tar.gz: cc4d1d160356785e54b99fb7033e078af9edf63cd60a469ab8bb2ad685603b7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d688b40a4c97971eab87594a83ffe7966b245370944ac140950f0e935076d25c4317dfcb526ac0f22470c1027a7bc488812f37f07f6d1d413edb8e7c538c3f2b
|
7
|
+
data.tar.gz: beefaf087fe66d2ab3ecaae23cc786184e0558b3dd7bcc44e4e131cf70fca200a3f723060c42a2921d5cbef1f2b6513dc567dc693a7dd03bc2adcd8b7ad3a6e2
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
|
|
2
2
|
|
3
3
|
|
4
4
|
Hoe.spec 'fbtok' do
|
5
|
-
self.version = '0.
|
5
|
+
self.version = '0.2.0'
|
6
6
|
|
7
7
|
self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
|
8
8
|
self.description = summary
|
@@ -19,9 +19,9 @@ Hoe.spec 'fbtok' do
|
|
19
19
|
self.licenses = ['Public Domain']
|
20
20
|
|
21
21
|
self.extra_deps = [
|
22
|
-
# ['sportdb-parser', '>= 0.2.2'],
|
23
22
|
# ['sportdb-structs', '>= 0.5.0'],
|
24
23
|
# ['logutils', '>= 0.6.1'],
|
24
|
+
['sportdb-parser', '>= 0.5.0'],
|
25
25
|
['sportdb-formats', '>= 2.1.2'],
|
26
26
|
]
|
27
27
|
|
data/bin/fbtok
CHANGED
@@ -10,7 +10,6 @@ args = ARGV
|
|
10
10
|
|
11
11
|
opts = {
|
12
12
|
debug: true,
|
13
|
-
metal: false,
|
14
13
|
file: nil,
|
15
14
|
}
|
16
15
|
|
@@ -27,11 +26,14 @@ parser = OptionParser.new do |parser|
|
|
27
26
|
opts[:debug] = true
|
28
27
|
end
|
29
28
|
|
29
|
+
=begin
|
30
30
|
parser.on( "--metal",
|
31
31
|
"turn off typed parse tree; show to the metal tokens"+
|
32
32
|
" (default: #{opts[:metal]})" ) do |metal|
|
33
33
|
opts[:metal] = true
|
34
34
|
end
|
35
|
+
=end
|
36
|
+
|
35
37
|
|
36
38
|
parser.on( "-f FILE", "--file FILE",
|
37
39
|
"read datafiles (pathspecs) via .csv file") do |file|
|
@@ -85,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
|
|
85
87
|
|
86
88
|
paths.each_with_index do |path,j|
|
87
89
|
puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
|
88
|
-
linter.read( path
|
90
|
+
linter.read( path )
|
89
91
|
|
90
92
|
errors += linter.errors if linter.errors?
|
91
93
|
end
|
@@ -94,10 +96,10 @@ specs.each_with_index do |(paths, rec),i|
|
|
94
96
|
puts
|
95
97
|
pp errors
|
96
98
|
puts
|
97
|
-
puts "!! #{errors.size}
|
99
|
+
puts "!! #{errors.size} tokenize error(s) in #{paths.size} datafiles(s)"
|
98
100
|
else
|
99
101
|
puts
|
100
|
-
puts "OK no
|
102
|
+
puts "OK no tokenize errors found in #{paths.size} datafile(s)"
|
101
103
|
end
|
102
104
|
|
103
105
|
## add errors to rec via rec['errors'] to allow
|
data/bin/fbtree
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/fbtree
|
5
|
+
|
6
|
+
require 'fbtok'
|
7
|
+
|
8
|
+
|
9
|
+
###
|
10
|
+
## note - Linter for now nested inside Parser - keep? why? why not?
|
11
|
+
##
## RaccLinter - reads a datafile via SportDb::OutlineReader, walks the
##   returned outline nodes (:h1, :h2, :p) and - for every text paragraph
##   below a level-1 heading - feeds the paragraph text to a
##   (fresh) RaccMatchParser and pretty prints the resulting tree.
class RaccLinter

  ## class-wide debug switch
  def self.debug=(value) @@debug = value; end
  def self.debug?()      @@debug ||= false; end     ## note: default is FALSE
  def debug?()  self.class.debug?; end


  ## list of errors of the last read call
  ##  note: reset to empty by every read call;
  ##        nothing in this class appends to it (yet)
  attr_reader :errors

  def initialize
    @errors = []
  end

  def errors?() @errors.size > 0; end


  ## note: colon (:) MUST be followed by one (or more) spaces
  ##      make sure mon feb 12 18:10 will not match
  ##        allow 1. FC Köln etc.
  ##               Mainz 05:
  ##           limit to 30 chars max
  ##          only allow chars incl. intl but (NOT ()[]/;)
  ##
  ##   Group A:
  ##   Group B:   - remove colon
  ##    or lookup first
  ATTRIB_RE = %r{^
                   [ ]*?     # slurp leading spaces
                (?<key>[^:|\]\[()\/; -]
                       [^:|\]\[()\/;]{0,30}
                 )
                   [ ]*?     # slurp trailing spaces
                   :[ ]+
                (?<value>.+)
                    [ ]*?   # slurp trailing spaces
                   $
                }ix


  #########
  ## read the datafile at path and parse every text paragraph
  ##
  ##  - :h1 nodes   - remember the heading (and print it)
  ##  - :h2 nodes   - print the subheading;
  ##                    warn & skip if no level-1 heading seen yet
  ##  - :p  nodes   - warn & skip if no level-1 heading seen yet;
  ##                    otherwise drop the (experimental) attribute lines,
  ##                    join the remaining lines (each followed by a newline)
  ##                    and pretty print the tree returned by
  ##                    RaccMatchParser#parse
  ##
  ##  raises ArgumentError on any other node type
  ##  returns whatever nodes.each returns (the node list for an array)
  def read( path )
    ## note: every (new) read call - resets errors list to empty
    @errors = []

    nodes = SportDb::OutlineReader.read( path )

    h1        = nil      ## text of last level-1 heading (nil - none seen yet)
    orphans   = 0        ## count of paragraphs with no heading
    in_attrib = false    ## inside an attribute (key: value) block?
                         ##   note: carried over from paragraph to paragraph

    nodes.each do |node|
      type = node[0]

      case type
      when :h1
        h1 = node[1]     ## get heading text
        puts "  = Heading 1 >#{node[1]}<"
      when :h2
        if h1.nil?
          puts "!! WARN - no heading for subheading; skipping parse"
          next
        end
        puts "  == Heading 2 >#{node[1]}<"
      when :p
        if h1.nil?
          ## note: warns for every orphan paragraph (with running count)
          orphans += 1
          puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
          next
        end

        kept, in_attrib = reject_attrib_lines( node[1], in_attrib )

        ## flatten - every line gets followed by a newline
        txt = kept.flat_map { |line| [line, "\n"] }.join
        pp txt   if debug?

        ## use own parser instance (not shared) - why? why not?
        tree = RaccMatchParser.new( txt ).parse
        pp tree
      else
        pp node
        raise ArgumentError, "unsupported (node) type >#{type}<"
      end
    end  # each node
  end  # read


  private

  ## drop the (experimental) attribute lines from a paragraph
  ##
  ##   an attribute block starts with a line matching ATTRIB_RE
  ##   and swallows ALL following lines up to (and including)
  ##   the first line ending with a dot (.)
  ##
  ##  in_attrib - true if an attribute block is still open
  ##              (from an earlier paragraph)
  ##  returns a pair - [lines kept, updated in_attrib flag]
  def reject_attrib_lines( lines, in_attrib )
    kept = []

    lines.each do |line|
      if debug?
        puts
        puts "line >#{line}<"
      end

      if in_attrib
        ## continuation line - block ends with the line ending with a dot
        in_attrib = false   if line.end_with?( '.' )
        next
      end

      ## note: check attrib regex AFTER group def e.g.:
      ##         Group A:
      ##         Group B:  etc.
      ##     todo/fix - change Group A: to Group A etc.
      ##                       Group B: to Group B
      if ATTRIB_RE.match?( line )
        in_attrib = true
        next
      end

      kept << line
    end

    [kept, in_attrib]
  end
end  # class RaccLinter
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
###############################################
|
148
|
+
# start with code
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
## command line driver:
##   parse the options, collect the datafile paths
##   and run every datafile through the linter

## note: explicit require - harmless if optparse is already loaded
require 'optparse'

args = ARGV

opts = {
  debug: true,
}

parser = OptionParser.new do |cli|
  cli.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"

  cli.on( "-q", "--quiet",
          "less debug output/messages - default is (#{!opts[:debug]})" ) do
    opts[:debug] = false
  end
  cli.on( "--verbose", "--debug",
          "turn on verbose / debug output (default: #{opts[:debug]})" ) do
    opts[:debug] = true
  end
end
parser.parse!( args )    ## note: removes the parsed options from args

puts "OPTS:"
p opts
puts "ARGV:"
p args


## pass the debug option on to the linter
##   (otherwise -q/--quiet and --verbose/--debug have no effect)
RaccLinter.debug = opts[:debug]


## todo/check - use packs or projects or such
##                instead of specs - why? why not?
paths = if args.empty?
          ## no paths passed in - fall back to built-in samples
          [
            '../../../openfootball/euro/2021--europe/euro.txt',
            '../../../openfootball/euro/2024--germany/euro.txt',
          ]
        else
          ## check for directories
          ##   and auto-expand
          SportDb::Parser::Opts.expand_args( args )
        end


errors = []     ## note: not filled (yet) - only used by the
                ##         switched-off summary after __END__
linter = RaccLinter.new

paths.each_with_index do |path,i|
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."

  linter.read( path )
end

puts "bye"
|
207
|
+
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
__END__
|
212
|
+
|
213
|
+
if errors.size > 0
|
214
|
+
puts
|
215
|
+
pp errors
|
216
|
+
puts
|
217
|
+
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
218
|
+
else
|
219
|
+
puts
|
220
|
+
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
221
|
+
end
|
222
|
+
|
223
|
+
## add errors to rec via rec['errors'] to allow
|
224
|
+
## for further processing/reporting
|
225
|
+
rec['errors'] = errors
|
226
|
+
end
|
227
|
+
|
data/lib/fbtok/linter.rb
CHANGED
@@ -51,7 +51,7 @@ def errors?() @errors.size > 0; end
|
|
51
51
|
#########
|
52
52
|
## parse - false (default) - tokenize (only)
|
53
53
|
## - true - tokenize & parse
|
54
|
-
def read( path
|
54
|
+
def read( path )
|
55
55
|
## note: every (new) read call - resets errors list to empty
|
56
56
|
@errors = []
|
57
57
|
|
@@ -119,13 +119,8 @@ def read( path, parse: false )
|
|
119
119
|
next
|
120
120
|
end
|
121
121
|
|
122
|
-
t, error_messages =
|
123
|
-
|
124
|
-
else
|
125
|
-
@parser.tokenize_with_errors( line )
|
126
|
-
end
|
127
|
-
|
128
|
-
|
122
|
+
t, error_messages = @parser.tokenize_with_errors( line )
|
123
|
+
|
129
124
|
if error_messages.size > 0
|
130
125
|
## add to "global" error list
|
131
126
|
## make a triplet tuple (file / msg / line text)
|
@@ -137,6 +132,26 @@ def read( path, parse: false )
|
|
137
132
|
end
|
138
133
|
end
|
139
134
|
|
135
|
+
## post-process tokens
|
136
|
+
## - check for round, group, etc.
|
137
|
+
t = t.map do |tok|
|
138
|
+
#############
|
139
|
+
## pass 1
|
140
|
+
## replace all texts with keyword matches (e.g. group, round, leg, etc.)
|
141
|
+
if tok[0] == :TEXT
|
142
|
+
text = tok[1]
|
143
|
+
if @parser.is_group?( text )
|
144
|
+
[:GROUP, text]
|
145
|
+
elsif @parser.is_round?( text ) || @parser.is_leg?( text )
|
146
|
+
[:ROUND, text]
|
147
|
+
else
|
148
|
+
tok ## pass through as-is (1:1)
|
149
|
+
end
|
150
|
+
else
|
151
|
+
tok
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
140
155
|
pp t if debug?
|
141
156
|
|
142
157
|
tree << t
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbtok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sportdb-parser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.5.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.5.0
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: sportdb-formats
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -64,6 +78,7 @@ executables:
|
|
64
78
|
- fbchk
|
65
79
|
- fbt
|
66
80
|
- fbtok
|
81
|
+
- fbtree
|
67
82
|
- fbx
|
68
83
|
extensions: []
|
69
84
|
extra_rdoc_files:
|
@@ -78,6 +93,7 @@ files:
|
|
78
93
|
- bin/fbchk
|
79
94
|
- bin/fbt
|
80
95
|
- bin/fbtok
|
96
|
+
- bin/fbtree
|
81
97
|
- bin/fbx
|
82
98
|
- lib/fbtok.rb
|
83
99
|
- lib/fbtok/linter.rb
|