fbtok 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Rakefile +2 -2
- data/bin/fbtok +1 -1
- data/bin/fbtree +5 -166
- data/lib/fbtok/linter.rb +26 -88
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0ce828e29e589e804a281a71a1c1160951b669a6fecd82cc42dc9bebf059dd1
|
4
|
+
data.tar.gz: 42bcc2682928350e3dc4344ea245e1a5da2156743eb3917a293b963ed93123c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3c1dd2f87074bf4d2c063df0032b5068d8c023f190a090492a964d210f7a0eae20e3239a61bc387e1f6f0313a8dc316ff5cd62971b383cbe8690f11f1199e117
|
7
|
+
data.tar.gz: 3020d52799befbda3d2d45a8f2d830e1fcb4e5b8330652fa04b0e46563d9b979b8ba63c82756519d64e1db144d421f70cb78949865f1c3c0d7aaba017b6c57ab
|
data/CHANGELOG.md
CHANGED
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
|
|
2
2
|
|
3
3
|
|
4
4
|
Hoe.spec 'fbtok' do
|
5
|
-
self.version = '0.2.0'
|
5
|
+
self.version = '0.2.2'
|
6
6
|
|
7
7
|
self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
|
8
8
|
self.description = summary
|
@@ -21,7 +21,7 @@ Hoe.spec 'fbtok' do
|
|
21
21
|
self.extra_deps = [
|
22
22
|
# ['sportdb-structs', '>= 0.5.0'],
|
23
23
|
# ['logutils', '>= 0.6.1'],
|
24
|
-
['sportdb-parser', '>= 0.5.
|
24
|
+
['sportdb-parser', '>= 0.5.4'],
|
25
25
|
['sportdb-formats', '>= 2.1.2'],
|
26
26
|
]
|
27
27
|
|
data/bin/fbtok
CHANGED
@@ -87,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
|
|
87
87
|
|
88
88
|
paths.each_with_index do |path,j|
|
89
89
|
puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
|
90
|
-
linter.read( path )
|
90
|
+
linter.read( path, parse: false ) ## only tokenize (do NOT parse)
|
91
91
|
|
92
92
|
errors += linter.errors if linter.errors?
|
93
93
|
end
|
data/bin/fbtree
CHANGED
@@ -5,150 +5,6 @@
|
|
5
5
|
|
6
6
|
require 'fbtok'
|
7
7
|
|
8
|
-
|
9
|
-
###
|
10
|
-
## note - Linter for now nested inside Parser - keep? why? why not?
|
11
|
-
class RaccLinter
|
12
|
-
|
13
|
-
def self.debug=(value) @@debug = value; end
|
14
|
-
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
15
|
-
def debug?() self.class.debug?; end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
attr_reader :errors
|
20
|
-
|
21
|
-
def initialize
|
22
|
-
@errors = []
|
23
|
-
end
|
24
|
-
|
25
|
-
|
26
|
-
def errors?() @errors.size > 0; end
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
31
|
-
## make sure mon feb 12 18:10 will not match
|
32
|
-
## allow 1. FC Köln etc.
|
33
|
-
## Mainz 05:
|
34
|
-
## limit to 30 chars max
|
35
|
-
## only allow chars incl. intl buut (NOT ()[]/;)
|
36
|
-
##
|
37
|
-
## Group A:
|
38
|
-
## Group B: - remove colon
|
39
|
-
## or lookup first
|
40
|
-
|
41
|
-
ATTRIB_RE = %r{^
|
42
|
-
[ ]*? # slurp leading spaces
|
43
|
-
(?<key>[^:|\]\[()\/; -]
|
44
|
-
[^:|\]\[()\/;]{0,30}
|
45
|
-
)
|
46
|
-
[ ]*? # slurp trailing spaces
|
47
|
-
:[ ]+
|
48
|
-
(?<value>.+)
|
49
|
-
[ ]*? # slurp trailing spaces
|
50
|
-
$
|
51
|
-
}ix
|
52
|
-
|
53
|
-
|
54
|
-
#########
|
55
|
-
## parse - false (default) - tokenize (only)
|
56
|
-
## - true - tokenize & parse
|
57
|
-
def read( path )
|
58
|
-
## note: every (new) read call - resets errors list to empty
|
59
|
-
@errors = []
|
60
|
-
|
61
|
-
nodes = SportDb::OutlineReader.read( path )
|
62
|
-
|
63
|
-
## process nodes
|
64
|
-
h1 = nil
|
65
|
-
h2 = nil
|
66
|
-
orphans = 0 ## track paragraphs's with no heading
|
67
|
-
|
68
|
-
attrib_found = false
|
69
|
-
|
70
|
-
|
71
|
-
nodes.each do |node|
|
72
|
-
type = node[0]
|
73
|
-
|
74
|
-
if type == :h1
|
75
|
-
h1 = node[1] ## get heading text
|
76
|
-
puts " = Heading 1 >#{node[1]}<"
|
77
|
-
elsif type == :h2
|
78
|
-
if h1.nil?
|
79
|
-
puts "!! WARN - no heading for subheading; skipping parse"
|
80
|
-
next
|
81
|
-
end
|
82
|
-
h2 = node[1] ## get heading text
|
83
|
-
puts " == Heading 2 >#{node[1]}<"
|
84
|
-
elsif type == :p
|
85
|
-
|
86
|
-
if h1.nil?
|
87
|
-
orphans += 1 ## only warn once
|
88
|
-
puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
89
|
-
next
|
90
|
-
end
|
91
|
-
|
92
|
-
lines = node[1]
|
93
|
-
|
94
|
-
txt = []
|
95
|
-
lines.each_with_index do |line,i|
|
96
|
-
|
97
|
-
if debug?
|
98
|
-
puts
|
99
|
-
puts "line >#{line}<"
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
## skip new (experimental attrib syntax)
|
104
|
-
if attrib_found == false &&
|
105
|
-
ATTRIB_RE.match?( line )
|
106
|
-
## note: check attrib regex AFTER group def e.g.:
|
107
|
-
## Group A:
|
108
|
-
## Group B: etc.
|
109
|
-
## todo/fix - change Group A: to Group A etc.
|
110
|
-
## Group B: to Group B
|
111
|
-
attrib_found = true
|
112
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
113
|
-
next
|
114
|
-
end
|
115
|
-
|
116
|
-
if attrib_found
|
117
|
-
## check if line ends with dot
|
118
|
-
## if not slurp up lines to the next do!!!
|
119
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
120
|
-
attrib_found = false if line.end_with?( '.' )
|
121
|
-
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
122
|
-
next
|
123
|
-
end
|
124
|
-
|
125
|
-
txt << line
|
126
|
-
txt << "\n"
|
127
|
-
end
|
128
|
-
|
129
|
-
## flatten
|
130
|
-
txt = txt.join
|
131
|
-
pp txt if debug?
|
132
|
-
|
133
|
-
parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
|
134
|
-
tree = parser.parse
|
135
|
-
pp tree
|
136
|
-
else
|
137
|
-
pp node
|
138
|
-
raise ArgumentError, "unsupported (node) type >#{type}<"
|
139
|
-
end
|
140
|
-
end # each node
|
141
|
-
end # read
|
142
|
-
end # class RaccLinter
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
###############################################
|
148
|
-
# start with code
|
149
|
-
|
150
|
-
|
151
|
-
|
152
8
|
args = ARGV
|
153
9
|
|
154
10
|
|
@@ -192,36 +48,19 @@ p args
|
|
192
48
|
|
193
49
|
|
194
50
|
|
51
|
+
SportDb::Parser::Linter.debug = true if opts[:debug]
|
52
|
+
|
53
|
+
linter = SportDb::Parser::Linter.new
|
195
54
|
|
196
|
-
errors = []
|
197
|
-
linter = RaccLinter.new
|
198
55
|
|
199
56
|
paths.each_with_index do |path,i|
|
200
57
|
|
201
58
|
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
202
59
|
|
203
|
-
linter.read( path )
|
60
|
+
linter.read( path, parse: true )
|
204
61
|
end
|
205
62
|
|
206
|
-
puts "bye"
|
207
|
-
|
208
63
|
|
64
|
+
puts "bye"
|
209
65
|
|
210
66
|
|
211
|
-
__END__
|
212
|
-
|
213
|
-
if errors.size > 0
|
214
|
-
puts
|
215
|
-
pp errors
|
216
|
-
puts
|
217
|
-
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
218
|
-
else
|
219
|
-
puts
|
220
|
-
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
221
|
-
end
|
222
|
-
|
223
|
-
## add errors to rec via rec['errors'] to allow
|
224
|
-
## for further processing/reporting
|
225
|
-
rec['errors'] = errors
|
226
|
-
end
|
227
|
-
|
data/lib/fbtok/linter.rb
CHANGED
@@ -7,7 +7,7 @@ class Parser
|
|
7
7
|
class Linter
|
8
8
|
|
9
9
|
def self.debug=(value) @@debug = value; end
|
10
|
-
def self.debug?()
|
10
|
+
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
11
11
|
def debug?() self.class.debug?; end
|
12
12
|
|
13
13
|
|
@@ -23,71 +23,37 @@ end
|
|
23
23
|
def errors?() @errors.size > 0; end
|
24
24
|
|
25
25
|
|
26
|
-
|
27
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
28
|
-
## make sure mon feb 12 18:10 will not match
|
29
|
-
## allow 1. FC Köln etc.
|
30
|
-
## Mainz 05:
|
31
|
-
## limit to 30 chars max
|
32
|
-
## only allow chars incl. intl buut (NOT ()[]/;)
|
33
|
-
##
|
34
|
-
## Group A:
|
35
|
-
## Group B: - remove colon
|
36
|
-
## or lookup first
|
37
|
-
|
38
|
-
ATTRIB_RE = %r{^
|
39
|
-
[ ]*? # slurp leading spaces
|
40
|
-
(?<key>[^:|\]\[()\/; -]
|
41
|
-
[^:|\]\[()\/;]{0,30}
|
42
|
-
)
|
43
|
-
[ ]*? # slurp trailing spaces
|
44
|
-
:[ ]+
|
45
|
-
(?<value>.+)
|
46
|
-
[ ]*? # slurp trailing spaces
|
47
|
-
$
|
48
|
-
}ix
|
49
|
-
|
50
|
-
|
51
26
|
#########
|
52
27
|
## parse - false (default) - tokenize (only)
|
53
28
|
## - true - tokenize & parse
|
54
|
-
def read( path )
|
29
|
+
def read( path, parse: true )
|
55
30
|
## note: every (new) read call - resets errors list to empty
|
56
31
|
@errors = []
|
57
32
|
|
58
|
-
|
59
|
-
|
60
|
-
## process nodes
|
61
|
-
h1 = nil
|
62
|
-
h2 = nil
|
63
|
-
orphans = 0 ## track paragraphs's with no heading
|
33
|
+
outline = QuickMatchOutline.read( path )
|
64
34
|
|
65
|
-
|
35
|
+
outline.each_para do |lines|
|
66
36
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
puts " = Heading 1 >#{node[1]}<"
|
74
|
-
elsif type == :h2
|
75
|
-
if h1.nil?
|
76
|
-
puts "!! WARN - no heading for subheading; skipping parse"
|
77
|
-
next
|
78
|
-
end
|
79
|
-
h2 = node[1] ## get heading text
|
80
|
-
puts " == Heading 2 >#{node[1]}<"
|
81
|
-
elsif type == :p
|
82
|
-
|
83
|
-
if h1.nil?
|
84
|
-
orphans += 1 ## only warn once
|
85
|
-
puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
86
|
-
next
|
37
|
+
if parse
|
38
|
+
## flatten lines
|
39
|
+
txt = []
|
40
|
+
lines.each_with_index do |line,i|
|
41
|
+
txt << line
|
42
|
+
txt << "\n"
|
87
43
|
end
|
44
|
+
txt = txt.join
|
45
|
+
|
46
|
+
if debug?
|
47
|
+
puts "lines:"
|
48
|
+
pp txt
|
49
|
+
end
|
50
|
+
|
51
|
+
## todo/fix - add/track parse errors!!!!!!
|
52
|
+
parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
|
53
|
+
tree = parser.parse
|
54
|
+
pp tree
|
88
55
|
|
89
|
-
|
90
|
-
|
56
|
+
else ## process for tokenize only
|
91
57
|
tree = []
|
92
58
|
lines.each_with_index do |line,i|
|
93
59
|
|
@@ -96,29 +62,6 @@ def read( path )
|
|
96
62
|
puts "line >#{line}<"
|
97
63
|
end
|
98
64
|
|
99
|
-
|
100
|
-
## skip new (experimental attrib syntax)
|
101
|
-
if attrib_found == false &&
|
102
|
-
ATTRIB_RE.match?( line )
|
103
|
-
## note: check attrib regex AFTER group def e.g.:
|
104
|
-
## Group A:
|
105
|
-
## Group B: etc.
|
106
|
-
## todo/fix - change Group A: to Group A etc.
|
107
|
-
## Group B: to Group B
|
108
|
-
attrib_found = true
|
109
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
110
|
-
next
|
111
|
-
end
|
112
|
-
|
113
|
-
if attrib_found
|
114
|
-
## check if line ends with dot
|
115
|
-
## if not slurp up lines to the next do!!!
|
116
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
117
|
-
attrib_found = false if line.end_with?( '.' )
|
118
|
-
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
119
|
-
next
|
120
|
-
end
|
121
|
-
|
122
65
|
t, error_messages = @parser.tokenize_with_errors( line )
|
123
66
|
|
124
67
|
if error_messages.size > 0
|
@@ -155,15 +98,10 @@ def read( path )
|
|
155
98
|
pp t if debug?
|
156
99
|
|
157
100
|
tree << t
|
158
|
-
end
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
pp node
|
163
|
-
raise ArgumentError, "unsupported (node) type >#{type}<"
|
164
|
-
end
|
165
|
-
end # each node
|
166
|
-
end # read
|
101
|
+
end # each line
|
102
|
+
end # parse? (or tokenize?)
|
103
|
+
end # each para (node)
|
104
|
+
end # method read
|
167
105
|
end # class Linter
|
168
106
|
|
169
107
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbtok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-parser
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.5.
|
19
|
+
version: 0.5.4
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.5.
|
26
|
+
version: 0.5.4
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: sportdb-formats
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|