fbtok 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Rakefile +2 -2
- data/bin/fbtok +1 -1
- data/bin/fbtree +5 -166
- data/lib/fbtok/linter.rb +26 -55
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 191b44113dea92aebe49fb5717146c9cd0038afb448ba34c4dd828e6daeb869e
|
4
|
+
data.tar.gz: 7367ad2ae027d158371ac63fcbbe53271da84f1dabbd4a4f2111ebd658d0c2c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85d8e883d5fbe0654a5188b3c0d46b1be2f694a96a6fe9d70a6b88ce36d7676200165f82e999ef2b77dd2fc6f453da0dfd30b06e8ed4cc0a622e5a20329cb61e
|
7
|
+
data.tar.gz: 947f91db38384711f3fa734f7ef52b51068f01d78b9ecd953ac17caa94afe61e748971341ceafb0bcddf6a120b2d30324d6820ff77a675ded8d494b354614b43
|
data/CHANGELOG.md
CHANGED
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
|
|
2
2
|
|
3
3
|
|
4
4
|
Hoe.spec 'fbtok' do
|
5
|
-
self.version = '0.2.0'
|
5
|
+
self.version = '0.2.1'
|
6
6
|
|
7
7
|
self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
|
8
8
|
self.description = summary
|
@@ -21,7 +21,7 @@ Hoe.spec 'fbtok' do
|
|
21
21
|
self.extra_deps = [
|
22
22
|
# ['sportdb-structs', '>= 0.5.0'],
|
23
23
|
# ['logutils', '>= 0.6.1'],
|
24
|
-
['sportdb-parser', '>= 0.5.0'],
|
24
|
+
['sportdb-parser', '>= 0.5.1'],
|
25
25
|
['sportdb-formats', '>= 2.1.2'],
|
26
26
|
]
|
27
27
|
|
data/bin/fbtok
CHANGED
@@ -87,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
|
|
87
87
|
|
88
88
|
paths.each_with_index do |path,j|
|
89
89
|
puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
|
90
|
-
linter.read( path )
|
90
|
+
linter.read( path, parse: false ) ## only tokenize (do NOT parse)
|
91
91
|
|
92
92
|
errors += linter.errors if linter.errors?
|
93
93
|
end
|
data/bin/fbtree
CHANGED
@@ -5,150 +5,6 @@
|
|
5
5
|
|
6
6
|
require 'fbtok'
|
7
7
|
|
8
|
-
|
9
|
-
###
|
10
|
-
## note - Linter for now nested inside Parser - keep? why? why not?
|
11
|
-
class RaccLinter
|
12
|
-
|
13
|
-
def self.debug=(value) @@debug = value; end
|
14
|
-
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
15
|
-
def debug?() self.class.debug?; end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
attr_reader :errors
|
20
|
-
|
21
|
-
def initialize
|
22
|
-
@errors = []
|
23
|
-
end
|
24
|
-
|
25
|
-
|
26
|
-
def errors?() @errors.size > 0; end
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
31
|
-
## make sure mon feb 12 18:10 will not match
|
32
|
-
## allow 1. FC Köln etc.
|
33
|
-
## Mainz 05:
|
34
|
-
## limit to 30 chars max
|
35
|
-
## only allow chars incl. intl buut (NOT ()[]/;)
|
36
|
-
##
|
37
|
-
## Group A:
|
38
|
-
## Group B: - remove colon
|
39
|
-
## or lookup first
|
40
|
-
|
41
|
-
ATTRIB_RE = %r{^
|
42
|
-
[ ]*? # slurp leading spaces
|
43
|
-
(?<key>[^:|\]\[()\/; -]
|
44
|
-
[^:|\]\[()\/;]{0,30}
|
45
|
-
)
|
46
|
-
[ ]*? # slurp trailing spaces
|
47
|
-
:[ ]+
|
48
|
-
(?<value>.+)
|
49
|
-
[ ]*? # slurp trailing spaces
|
50
|
-
$
|
51
|
-
}ix
|
52
|
-
|
53
|
-
|
54
|
-
#########
|
55
|
-
## parse - false (default) - tokenize (only)
|
56
|
-
## - true - tokenize & parse
|
57
|
-
def read( path )
|
58
|
-
## note: every (new) read call - resets errors list to empty
|
59
|
-
@errors = []
|
60
|
-
|
61
|
-
nodes = SportDb::OutlineReader.read( path )
|
62
|
-
|
63
|
-
## process nodes
|
64
|
-
h1 = nil
|
65
|
-
h2 = nil
|
66
|
-
orphans = 0 ## track paragraphs's with no heading
|
67
|
-
|
68
|
-
attrib_found = false
|
69
|
-
|
70
|
-
|
71
|
-
nodes.each do |node|
|
72
|
-
type = node[0]
|
73
|
-
|
74
|
-
if type == :h1
|
75
|
-
h1 = node[1] ## get heading text
|
76
|
-
puts " = Heading 1 >#{node[1]}<"
|
77
|
-
elsif type == :h2
|
78
|
-
if h1.nil?
|
79
|
-
puts "!! WARN - no heading for subheading; skipping parse"
|
80
|
-
next
|
81
|
-
end
|
82
|
-
h2 = node[1] ## get heading text
|
83
|
-
puts " == Heading 2 >#{node[1]}<"
|
84
|
-
elsif type == :p
|
85
|
-
|
86
|
-
if h1.nil?
|
87
|
-
orphans += 1 ## only warn once
|
88
|
-
puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
89
|
-
next
|
90
|
-
end
|
91
|
-
|
92
|
-
lines = node[1]
|
93
|
-
|
94
|
-
txt = []
|
95
|
-
lines.each_with_index do |line,i|
|
96
|
-
|
97
|
-
if debug?
|
98
|
-
puts
|
99
|
-
puts "line >#{line}<"
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
## skip new (experimental attrib syntax)
|
104
|
-
if attrib_found == false &&
|
105
|
-
ATTRIB_RE.match?( line )
|
106
|
-
## note: check attrib regex AFTER group def e.g.:
|
107
|
-
## Group A:
|
108
|
-
## Group B: etc.
|
109
|
-
## todo/fix - change Group A: to Group A etc.
|
110
|
-
## Group B: to Group B
|
111
|
-
attrib_found = true
|
112
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
113
|
-
next
|
114
|
-
end
|
115
|
-
|
116
|
-
if attrib_found
|
117
|
-
## check if line ends with dot
|
118
|
-
## if not slurp up lines to the next do!!!
|
119
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
120
|
-
attrib_found = false if line.end_with?( '.' )
|
121
|
-
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
122
|
-
next
|
123
|
-
end
|
124
|
-
|
125
|
-
txt << line
|
126
|
-
txt << "\n"
|
127
|
-
end
|
128
|
-
|
129
|
-
## flatten
|
130
|
-
txt = txt.join
|
131
|
-
pp txt if debug?
|
132
|
-
|
133
|
-
parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
|
134
|
-
tree = parser.parse
|
135
|
-
pp tree
|
136
|
-
else
|
137
|
-
pp node
|
138
|
-
raise ArgumentError, "unsupported (node) type >#{type}<"
|
139
|
-
end
|
140
|
-
end # each node
|
141
|
-
end # read
|
142
|
-
end # class RaccLinter
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
###############################################
|
148
|
-
# start with code
|
149
|
-
|
150
|
-
|
151
|
-
|
152
8
|
args = ARGV
|
153
9
|
|
154
10
|
|
@@ -192,36 +48,19 @@ p args
|
|
192
48
|
|
193
49
|
|
194
50
|
|
51
|
+
SportDb::Parser::Linter.debug = true if opts[:debug]
|
52
|
+
|
53
|
+
linter = SportDb::Parser::Linter.new
|
195
54
|
|
196
|
-
errors = []
|
197
|
-
linter = RaccLinter.new
|
198
55
|
|
199
56
|
paths.each_with_index do |path,i|
|
200
57
|
|
201
58
|
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
202
59
|
|
203
|
-
linter.read( path )
|
60
|
+
linter.read( path, parse: true )
|
204
61
|
end
|
205
62
|
|
206
|
-
puts "bye"
|
207
|
-
|
208
63
|
|
64
|
+
puts "bye"
|
209
65
|
|
210
66
|
|
211
|
-
__END__
|
212
|
-
|
213
|
-
if errors.size > 0
|
214
|
-
puts
|
215
|
-
pp errors
|
216
|
-
puts
|
217
|
-
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
218
|
-
else
|
219
|
-
puts
|
220
|
-
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
221
|
-
end
|
222
|
-
|
223
|
-
## add errors to rec via rec['errors'] to allow
|
224
|
-
## for further processing/reporting
|
225
|
-
rec['errors'] = errors
|
226
|
-
end
|
227
|
-
|
data/lib/fbtok/linter.rb
CHANGED
@@ -7,7 +7,7 @@ class Parser
|
|
7
7
|
class Linter
|
8
8
|
|
9
9
|
def self.debug=(value) @@debug = value; end
|
10
|
-
def self.debug?()
|
10
|
+
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
11
11
|
def debug?() self.class.debug?; end
|
12
12
|
|
13
13
|
|
@@ -23,35 +23,10 @@ end
|
|
23
23
|
def errors?() @errors.size > 0; end
|
24
24
|
|
25
25
|
|
26
|
-
|
27
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
28
|
-
## make sure mon feb 12 18:10 will not match
|
29
|
-
## allow 1. FC Köln etc.
|
30
|
-
## Mainz 05:
|
31
|
-
## limit to 30 chars max
|
32
|
-
## only allow chars incl. intl buut (NOT ()[]/;)
|
33
|
-
##
|
34
|
-
## Group A:
|
35
|
-
## Group B: - remove colon
|
36
|
-
## or lookup first
|
37
|
-
|
38
|
-
ATTRIB_RE = %r{^
|
39
|
-
[ ]*? # slurp leading spaces
|
40
|
-
(?<key>[^:|\]\[()\/; -]
|
41
|
-
[^:|\]\[()\/;]{0,30}
|
42
|
-
)
|
43
|
-
[ ]*? # slurp trailing spaces
|
44
|
-
:[ ]+
|
45
|
-
(?<value>.+)
|
46
|
-
[ ]*? # slurp trailing spaces
|
47
|
-
$
|
48
|
-
}ix
|
49
|
-
|
50
|
-
|
51
26
|
#########
|
52
27
|
## parse - false (default) - tokenize (only)
|
53
28
|
## - true - tokenize & parse
|
54
|
-
def read( path )
|
29
|
+
def read( path, parse: true )
|
55
30
|
## note: every (new) read call - resets errors list to empty
|
56
31
|
@errors = []
|
57
32
|
|
@@ -62,9 +37,7 @@ def read( path )
|
|
62
37
|
h2 = nil
|
63
38
|
orphans = 0 ## track paragraphs's with no heading
|
64
39
|
|
65
|
-
|
66
|
-
|
67
|
-
|
40
|
+
|
68
41
|
nodes.each do |node|
|
69
42
|
type = node[0]
|
70
43
|
|
@@ -88,7 +61,29 @@ def read( path )
|
|
88
61
|
|
89
62
|
lines = node[1]
|
90
63
|
|
64
|
+
|
91
65
|
tree = []
|
66
|
+
|
67
|
+
if parse
|
68
|
+
## flatten lines
|
69
|
+
txt = []
|
70
|
+
lines.each_with_index do |line,i|
|
71
|
+
txt << line
|
72
|
+
txt << "\n"
|
73
|
+
end
|
74
|
+
txt = txt.join
|
75
|
+
|
76
|
+
if debug?
|
77
|
+
puts "lines:"
|
78
|
+
pp txt
|
79
|
+
end
|
80
|
+
|
81
|
+
## todo/fix - add/track parse errors!!!!!!
|
82
|
+
parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
|
83
|
+
tree = parser.parse
|
84
|
+
pp tree
|
85
|
+
|
86
|
+
else ## process for tokenize only
|
92
87
|
lines.each_with_index do |line,i|
|
93
88
|
|
94
89
|
if debug?
|
@@ -96,29 +91,6 @@ def read( path )
|
|
96
91
|
puts "line >#{line}<"
|
97
92
|
end
|
98
93
|
|
99
|
-
|
100
|
-
## skip new (experimental attrib syntax)
|
101
|
-
if attrib_found == false &&
|
102
|
-
ATTRIB_RE.match?( line )
|
103
|
-
## note: check attrib regex AFTER group def e.g.:
|
104
|
-
## Group A:
|
105
|
-
## Group B: etc.
|
106
|
-
## todo/fix - change Group A: to Group A etc.
|
107
|
-
## Group B: to Group B
|
108
|
-
attrib_found = true
|
109
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
110
|
-
next
|
111
|
-
end
|
112
|
-
|
113
|
-
if attrib_found
|
114
|
-
## check if line ends with dot
|
115
|
-
## if not slurp up lines to the next do!!!
|
116
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
117
|
-
attrib_found = false if line.end_with?( '.' )
|
118
|
-
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
119
|
-
next
|
120
|
-
end
|
121
|
-
|
122
94
|
t, error_messages = @parser.tokenize_with_errors( line )
|
123
95
|
|
124
96
|
if error_messages.size > 0
|
@@ -156,8 +128,7 @@ def read( path )
|
|
156
128
|
|
157
129
|
tree << t
|
158
130
|
end
|
159
|
-
|
160
|
-
## pp tree
|
131
|
+
end
|
161
132
|
else
|
162
133
|
pp node
|
163
134
|
raise ArgumentError, "unsupported (node) type >#{type}<"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbtok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-parser
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.5.0
|
19
|
+
version: 0.5.1
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.5.0
|
26
|
+
version: 0.5.1
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: sportdb-formats
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|