fbtok 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -0
- data/Rakefile +2 -2
- data/bin/fbtok +6 -4
- data/bin/fbtree +66 -0
- data/lib/fbtok/linter.rb +48 -62
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 191b44113dea92aebe49fb5717146c9cd0038afb448ba34c4dd828e6daeb869e
|
4
|
+
data.tar.gz: 7367ad2ae027d158371ac63fcbbe53271da84f1dabbd4a4f2111ebd658d0c2c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85d8e883d5fbe0654a5188b3c0d46b1be2f694a96a6fe9d70a6b88ce36d7676200165f82e999ef2b77dd2fc6f453da0dfd30b06e8ed4cc0a622e5a20329cb61e
|
7
|
+
data.tar.gz: 947f91db38384711f3fa734f7ef52b51068f01d78b9ecd953ac17caa94afe61e748971341ceafb0bcddf6a120b2d30324d6820ff77a675ded8d494b354614b43
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
|
|
2
2
|
|
3
3
|
|
4
4
|
Hoe.spec 'fbtok' do
|
5
|
-
self.version = '0.1
|
5
|
+
self.version = '0.2.1'
|
6
6
|
|
7
7
|
self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
|
8
8
|
self.description = summary
|
@@ -19,9 +19,9 @@ Hoe.spec 'fbtok' do
|
|
19
19
|
self.licenses = ['Public Domain']
|
20
20
|
|
21
21
|
self.extra_deps = [
|
22
|
-
# ['sportdb-parser', '>= 0.2.2'],
|
23
22
|
# ['sportdb-structs', '>= 0.5.0'],
|
24
23
|
# ['logutils', '>= 0.6.1'],
|
24
|
+
['sportdb-parser', '>= 0.5.1'],
|
25
25
|
['sportdb-formats', '>= 2.1.2'],
|
26
26
|
]
|
27
27
|
|
data/bin/fbtok
CHANGED
@@ -10,7 +10,6 @@ args = ARGV
|
|
10
10
|
|
11
11
|
opts = {
|
12
12
|
debug: true,
|
13
|
-
metal: false,
|
14
13
|
file: nil,
|
15
14
|
}
|
16
15
|
|
@@ -27,11 +26,14 @@ parser = OptionParser.new do |parser|
|
|
27
26
|
opts[:debug] = true
|
28
27
|
end
|
29
28
|
|
29
|
+
=begin
|
30
30
|
parser.on( "--metal",
|
31
31
|
"turn off typed parse tree; show to the metal tokens"+
|
32
32
|
" (default: #{opts[:metal]})" ) do |metal|
|
33
33
|
opts[:metal] = true
|
34
34
|
end
|
35
|
+
=end
|
36
|
+
|
35
37
|
|
36
38
|
parser.on( "-f FILE", "--file FILE",
|
37
39
|
"read datafiles (pathspecs) via .csv file") do |file|
|
@@ -85,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
|
|
85
87
|
|
86
88
|
paths.each_with_index do |path,j|
|
87
89
|
puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
|
88
|
-
linter.read( path, parse:
|
90
|
+
linter.read( path, parse: false ) ## only tokenize (do NOT parse)
|
89
91
|
|
90
92
|
errors += linter.errors if linter.errors?
|
91
93
|
end
|
@@ -94,10 +96,10 @@ specs.each_with_index do |(paths, rec),i|
|
|
94
96
|
puts
|
95
97
|
pp errors
|
96
98
|
puts
|
97
|
-
puts "!! #{errors.size}
|
99
|
+
puts "!! #{errors.size} tokenize error(s) in #{paths.size} datafiles(s)"
|
98
100
|
else
|
99
101
|
puts
|
100
|
-
puts "OK no
|
102
|
+
puts "OK no tokenize errors found in #{paths.size} datafile(s)"
|
101
103
|
end
|
102
104
|
|
103
105
|
## add errors to rec via rec['errors'] to allow
|
data/bin/fbtree
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/fbtree
|
5
|
+
|
6
|
+
require 'fbtok'
|
7
|
+
|
8
|
+
args = ARGV
|
9
|
+
|
10
|
+
|
11
|
+
opts = {
|
12
|
+
debug: true,
|
13
|
+
}
|
14
|
+
|
15
|
+
parser = OptionParser.new do |parser|
|
16
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"
|
17
|
+
|
18
|
+
|
19
|
+
parser.on( "-q", "--quiet",
|
20
|
+
"less debug output/messages - default is (#{!opts[:debug]})" ) do |debug|
|
21
|
+
opts[:debug] = false
|
22
|
+
end
|
23
|
+
parser.on( "--verbose", "--debug",
|
24
|
+
"turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
|
25
|
+
opts[:debug] = true
|
26
|
+
end
|
27
|
+
end
|
28
|
+
parser.parse!( args )
|
29
|
+
|
30
|
+
puts "OPTS:"
|
31
|
+
p opts
|
32
|
+
puts "ARGV:"
|
33
|
+
p args
|
34
|
+
|
35
|
+
|
36
|
+
## todo/check - use packs or projects or such
|
37
|
+
## instead of specs - why? why not?
|
38
|
+
paths = if args.empty?
|
39
|
+
[
|
40
|
+
'../../../openfootball/euro/2021--europe/euro.txt',
|
41
|
+
'../../../openfootball/euro/2024--germany/euro.txt',
|
42
|
+
]
|
43
|
+
else
|
44
|
+
## check for directories
|
45
|
+
## and auto-expand
|
46
|
+
SportDb::Parser::Opts.expand_args( args )
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
SportDb::Parser::Linter.debug = true if opts[:debug]
|
52
|
+
|
53
|
+
linter = SportDb::Parser::Linter.new
|
54
|
+
|
55
|
+
|
56
|
+
paths.each_with_index do |path,i|
|
57
|
+
|
58
|
+
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
59
|
+
|
60
|
+
linter.read( path, parse: true )
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
puts "bye"
|
65
|
+
|
66
|
+
|
data/lib/fbtok/linter.rb
CHANGED
@@ -7,7 +7,7 @@ class Parser
|
|
7
7
|
class Linter
|
8
8
|
|
9
9
|
def self.debug=(value) @@debug = value; end
|
10
|
-
def self.debug?()
|
10
|
+
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
11
11
|
def debug?() self.class.debug?; end
|
12
12
|
|
13
13
|
|
@@ -23,35 +23,10 @@ end
|
|
23
23
|
def errors?() @errors.size > 0; end
|
24
24
|
|
25
25
|
|
26
|
-
|
27
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
28
|
-
## make sure mon feb 12 18:10 will not match
|
29
|
-
## allow 1. FC Köln etc.
|
30
|
-
## Mainz 05:
|
31
|
-
## limit to 30 chars max
|
32
|
-
## only allow chars incl. intl but (NOT ()[]/;)
|
33
|
-
##
|
34
|
-
## Group A:
|
35
|
-
## Group B: - remove colon
|
36
|
-
## or lookup first
|
37
|
-
|
38
|
-
ATTRIB_RE = %r{^
|
39
|
-
[ ]*? # slurp leading spaces
|
40
|
-
(?<key>[^:|\]\[()\/; -]
|
41
|
-
[^:|\]\[()\/;]{0,30}
|
42
|
-
)
|
43
|
-
[ ]*? # slurp trailing spaces
|
44
|
-
:[ ]+
|
45
|
-
(?<value>.+)
|
46
|
-
[ ]*? # slurp trailing spaces
|
47
|
-
$
|
48
|
-
}ix
|
49
|
-
|
50
|
-
|
51
26
|
#########
|
52
27
|
## parse - false (default) - tokenize (only)
|
53
28
|
## - true - tokenize & parse
|
54
|
-
def read( path, parse:
|
29
|
+
def read( path, parse: true )
|
55
30
|
## note: every (new) read call - resets errors list to empty
|
56
31
|
@errors = []
|
57
32
|
|
@@ -62,9 +37,7 @@ def read( path, parse: false )
|
|
62
37
|
h2 = nil
|
63
38
|
orphans = 0 ## track paragraphs with no heading
|
64
39
|
|
65
|
-
|
66
|
-
|
67
|
-
|
40
|
+
|
68
41
|
nodes.each do |node|
|
69
42
|
type = node[0]
|
70
43
|
|
@@ -88,7 +61,29 @@ def read( path, parse: false )
|
|
88
61
|
|
89
62
|
lines = node[1]
|
90
63
|
|
64
|
+
|
91
65
|
tree = []
|
66
|
+
|
67
|
+
if parse
|
68
|
+
## flatten lines
|
69
|
+
txt = []
|
70
|
+
lines.each_with_index do |line,i|
|
71
|
+
txt << line
|
72
|
+
txt << "\n"
|
73
|
+
end
|
74
|
+
txt = txt.join
|
75
|
+
|
76
|
+
if debug?
|
77
|
+
puts "lines:"
|
78
|
+
pp txt
|
79
|
+
end
|
80
|
+
|
81
|
+
## todo/fix - add/track parse errors!!!!!!
|
82
|
+
parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
|
83
|
+
tree = parser.parse
|
84
|
+
pp tree
|
85
|
+
|
86
|
+
else ## process for tokenize only
|
92
87
|
lines.each_with_index do |line,i|
|
93
88
|
|
94
89
|
if debug?
|
@@ -96,36 +91,8 @@ def read( path, parse: false )
|
|
96
91
|
puts "line >#{line}<"
|
97
92
|
end
|
98
93
|
|
99
|
-
|
100
|
-
|
101
|
-
if attrib_found == false &&
|
102
|
-
ATTRIB_RE.match?( line )
|
103
|
-
## note: check attrib regex AFTER group def e.g.:
|
104
|
-
## Group A:
|
105
|
-
## Group B: etc.
|
106
|
-
## todo/fix - change Group A: to Group A etc.
|
107
|
-
## Group B: to Group B
|
108
|
-
attrib_found = true
|
109
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
110
|
-
next
|
111
|
-
end
|
112
|
-
|
113
|
-
if attrib_found
|
114
|
-
## check if line ends with dot
|
115
|
-
## if not slurp up lines to the next dot!!!
|
116
|
-
## logger.debug "skipping key/value line - >#{line}<"
|
117
|
-
attrib_found = false if line.end_with?( '.' )
|
118
|
-
# logger.debug "skipping key/value line (cont.) - >#{line}<"
|
119
|
-
next
|
120
|
-
end
|
121
|
-
|
122
|
-
t, error_messages = if parse
|
123
|
-
@parser.parse_with_errors( line )
|
124
|
-
else
|
125
|
-
@parser.tokenize_with_errors( line )
|
126
|
-
end
|
127
|
-
|
128
|
-
|
94
|
+
t, error_messages = @parser.tokenize_with_errors( line )
|
95
|
+
|
129
96
|
if error_messages.size > 0
|
130
97
|
## add to "global" error list
|
131
98
|
## make a triplet tuple (file / msg / line text)
|
@@ -137,12 +104,31 @@ def read( path, parse: false )
|
|
137
104
|
end
|
138
105
|
end
|
139
106
|
|
107
|
+
## post-process tokens
|
108
|
+
## - check for round, group, etc.
|
109
|
+
t = t.map do |tok|
|
110
|
+
#############
|
111
|
+
## pass 1
|
112
|
+
## replace all texts with keyword matches (e.g. group, round, leg, etc.)
|
113
|
+
if tok[0] == :TEXT
|
114
|
+
text = tok[1]
|
115
|
+
if @parser.is_group?( text )
|
116
|
+
[:GROUP, text]
|
117
|
+
elsif @parser.is_round?( text ) || @parser.is_leg?( text )
|
118
|
+
[:ROUND, text]
|
119
|
+
else
|
120
|
+
tok ## pass through as-is (1:1)
|
121
|
+
end
|
122
|
+
else
|
123
|
+
tok
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
140
127
|
pp t if debug?
|
141
128
|
|
142
129
|
tree << t
|
143
130
|
end
|
144
|
-
|
145
|
-
## pp tree
|
131
|
+
end
|
146
132
|
else
|
147
133
|
pp node
|
148
134
|
raise ArgumentError, "unsupported (node) type >#{type}<"
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbtok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sportdb-parser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.5.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.5.1
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: sportdb-formats
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -64,6 +78,7 @@ executables:
|
|
64
78
|
- fbchk
|
65
79
|
- fbt
|
66
80
|
- fbtok
|
81
|
+
- fbtree
|
67
82
|
- fbx
|
68
83
|
extensions: []
|
69
84
|
extra_rdoc_files:
|
@@ -78,6 +93,7 @@ files:
|
|
78
93
|
- bin/fbchk
|
79
94
|
- bin/fbt
|
80
95
|
- bin/fbtok
|
96
|
+
- bin/fbtree
|
81
97
|
- bin/fbx
|
82
98
|
- lib/fbtok.rb
|
83
99
|
- lib/fbtok/linter.rb
|