fbtok 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b4ab88883b8f80ee4824cab352297f037e2afcb72351bf6956a8e48bddd5a164
4
- data.tar.gz: 6bd75bea2015124e34b01f506757a54a5537ca0544559a0377b6f0a6a0533c0b
3
+ metadata.gz: 0db5fb0d9cbda9ad626f4debbf27751f591c90684455e2e3cabc66162a02e3c6
4
+ data.tar.gz: cc4d1d160356785e54b99fb7033e078af9edf63cd60a469ab8bb2ad685603b7f
5
5
  SHA512:
6
- metadata.gz: 94b3582db70136fdeadeebe003fefdff304ca923bd76edcc0a40d28731c64e73f372b7c0ecf74e58623358102c32ad51e0a233becee4555d5ccd322552d53d80
7
- data.tar.gz: dca47c3d3e134ed9c103ba873e76e0c4793d690bbaf6ced8a34693e4c2fea3fc1ff43af0d45ebc62738022f02bb02a0e0bcc04c153dd752a49c7a50d62ba6bba
6
+ metadata.gz: d688b40a4c97971eab87594a83ffe7966b245370944ac140950f0e935076d25c4317dfcb526ac0f22470c1027a7bc488812f37f07f6d1d413edb8e7c538c3f2b
7
+ data.tar.gz: beefaf087fe66d2ab3ecaae23cc786184e0558b3dd7bcc44e4e131cf70fca200a3f723060c42a2921d5cbef1f2b6513dc567dc693a7dd03bc2adcd8b7ad3a6e2
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.1.1
1
+ ### 0.2.0
2
2
  ### 0.0.1 / 2025-01-02
3
3
 
4
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -5,6 +5,7 @@ Rakefile
5
5
  bin/fbchk
6
6
  bin/fbt
7
7
  bin/fbtok
8
+ bin/fbtree
8
9
  bin/fbx
9
10
  lib/fbtok.rb
10
11
  lib/fbtok/linter.rb
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
2
2
 
3
3
 
4
4
  Hoe.spec 'fbtok' do
5
- self.version = '0.1.1'
5
+ self.version = '0.2.0'
6
6
 
7
7
  self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
8
8
  self.description = summary
@@ -19,9 +19,9 @@ Hoe.spec 'fbtok' do
19
19
  self.licenses = ['Public Domain']
20
20
 
21
21
  self.extra_deps = [
22
- # ['sportdb-parser', '>= 0.2.2'],
23
22
  # ['sportdb-structs', '>= 0.5.0'],
24
23
  # ['logutils', '>= 0.6.1'],
24
+ ['sportdb-parser', '>= 0.5.0'],
25
25
  ['sportdb-formats', '>= 2.1.2'],
26
26
  ]
27
27
 
data/bin/fbtok CHANGED
@@ -10,7 +10,6 @@ args = ARGV
10
10
 
11
11
  opts = {
12
12
  debug: true,
13
- metal: false,
14
13
  file: nil,
15
14
  }
16
15
 
@@ -27,11 +26,14 @@ parser = OptionParser.new do |parser|
27
26
  opts[:debug] = true
28
27
  end
29
28
 
29
+ =begin
30
30
  parser.on( "--metal",
31
31
  "turn off typed parse tree; show to the metal tokens"+
32
32
  " (default: #{opts[:metal]})" ) do |metal|
33
33
  opts[:metal] = true
34
34
  end
35
+ =end
36
+
35
37
 
36
38
  parser.on( "-f FILE", "--file FILE",
37
39
  "read datafiles (pathspecs) via .csv file") do |file|
@@ -85,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
85
87
 
86
88
  paths.each_with_index do |path,j|
87
89
  puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
88
- linter.read( path, parse: !opts[:metal] )
90
+ linter.read( path )
89
91
 
90
92
  errors += linter.errors if linter.errors?
91
93
  end
@@ -94,10 +96,10 @@ specs.each_with_index do |(paths, rec),i|
94
96
  puts
95
97
  pp errors
96
98
  puts
97
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
99
+ puts "!! #{errors.size} tokenize error(s) in #{paths.size} datafiles(s)"
98
100
  else
99
101
  puts
100
- puts "OK no parse errors found in #{paths.size} datafile(s)"
102
+ puts "OK no tokenize errors found in #{paths.size} datafile(s)"
101
103
  end
102
104
 
103
105
  ## add errors to rec via rec['errors'] to allow
data/bin/fbtree ADDED
@@ -0,0 +1,227 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ## tip: to test run:
4
+ ## ruby -I ./lib bin/fbtree
5
+
6
+ require 'fbtok'
7
+
8
+
9
+ ###
10
+ ## note - Linter for now nested inside Parser - keep? why? why not?
11
+ class RaccLinter
12
+
13
+ def self.debug=(value) @@debug = value; end
14
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
15
+ def debug?() self.class.debug?; end
16
+
17
+
18
+
19
+ attr_reader :errors
20
+
21
+ def initialize
22
+ @errors = []
23
+ end
24
+
25
+
26
+ def errors?() @errors.size > 0; end
27
+
28
+
29
+
30
+ ## note: colon (:) MUST be followed by one (or more) spaces
31
+ ## make sure mon feb 12 18:10 will not match
32
+ ## allow 1. FC Köln etc.
33
+ ## Mainz 05:
34
+ ## limit to 30 chars max
35
+ ## only allow chars incl. intl but (NOT ()[]/;)
36
+ ##
37
+ ## Group A:
38
+ ## Group B: - remove colon
39
+ ## or lookup first
40
+
41
+ ATTRIB_RE = %r{^
42
+ [ ]*? # slurp leading spaces
43
+ (?<key>[^:|\]\[()\/; -]
44
+ [^:|\]\[()\/;]{0,30}
45
+ )
46
+ [ ]*? # slurp trailing spaces
47
+ :[ ]+
48
+ (?<value>.+)
49
+ [ ]*? # slurp trailing spaces
50
+ $
51
+ }ix
52
+
53
+
54
+ #########
55
+ ## parse - false (default) - tokenize (only)
56
+ ## - true - tokenize & parse
57
+ def read( path )
58
+ ## note: every (new) read call - resets errors list to empty
59
+ @errors = []
60
+
61
+ nodes = SportDb::OutlineReader.read( path )
62
+
63
+ ## process nodes
64
+ h1 = nil
65
+ h2 = nil
66
+ orphans = 0 ## track paragraphs with no heading
67
+
68
+ attrib_found = false
69
+
70
+
71
+ nodes.each do |node|
72
+ type = node[0]
73
+
74
+ if type == :h1
75
+ h1 = node[1] ## get heading text
76
+ puts " = Heading 1 >#{node[1]}<"
77
+ elsif type == :h2
78
+ if h1.nil?
79
+ puts "!! WARN - no heading for subheading; skipping parse"
80
+ next
81
+ end
82
+ h2 = node[1] ## get heading text
83
+ puts " == Heading 2 >#{node[1]}<"
84
+ elsif type == :p
85
+
86
+ if h1.nil?
87
+ orphans += 1 ## only warn once
88
+ puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
89
+ next
90
+ end
91
+
92
+ lines = node[1]
93
+
94
+ txt = []
95
+ lines.each_with_index do |line,i|
96
+
97
+ if debug?
98
+ puts
99
+ puts "line >#{line}<"
100
+ end
101
+
102
+
103
+ ## skip new (experimental attrib syntax)
104
+ if attrib_found == false &&
105
+ ATTRIB_RE.match?( line )
106
+ ## note: check attrib regex AFTER group def e.g.:
107
+ ## Group A:
108
+ ## Group B: etc.
109
+ ## todo/fix - change Group A: to Group A etc.
110
+ ## Group B: to Group B
111
+ attrib_found = true
112
+ ## logger.debug "skipping key/value line - >#{line}<"
113
+ next
114
+ end
115
+
116
+ if attrib_found
117
+ ## check if line ends with dot
118
+ ## if not slurp up lines to the next dot!!!
119
+ ## logger.debug "skipping key/value line - >#{line}<"
120
+ attrib_found = false if line.end_with?( '.' )
121
+ # logger.debug "skipping key/value line (cont.) - >#{line}<"
122
+ next
123
+ end
124
+
125
+ txt << line
126
+ txt << "\n"
127
+ end
128
+
129
+ ## flatten
130
+ txt = txt.join
131
+ pp txt if debug?
132
+
133
+ parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
134
+ tree = parser.parse
135
+ pp tree
136
+ else
137
+ pp node
138
+ raise ArgumentError, "unsupported (node) type >#{type}<"
139
+ end
140
+ end # each node
141
+ end # read
142
+ end # class RaccLinter
143
+
144
+
145
+
146
+
147
+ ###############################################
148
+ # start with code
149
+
150
+
151
+
152
+ args = ARGV
153
+
154
+
155
+ opts = {
156
+ debug: true,
157
+ }
158
+
159
+ parser = OptionParser.new do |parser|
160
+ parser.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"
161
+
162
+
163
+ parser.on( "-q", "--quiet",
164
+ "less debug output/messages - default is (#{!opts[:debug]})" ) do |debug|
165
+ opts[:debug] = false
166
+ end
167
+ parser.on( "--verbose", "--debug",
168
+ "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
169
+ opts[:debug] = true
170
+ end
171
+ end
172
+ parser.parse!( args )
173
+
174
+ puts "OPTS:"
175
+ p opts
176
+ puts "ARGV:"
177
+ p args
178
+
179
+
180
+ ## todo/check - use packs or projects or such
181
+ ## instead of specs - why? why not?
182
+ paths = if args.empty?
183
+ [
184
+ '../../../openfootball/euro/2021--europe/euro.txt',
185
+ '../../../openfootball/euro/2024--germany/euro.txt',
186
+ ]
187
+ else
188
+ ## check for directories
189
+ ## and auto-expand
190
+ SportDb::Parser::Opts.expand_args( args )
191
+ end
192
+
193
+
194
+
195
+
196
+ errors = []
197
+ linter = RaccLinter.new
198
+
199
+ paths.each_with_index do |path,i|
200
+
201
+ puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
202
+
203
+ linter.read( path )
204
+ end
205
+
206
+ puts "bye"
207
+
208
+
209
+
210
+
211
+ __END__
212
+
213
+ if errors.size > 0
214
+ puts
215
+ pp errors
216
+ puts
217
+ puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
218
+ else
219
+ puts
220
+ puts "OK no parse errors found in #{paths.size} datafile(s)"
221
+ end
222
+
223
+ ## add errors to rec via rec['errors'] to allow
224
+ ## for further processing/reporting
225
+ rec['errors'] = errors
226
+ end
227
+
data/bin/fbx CHANGED
@@ -88,12 +88,12 @@ paths.each_with_index do |path,i|
88
88
  ## pp secs
89
89
 
90
90
  secs.each_with_index do |sec,j| ## sec(tion)s
91
- season = sec[:season]
91
+ season = Season.parse( sec[:season] ) ## convert (str) to season obj!!!
92
92
  league = sec[:league]
93
93
  stage = sec[:stage]
94
94
  lines = sec[:lines]
95
95
 
96
- puts " section #{j+1}/#{secs.size} - #{league.name} #{season}, #{stage} - #{lines.size} line(s)"
96
+ puts " section #{j+1}/#{secs.size} - #{league} #{season}, #{stage} - #{lines.size} line(s)"
97
97
 
98
98
  next if opts[:outline]
99
99
 
data/lib/fbtok/linter.rb CHANGED
@@ -51,7 +51,7 @@ def errors?() @errors.size > 0; end
51
51
  #########
52
52
  ## parse - false (default) - tokenize (only)
53
53
  ## - true - tokenize & parse
54
- def read( path, parse: false )
54
+ def read( path )
55
55
  ## note: every (new) read call - resets errors list to empty
56
56
  @errors = []
57
57
 
@@ -119,13 +119,8 @@ def read( path, parse: false )
119
119
  next
120
120
  end
121
121
 
122
- t, error_messages = if parse
123
- @parser.parse_with_errors( line )
124
- else
125
- @parser.tokenize_with_errors( line )
126
- end
127
-
128
-
122
+ t, error_messages = @parser.tokenize_with_errors( line )
123
+
129
124
  if error_messages.size > 0
130
125
  ## add to "global" error list
131
126
  ## make a triplet tuple (file / msg / line text)
@@ -137,6 +132,26 @@ def read( path, parse: false )
137
132
  end
138
133
  end
139
134
 
135
+ ## post-process tokens
136
+ ## - check for round, group, etc.
137
+ t = t.map do |tok|
138
+ #############
139
+ ## pass 1
140
+ ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
141
+ if tok[0] == :TEXT
142
+ text = tok[1]
143
+ if @parser.is_group?( text )
144
+ [:GROUP, text]
145
+ elsif @parser.is_round?( text ) || @parser.is_leg?( text )
146
+ [:ROUND, text]
147
+ else
148
+ tok ## pass through as-is (1:1)
149
+ end
150
+ else
151
+ tok
152
+ end
153
+ end
154
+
140
155
  pp t if debug?
141
156
 
142
157
  tree << t
data/lib/fbtok/opts.rb CHANGED
@@ -18,11 +18,12 @@ class Opts
18
18
  ## note: if pattern includes directory add here
19
19
  ## (otherwise move to more "generic" datafile) - why? why not?
20
20
  ## update - note include/allow dot (.) too
21
+ ## BUT NOT as first character!!! (e.g. exclude .config.txt !!!)
21
22
  ## e.g. 2024-25/at.1.txt
22
23
  ## change to at_1 or uefa_cl or such - why? why not?
23
24
  MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
24
25
  #{SEASON}
25
- /[a-z0-9_.-]+\.txt$ ## txt e.g /1-premierleague.txt
26
+ /[a-z0-9][a-z0-9_.-]*\.txt$ ## txt e.g /1-premierleague.txt
26
27
  }x
27
28
 
28
29
 
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbtok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-02 00:00:00.000000000 Z
11
+ date: 2025-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sportdb-parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.5.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.5.0
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: sportdb-formats
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -64,6 +78,7 @@ executables:
64
78
  - fbchk
65
79
  - fbt
66
80
  - fbtok
81
+ - fbtree
67
82
  - fbx
68
83
  extensions: []
69
84
  extra_rdoc_files:
@@ -78,6 +93,7 @@ files:
78
93
  - bin/fbchk
79
94
  - bin/fbt
80
95
  - bin/fbtok
96
+ - bin/fbtree
81
97
  - bin/fbx
82
98
  - lib/fbtok.rb
83
99
  - lib/fbtok/linter.rb