fbtok 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b4ab88883b8f80ee4824cab352297f037e2afcb72351bf6956a8e48bddd5a164
4
- data.tar.gz: 6bd75bea2015124e34b01f506757a54a5537ca0544559a0377b6f0a6a0533c0b
3
+ metadata.gz: 0db5fb0d9cbda9ad626f4debbf27751f591c90684455e2e3cabc66162a02e3c6
4
+ data.tar.gz: cc4d1d160356785e54b99fb7033e078af9edf63cd60a469ab8bb2ad685603b7f
5
5
  SHA512:
6
- metadata.gz: 94b3582db70136fdeadeebe003fefdff304ca923bd76edcc0a40d28731c64e73f372b7c0ecf74e58623358102c32ad51e0a233becee4555d5ccd322552d53d80
7
- data.tar.gz: dca47c3d3e134ed9c103ba873e76e0c4793d690bbaf6ced8a34693e4c2fea3fc1ff43af0d45ebc62738022f02bb02a0e0bcc04c153dd752a49c7a50d62ba6bba
6
+ metadata.gz: d688b40a4c97971eab87594a83ffe7966b245370944ac140950f0e935076d25c4317dfcb526ac0f22470c1027a7bc488812f37f07f6d1d413edb8e7c538c3f2b
7
+ data.tar.gz: beefaf087fe66d2ab3ecaae23cc786184e0558b3dd7bcc44e4e131cf70fca200a3f723060c42a2921d5cbef1f2b6513dc567dc693a7dd03bc2adcd8b7ad3a6e2
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.1.1
1
+ ### 0.2.0
2
2
  ### 0.0.1 / 2025-01-02
3
3
 
4
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -5,6 +5,7 @@ Rakefile
5
5
  bin/fbchk
6
6
  bin/fbt
7
7
  bin/fbtok
8
+ bin/fbtree
8
9
  bin/fbx
9
10
  lib/fbtok.rb
10
11
  lib/fbtok/linter.rb
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
2
2
 
3
3
 
4
4
  Hoe.spec 'fbtok' do
5
- self.version = '0.1.1'
5
+ self.version = '0.2.0'
6
6
 
7
7
  self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
8
8
  self.description = summary
@@ -19,9 +19,9 @@ Hoe.spec 'fbtok' do
19
19
  self.licenses = ['Public Domain']
20
20
 
21
21
  self.extra_deps = [
22
- # ['sportdb-parser', '>= 0.2.2'],
23
22
  # ['sportdb-structs', '>= 0.5.0'],
24
23
  # ['logutils', '>= 0.6.1'],
24
+ ['sportdb-parser', '>= 0.5.0'],
25
25
  ['sportdb-formats', '>= 2.1.2'],
26
26
  ]
27
27
 
data/bin/fbtok CHANGED
@@ -10,7 +10,6 @@ args = ARGV
10
10
 
11
11
  opts = {
12
12
  debug: true,
13
- metal: false,
14
13
  file: nil,
15
14
  }
16
15
 
@@ -27,11 +26,14 @@ parser = OptionParser.new do |parser|
27
26
  opts[:debug] = true
28
27
  end
29
28
 
29
+ =begin
30
30
  parser.on( "--metal",
31
31
  "turn off typed parse tree; show to the metal tokens"+
32
32
  " (default: #{opts[:metal]})" ) do |metal|
33
33
  opts[:metal] = true
34
34
  end
35
+ =end
36
+
35
37
 
36
38
  parser.on( "-f FILE", "--file FILE",
37
39
  "read datafiles (pathspecs) via .csv file") do |file|
@@ -85,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
85
87
 
86
88
  paths.each_with_index do |path,j|
87
89
  puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
88
- linter.read( path, parse: !opts[:metal] )
90
+ linter.read( path )
89
91
 
90
92
  errors += linter.errors if linter.errors?
91
93
  end
@@ -94,10 +96,10 @@ specs.each_with_index do |(paths, rec),i|
94
96
  puts
95
97
  pp errors
96
98
  puts
97
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
99
+ puts "!! #{errors.size} tokenize error(s) in #{paths.size} datafiles(s)"
98
100
  else
99
101
  puts
100
- puts "OK no parse errors found in #{paths.size} datafile(s)"
102
+ puts "OK no tokenize errors found in #{paths.size} datafile(s)"
101
103
  end
102
104
 
103
105
  ## add errors to rec via rec['errors'] to allow
data/bin/fbtree ADDED
@@ -0,0 +1,227 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ## tip: to test run:
4
+ ## ruby -I ./lib bin/fbtree
5
+
6
+ require 'fbtok'
7
+
8
+
9
+ ###
10
+ ## note - Linter for now nested inside Parser - keep? why? why not?
11
+ class RaccLinter
12
+
13
+ def self.debug=(value) @@debug = value; end
14
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
15
+ def debug?() self.class.debug?; end
16
+
17
+
18
+
19
+ attr_reader :errors
20
+
21
+ def initialize
22
+ @errors = []
23
+ end
24
+
25
+
26
+ def errors?() @errors.size > 0; end
27
+
28
+
29
+
30
+ ## note: colon (:) MUST be followed by one (or more) spaces
31
+ ## make sure mon feb 12 18:10 will not match
32
+ ## allow 1. FC Köln etc.
33
+ ## Mainz 05:
34
+ ## limit to 30 chars max
35
+ ## only allow chars incl. intl but (NOT ()[]/;)
36
+ ##
37
+ ## Group A:
38
+ ## Group B: - remove colon
39
+ ## or lookup first
40
+
41
+ ATTRIB_RE = %r{^
42
+ [ ]*? # slurp leading spaces
43
+ (?<key>[^:|\]\[()\/; -]
44
+ [^:|\]\[()\/;]{0,30}
45
+ )
46
+ [ ]*? # slurp trailing spaces
47
+ :[ ]+
48
+ (?<value>.+)
49
+ [ ]*? # slurp trailing spaces
50
+ $
51
+ }ix
52
+
53
+
54
+ #########
55
+ ## parse - false (default) - tokenize (only)
56
+ ## - true - tokenize & parse
57
+ def read( path )
58
+ ## note: every (new) read call - resets errors list to empty
59
+ @errors = []
60
+
61
+ nodes = SportDb::OutlineReader.read( path )
62
+
63
+ ## process nodes
64
+ h1 = nil
65
+ h2 = nil
66
+ orphans = 0 ## track paragraphs with no heading
67
+
68
+ attrib_found = false
69
+
70
+
71
+ nodes.each do |node|
72
+ type = node[0]
73
+
74
+ if type == :h1
75
+ h1 = node[1] ## get heading text
76
+ puts " = Heading 1 >#{node[1]}<"
77
+ elsif type == :h2
78
+ if h1.nil?
79
+ puts "!! WARN - no heading for subheading; skipping parse"
80
+ next
81
+ end
82
+ h2 = node[1] ## get heading text
83
+ puts " == Heading 2 >#{node[1]}<"
84
+ elsif type == :p
85
+
86
+ if h1.nil?
87
+ orphans += 1 ## only warn once
88
+ puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
89
+ next
90
+ end
91
+
92
+ lines = node[1]
93
+
94
+ txt = []
95
+ lines.each_with_index do |line,i|
96
+
97
+ if debug?
98
+ puts
99
+ puts "line >#{line}<"
100
+ end
101
+
102
+
103
+ ## skip new (experimental) attrib syntax
104
+ if attrib_found == false &&
105
+ ATTRIB_RE.match?( line )
106
+ ## note: check attrib regex AFTER group def e.g.:
107
+ ## Group A:
108
+ ## Group B: etc.
109
+ ## todo/fix - change Group A: to Group A etc.
110
+ ## Group B: to Group B
111
+ attrib_found = true
112
+ ## logger.debug "skipping key/value line - >#{line}<"
113
+ next
114
+ end
115
+
116
+ if attrib_found
117
+ ## check if line ends with dot
118
+ ## if not slurp up lines to the next dot!!!
119
+ ## logger.debug "skipping key/value line - >#{line}<"
120
+ attrib_found = false if line.end_with?( '.' )
121
+ # logger.debug "skipping key/value line (cont.) - >#{line}<"
122
+ next
123
+ end
124
+
125
+ txt << line
126
+ txt << "\n"
127
+ end
128
+
129
+ ## flatten
130
+ txt = txt.join
131
+ pp txt if debug?
132
+
133
+ parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
134
+ tree = parser.parse
135
+ pp tree
136
+ else
137
+ pp node
138
+ raise ArgumentError, "unsupported (node) type >#{type}<"
139
+ end
140
+ end # each node
141
+ end # read
142
+ end # class RaccLinter
143
+
144
+
145
+
146
+
147
+ ###############################################
148
+ # start with code
149
+
150
+
151
+
152
+ args = ARGV
153
+
154
+
155
+ opts = {
156
+ debug: true,
157
+ }
158
+
159
+ parser = OptionParser.new do |parser|
160
+ parser.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"
161
+
162
+
163
+ parser.on( "-q", "--quiet",
164
+ "less debug output/messages - default is (#{!opts[:debug]})" ) do |debug|
165
+ opts[:debug] = false
166
+ end
167
+ parser.on( "--verbose", "--debug",
168
+ "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
169
+ opts[:debug] = true
170
+ end
171
+ end
172
+ parser.parse!( args )
173
+
174
+ puts "OPTS:"
175
+ p opts
176
+ puts "ARGV:"
177
+ p args
178
+
179
+
180
+ ## todo/check - use packs or projects or such
181
+ ## instead of specs - why? why not?
182
+ paths = if args.empty?
183
+ [
184
+ '../../../openfootball/euro/2021--europe/euro.txt',
185
+ '../../../openfootball/euro/2024--germany/euro.txt',
186
+ ]
187
+ else
188
+ ## check for directories
189
+ ## and auto-expand
190
+ SportDb::Parser::Opts.expand_args( args )
191
+ end
192
+
193
+
194
+
195
+
196
+ errors = []
197
+ linter = RaccLinter.new
198
+
199
+ paths.each_with_index do |path,i|
200
+
201
+ puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
202
+
203
+ linter.read( path )
204
+ end
205
+
206
+ puts "bye"
207
+
208
+
209
+
210
+
211
+ __END__
212
+
213
+ if errors.size > 0
214
+ puts
215
+ pp errors
216
+ puts
217
+ puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
218
+ else
219
+ puts
220
+ puts "OK no parse errors found in #{paths.size} datafile(s)"
221
+ end
222
+
223
+ ## add errors to rec via rec['errors'] to allow
224
+ ## for further processing/reporting
225
+ rec['errors'] = errors
226
+ end
227
+
data/bin/fbx CHANGED
@@ -88,12 +88,12 @@ paths.each_with_index do |path,i|
88
88
  ## pp secs
89
89
 
90
90
  secs.each_with_index do |sec,j| ## sec(tion)s
91
- season = sec[:season]
91
+ season = Season.parse( sec[:season] ) ## convert (str) to season obj!!!
92
92
  league = sec[:league]
93
93
  stage = sec[:stage]
94
94
  lines = sec[:lines]
95
95
 
96
- puts " section #{j+1}/#{secs.size} - #{league.name} #{season}, #{stage} - #{lines.size} line(s)"
96
+ puts " section #{j+1}/#{secs.size} - #{league} #{season}, #{stage} - #{lines.size} line(s)"
97
97
 
98
98
  next if opts[:outline]
99
99
 
data/lib/fbtok/linter.rb CHANGED
@@ -51,7 +51,7 @@ def errors?() @errors.size > 0; end
51
51
  #########
52
52
  ## parse - false (default) - tokenize (only)
53
53
  ## - true - tokenize & parse
54
- def read( path, parse: false )
54
+ def read( path )
55
55
  ## note: every (new) read call - resets errors list to empty
56
56
  @errors = []
57
57
 
@@ -119,13 +119,8 @@ def read( path, parse: false )
119
119
  next
120
120
  end
121
121
 
122
- t, error_messages = if parse
123
- @parser.parse_with_errors( line )
124
- else
125
- @parser.tokenize_with_errors( line )
126
- end
127
-
128
-
122
+ t, error_messages = @parser.tokenize_with_errors( line )
123
+
129
124
  if error_messages.size > 0
130
125
  ## add to "global" error list
131
126
  ## make a triplet tuple (file / msg / line text)
@@ -137,6 +132,26 @@ def read( path, parse: false )
137
132
  end
138
133
  end
139
134
 
135
+ ## post-process tokens
136
+ ## - check for round, group, etc.
137
+ t = t.map do |tok|
138
+ #############
139
+ ## pass 1
140
+ ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
141
+ if tok[0] == :TEXT
142
+ text = tok[1]
143
+ if @parser.is_group?( text )
144
+ [:GROUP, text]
145
+ elsif @parser.is_round?( text ) || @parser.is_leg?( text )
146
+ [:ROUND, text]
147
+ else
148
+ tok ## pass through as-is (1:1)
149
+ end
150
+ else
151
+ tok
152
+ end
153
+ end
154
+
140
155
  pp t if debug?
141
156
 
142
157
  tree << t
data/lib/fbtok/opts.rb CHANGED
@@ -18,11 +18,12 @@ class Opts
18
18
  ## note: if pattern includes directory add here
19
19
  ## (otherwise move to more "generic" datafile) - why? why not?
20
20
  ## update - note include/allow dot (.) too
21
+ ## BUT NOT as first character!!! (e.g. exclude .config.txt !!!)
21
22
  ## e.g. 2024-25/at.1.txt
22
23
  ## change to at_1 or uefa_cl or such - why? why not?
23
24
  MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
24
25
  #{SEASON}
25
- /[a-z0-9_.-]+\.txt$ ## txt e.g /1-premierleague.txt
26
+ /[a-z0-9][a-z0-9_.-]*\.txt$ ## txt e.g /1-premierleague.txt
26
27
  }x
27
28
 
28
29
 
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbtok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-02 00:00:00.000000000 Z
11
+ date: 2025-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sportdb-parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.5.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.5.0
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: sportdb-formats
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -64,6 +78,7 @@ executables:
64
78
  - fbchk
65
79
  - fbt
66
80
  - fbtok
81
+ - fbtree
67
82
  - fbx
68
83
  extensions: []
69
84
  extra_rdoc_files:
@@ -78,6 +93,7 @@ files:
78
93
  - bin/fbchk
79
94
  - bin/fbt
80
95
  - bin/fbtok
96
+ - bin/fbtree
81
97
  - bin/fbx
82
98
  - lib/fbtok.rb
83
99
  - lib/fbtok/linter.rb