fbtok 0.1.2 → 0.2.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: efa16f747083c94f0a8fbb8df7ac3d1718ad2c204fca2008b711da35a7adf5ac
4
- data.tar.gz: da8dfd82204875f23fc9efa236a1af79701e83047c5c3bf1fc34b4032371db51
3
+ metadata.gz: 191b44113dea92aebe49fb5717146c9cd0038afb448ba34c4dd828e6daeb869e
4
+ data.tar.gz: 7367ad2ae027d158371ac63fcbbe53271da84f1dabbd4a4f2111ebd658d0c2c7
5
5
  SHA512:
6
- metadata.gz: 29635412c9fb671cc254fa735dbf994289a961d02b92ca2480e80f4021d7f0d5b154f3b6d00c10cf50b1bce6f950ab624cbb1865d1273595a9401ad9c66a9ad4
7
- data.tar.gz: b81ed7ecffc27725bef9dbb0f9b448eac302b649e89b7ac7da68c556fff4b14a4243dfe736145210b4cabc3a4b1b30d6633ab7a4d9fcd9f4e8082fc88522b275
6
+ metadata.gz: 85d8e883d5fbe0654a5188b3c0d46b1be2f694a96a6fe9d70a6b88ce36d7676200165f82e999ef2b77dd2fc6f453da0dfd30b06e8ed4cc0a622e5a20329cb61e
7
+ data.tar.gz: 947f91db38384711f3fa734f7ef52b51068f01d78b9ecd953ac17caa94afe61e748971341ceafb0bcddf6a120b2d30324d6820ff77a675ded8d494b354614b43
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.1.2
1
+ ### 0.2.1
2
2
  ### 0.0.1 / 2025-01-02
3
3
 
4
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -5,6 +5,7 @@ Rakefile
5
5
  bin/fbchk
6
6
  bin/fbt
7
7
  bin/fbtok
8
+ bin/fbtree
8
9
  bin/fbx
9
10
  lib/fbtok.rb
10
11
  lib/fbtok/linter.rb
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
2
2
 
3
3
 
4
4
  Hoe.spec 'fbtok' do
5
- self.version = '0.1.2'
5
+ self.version = '0.2.1'
6
6
 
7
7
  self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
8
8
  self.description = summary
@@ -19,9 +19,9 @@ Hoe.spec 'fbtok' do
19
19
  self.licenses = ['Public Domain']
20
20
 
21
21
  self.extra_deps = [
22
- # ['sportdb-parser', '>= 0.2.2'],
23
22
  # ['sportdb-structs', '>= 0.5.0'],
24
23
  # ['logutils', '>= 0.6.1'],
24
+ ['sportdb-parser', '>= 0.5.1'],
25
25
  ['sportdb-formats', '>= 2.1.2'],
26
26
  ]
27
27
 
data/bin/fbtok CHANGED
@@ -10,7 +10,6 @@ args = ARGV
10
10
 
11
11
  opts = {
12
12
  debug: true,
13
- metal: false,
14
13
  file: nil,
15
14
  }
16
15
 
@@ -27,11 +26,14 @@ parser = OptionParser.new do |parser|
27
26
  opts[:debug] = true
28
27
  end
29
28
 
29
+ =begin
30
30
  parser.on( "--metal",
31
31
  "turn off typed parse tree; show to the metal tokens"+
32
32
  " (default: #{opts[:metal]})" ) do |metal|
33
33
  opts[:metal] = true
34
34
  end
35
+ =end
36
+
35
37
 
36
38
  parser.on( "-f FILE", "--file FILE",
37
39
  "read datafiles (pathspecs) via .csv file") do |file|
@@ -85,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
85
87
 
86
88
  paths.each_with_index do |path,j|
87
89
  puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
88
- linter.read( path, parse: !opts[:metal] )
90
+ linter.read( path, parse: false ) ## only tokenize (do NOT parse)
89
91
 
90
92
  errors += linter.errors if linter.errors?
91
93
  end
@@ -94,10 +96,10 @@ specs.each_with_index do |(paths, rec),i|
94
96
  puts
95
97
  pp errors
96
98
  puts
97
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
99
+ puts "!! #{errors.size} tokenize error(s) in #{paths.size} datafiles(s)"
98
100
  else
99
101
  puts
100
- puts "OK no parse errors found in #{paths.size} datafile(s)"
102
+ puts "OK no tokenize errors found in #{paths.size} datafile(s)"
101
103
  end
102
104
 
103
105
  ## add errors to rec via rec['errors'] to allow
data/bin/fbtree ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ## tip: to test run:
4
+ ## ruby -I ./lib bin/fbtree
5
+
6
+ require 'fbtok'
7
+
8
+ args = ARGV
9
+
10
+
11
+ opts = {
12
+ debug: true,
13
+ }
14
+
15
+ parser = OptionParser.new do |parser|
16
+ parser.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"
17
+
18
+
19
+ parser.on( "-q", "--quiet",
20
+ "less debug output/messages - default is (#{!opts[:debug]})" ) do |debug|
21
+ opts[:debug] = false
22
+ end
23
+ parser.on( "--verbose", "--debug",
24
+ "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
25
+ opts[:debug] = true
26
+ end
27
+ end
28
+ parser.parse!( args )
29
+
30
+ puts "OPTS:"
31
+ p opts
32
+ puts "ARGV:"
33
+ p args
34
+
35
+
36
+ ## todo/check - use packs or projects or such
37
+ ## instead of specs - why? why not?
38
+ paths = if args.empty?
39
+ [
40
+ '../../../openfootball/euro/2021--europe/euro.txt',
41
+ '../../../openfootball/euro/2024--germany/euro.txt',
42
+ ]
43
+ else
44
+ ## check for directories
45
+ ## and auto-expand
46
+ SportDb::Parser::Opts.expand_args( args )
47
+ end
48
+
49
+
50
+
51
+ SportDb::Parser::Linter.debug = true if opts[:debug]
52
+
53
+ linter = SportDb::Parser::Linter.new
54
+
55
+
56
+ paths.each_with_index do |path,i|
57
+
58
+ puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
59
+
60
+ linter.read( path, parse: true )
61
+ end
62
+
63
+
64
+ puts "bye"
65
+
66
+
data/lib/fbtok/linter.rb CHANGED
@@ -7,7 +7,7 @@ class Parser
7
7
  class Linter
8
8
 
9
9
  def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
10
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
11
  def debug?() self.class.debug?; end
12
12
 
13
13
 
@@ -23,35 +23,10 @@ end
23
23
  def errors?() @errors.size > 0; end
24
24
 
25
25
 
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
26
  #########
52
27
  ## parse - false (default) - tokenize (only)
53
28
  ## - true - tokenize & parse
54
- def read( path, parse: false )
29
+ def read( path, parse: true )
55
30
  ## note: every (new) read call - resets errors list to empty
56
31
  @errors = []
57
32
 
@@ -62,9 +37,7 @@ def read( path, parse: false )
62
37
  h2 = nil
63
38
  orphans = 0 ## track paragraphs's with no heading
64
39
 
65
- attrib_found = false
66
-
67
-
40
+
68
41
  nodes.each do |node|
69
42
  type = node[0]
70
43
 
@@ -88,7 +61,29 @@ def read( path, parse: false )
88
61
 
89
62
  lines = node[1]
90
63
 
64
+
91
65
  tree = []
66
+
67
+ if parse
68
+ ## flatten lines
69
+ txt = []
70
+ lines.each_with_index do |line,i|
71
+ txt << line
72
+ txt << "\n"
73
+ end
74
+ txt = txt.join
75
+
76
+ if debug?
77
+ puts "lines:"
78
+ pp txt
79
+ end
80
+
81
+ ## todo/fix - add/track parse errors!!!!!!
82
+ parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
83
+ tree = parser.parse
84
+ pp tree
85
+
86
+ else ## process for tokenize only
92
87
  lines.each_with_index do |line,i|
93
88
 
94
89
  if debug?
@@ -96,36 +91,8 @@ def read( path, parse: false )
96
91
  puts "line >#{line}<"
97
92
  end
98
93
 
99
-
100
- ## skip new (experimental attrib syntax)
101
- if attrib_found == false &&
102
- ATTRIB_RE.match?( line )
103
- ## note: check attrib regex AFTER group def e.g.:
104
- ## Group A:
105
- ## Group B: etc.
106
- ## todo/fix - change Group A: to Group A etc.
107
- ## Group B: to Group B
108
- attrib_found = true
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- next
111
- end
112
-
113
- if attrib_found
114
- ## check if line ends with dot
115
- ## if not slurp up lines to the next do!!!
116
- ## logger.debug "skipping key/value line - >#{line}<"
117
- attrib_found = false if line.end_with?( '.' )
118
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
119
- next
120
- end
121
-
122
- t, error_messages = if parse
123
- @parser.parse_with_errors( line )
124
- else
125
- @parser.tokenize_with_errors( line )
126
- end
127
-
128
-
94
+ t, error_messages = @parser.tokenize_with_errors( line )
95
+
129
96
  if error_messages.size > 0
130
97
  ## add to "global" error list
131
98
  ## make a triplet tuple (file / msg / line text)
@@ -137,12 +104,31 @@ def read( path, parse: false )
137
104
  end
138
105
  end
139
106
 
107
+ ## post-process tokens
108
+ ## - check for round, group, etc.
109
+ t = t.map do |tok|
110
+ #############
111
+ ## pass 1
112
+ ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
113
+ if tok[0] == :TEXT
114
+ text = tok[1]
115
+ if @parser.is_group?( text )
116
+ [:GROUP, text]
117
+ elsif @parser.is_round?( text ) || @parser.is_leg?( text )
118
+ [:ROUND, text]
119
+ else
120
+ tok ## pass through as-is (1:1)
121
+ end
122
+ else
123
+ tok
124
+ end
125
+ end
126
+
140
127
  pp t if debug?
141
128
 
142
129
  tree << t
143
130
  end
144
-
145
- ## pp tree
131
+ end
146
132
  else
147
133
  pp node
148
134
  raise ArgumentError, "unsupported (node) type >#{type}<"
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbtok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-02 00:00:00.000000000 Z
11
+ date: 2025-01-17 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sportdb-parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.5.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.5.1
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: sportdb-formats
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -64,6 +78,7 @@ executables:
64
78
  - fbchk
65
79
  - fbt
66
80
  - fbtok
81
+ - fbtree
67
82
  - fbx
68
83
  extensions: []
69
84
  extra_rdoc_files:
@@ -78,6 +93,7 @@ files:
78
93
  - bin/fbchk
79
94
  - bin/fbt
80
95
  - bin/fbtok
96
+ - bin/fbtree
81
97
  - bin/fbx
82
98
  - lib/fbtok.rb
83
99
  - lib/fbtok/linter.rb