fbtok 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0db5fb0d9cbda9ad626f4debbf27751f591c90684455e2e3cabc66162a02e3c6
4
- data.tar.gz: cc4d1d160356785e54b99fb7033e078af9edf63cd60a469ab8bb2ad685603b7f
3
+ metadata.gz: a0ce828e29e589e804a281a71a1c1160951b669a6fecd82cc42dc9bebf059dd1
4
+ data.tar.gz: 42bcc2682928350e3dc4344ea245e1a5da2156743eb3917a293b963ed93123c5
5
5
  SHA512:
6
- metadata.gz: d688b40a4c97971eab87594a83ffe7966b245370944ac140950f0e935076d25c4317dfcb526ac0f22470c1027a7bc488812f37f07f6d1d413edb8e7c538c3f2b
7
- data.tar.gz: beefaf087fe66d2ab3ecaae23cc786184e0558b3dd7bcc44e4e131cf70fca200a3f723060c42a2921d5cbef1f2b6513dc567dc693a7dd03bc2adcd8b7ad3a6e2
6
+ metadata.gz: 3c1dd2f87074bf4d2c063df0032b5068d8c023f190a090492a964d210f7a0eae20e3239a61bc387e1f6f0313a8dc316ff5cd62971b383cbe8690f11f1199e117
7
+ data.tar.gz: 3020d52799befbda3d2d45a8f2d830e1fcb4e5b8330652fa04b0e46563d9b979b8ba63c82756519d64e1db144d421f70cb78949865f1c3c0d7aaba017b6c57ab
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.0
1
+ ### 0.2.2
2
2
  ### 0.0.1 / 2025-01-02
3
3
 
4
4
  * Everything is new. First release.
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
2
2
 
3
3
 
4
4
  Hoe.spec 'fbtok' do
5
- self.version = '0.2.0'
5
+ self.version = '0.2.2'
6
6
 
7
7
  self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
8
8
  self.description = summary
@@ -21,7 +21,7 @@ Hoe.spec 'fbtok' do
21
21
  self.extra_deps = [
22
22
  # ['sportdb-structs', '>= 0.5.0'],
23
23
  # ['logutils', '>= 0.6.1'],
24
- ['sportdb-parser', '>= 0.5.0'],
24
+ ['sportdb-parser', '>= 0.5.4'],
25
25
  ['sportdb-formats', '>= 2.1.2'],
26
26
  ]
27
27
 
data/bin/fbtok CHANGED
@@ -87,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
87
87
 
88
88
  paths.each_with_index do |path,j|
89
89
  puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
90
- linter.read( path )
90
+ linter.read( path, parse: false ) ## only tokenize (do NOT parse)
91
91
 
92
92
  errors += linter.errors if linter.errors?
93
93
  end
data/bin/fbtree CHANGED
@@ -5,150 +5,6 @@
5
5
 
6
6
  require 'fbtok'
7
7
 
8
-
9
- ###
10
- ## note - Linter for now nested inside Parser - keep? why? why not?
11
- class RaccLinter
12
-
13
- def self.debug=(value) @@debug = value; end
14
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
15
- def debug?() self.class.debug?; end
16
-
17
-
18
-
19
- attr_reader :errors
20
-
21
- def initialize
22
- @errors = []
23
- end
24
-
25
-
26
- def errors?() @errors.size > 0; end
27
-
28
-
29
-
30
- ## note: colon (:) MUST be followed by one (or more) spaces
31
- ## make sure mon feb 12 18:10 will not match
32
- ## allow 1. FC Köln etc.
33
- ## Mainz 05:
34
- ## limit to 30 chars max
35
- ## only allow chars incl. intl buut (NOT ()[]/;)
36
- ##
37
- ## Group A:
38
- ## Group B: - remove colon
39
- ## or lookup first
40
-
41
- ATTRIB_RE = %r{^
42
- [ ]*? # slurp leading spaces
43
- (?<key>[^:|\]\[()\/; -]
44
- [^:|\]\[()\/;]{0,30}
45
- )
46
- [ ]*? # slurp trailing spaces
47
- :[ ]+
48
- (?<value>.+)
49
- [ ]*? # slurp trailing spaces
50
- $
51
- }ix
52
-
53
-
54
- #########
55
- ## parse - false (default) - tokenize (only)
56
- ## - true - tokenize & parse
57
- def read( path )
58
- ## note: every (new) read call - resets errors list to empty
59
- @errors = []
60
-
61
- nodes = SportDb::OutlineReader.read( path )
62
-
63
- ## process nodes
64
- h1 = nil
65
- h2 = nil
66
- orphans = 0 ## track paragraphs's with no heading
67
-
68
- attrib_found = false
69
-
70
-
71
- nodes.each do |node|
72
- type = node[0]
73
-
74
- if type == :h1
75
- h1 = node[1] ## get heading text
76
- puts " = Heading 1 >#{node[1]}<"
77
- elsif type == :h2
78
- if h1.nil?
79
- puts "!! WARN - no heading for subheading; skipping parse"
80
- next
81
- end
82
- h2 = node[1] ## get heading text
83
- puts " == Heading 2 >#{node[1]}<"
84
- elsif type == :p
85
-
86
- if h1.nil?
87
- orphans += 1 ## only warn once
88
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
89
- next
90
- end
91
-
92
- lines = node[1]
93
-
94
- txt = []
95
- lines.each_with_index do |line,i|
96
-
97
- if debug?
98
- puts
99
- puts "line >#{line}<"
100
- end
101
-
102
-
103
- ## skip new (experimental attrib syntax)
104
- if attrib_found == false &&
105
- ATTRIB_RE.match?( line )
106
- ## note: check attrib regex AFTER group def e.g.:
107
- ## Group A:
108
- ## Group B: etc.
109
- ## todo/fix - change Group A: to Group A etc.
110
- ## Group B: to Group B
111
- attrib_found = true
112
- ## logger.debug "skipping key/value line - >#{line}<"
113
- next
114
- end
115
-
116
- if attrib_found
117
- ## check if line ends with dot
118
- ## if not slurp up lines to the next do!!!
119
- ## logger.debug "skipping key/value line - >#{line}<"
120
- attrib_found = false if line.end_with?( '.' )
121
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
122
- next
123
- end
124
-
125
- txt << line
126
- txt << "\n"
127
- end
128
-
129
- ## flatten
130
- txt = txt.join
131
- pp txt if debug?
132
-
133
- parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
134
- tree = parser.parse
135
- pp tree
136
- else
137
- pp node
138
- raise ArgumentError, "unsupported (node) type >#{type}<"
139
- end
140
- end # each node
141
- end # read
142
- end # class RaccLinter
143
-
144
-
145
-
146
-
147
- ###############################################
148
- # start with code
149
-
150
-
151
-
152
8
  args = ARGV
153
9
 
154
10
 
@@ -192,36 +48,19 @@ p args
192
48
 
193
49
 
194
50
 
51
+ SportDb::Parser::Linter.debug = true if opts[:debug]
52
+
53
+ linter = SportDb::Parser::Linter.new
195
54
 
196
- errors = []
197
- linter = RaccLinter.new
198
55
 
199
56
  paths.each_with_index do |path,i|
200
57
 
201
58
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
202
59
 
203
- linter.read( path )
60
+ linter.read( path, parse: true )
204
61
  end
205
62
 
206
- puts "bye"
207
-
208
63
 
64
+ puts "bye"
209
65
 
210
66
 
211
- __END__
212
-
213
- if errors.size > 0
214
- puts
215
- pp errors
216
- puts
217
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
218
- else
219
- puts
220
- puts "OK no parse errors found in #{paths.size} datafile(s)"
221
- end
222
-
223
- ## add errors to rec via rec['errors'] to allow
224
- ## for further processing/reporting
225
- rec['errors'] = errors
226
- end
227
-
data/lib/fbtok/linter.rb CHANGED
@@ -7,7 +7,7 @@ class Parser
7
7
  class Linter
8
8
 
9
9
  def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
10
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
11
  def debug?() self.class.debug?; end
12
12
 
13
13
 
@@ -23,71 +23,37 @@ end
23
23
  def errors?() @errors.size > 0; end
24
24
 
25
25
 
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
26
  #########
52
27
  ## parse - false - tokenize (only)
53
28
  ## - true (default) - tokenize & parse
54
- def read( path )
29
+ def read( path, parse: true )
55
30
  ## note: every (new) read call - resets errors list to empty
56
31
  @errors = []
57
32
 
58
- nodes = OutlineReader.read( path )
59
-
60
- ## process nodes
61
- h1 = nil
62
- h2 = nil
63
- orphans = 0 ## track paragraphs's with no heading
33
+ outline = QuickMatchOutline.read( path )
64
34
 
65
- attrib_found = false
35
+ outline.each_para do |lines|
66
36
 
67
-
68
- nodes.each do |node|
69
- type = node[0]
70
-
71
- if type == :h1
72
- h1 = node[1] ## get heading text
73
- puts " = Heading 1 >#{node[1]}<"
74
- elsif type == :h2
75
- if h1.nil?
76
- puts "!! WARN - no heading for subheading; skipping parse"
77
- next
78
- end
79
- h2 = node[1] ## get heading text
80
- puts " == Heading 2 >#{node[1]}<"
81
- elsif type == :p
82
-
83
- if h1.nil?
84
- orphans += 1 ## only warn once
85
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
86
- next
37
+ if parse
38
+ ## flatten lines
39
+ txt = []
40
+ lines.each_with_index do |line,i|
41
+ txt << line
42
+ txt << "\n"
87
43
  end
44
+ txt = txt.join
45
+
46
+ if debug?
47
+ puts "lines:"
48
+ pp txt
49
+ end
50
+
51
+ ## todo/fix - add/track parse errors!!!!!!
52
+ parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
53
+ tree = parser.parse
54
+ pp tree
88
55
 
89
- lines = node[1]
90
-
56
+ else ## process for tokenize only
91
57
  tree = []
92
58
  lines.each_with_index do |line,i|
93
59
 
@@ -96,29 +62,6 @@ def read( path )
96
62
  puts "line >#{line}<"
97
63
  end
98
64
 
99
-
100
- ## skip new (experimental attrib syntax)
101
- if attrib_found == false &&
102
- ATTRIB_RE.match?( line )
103
- ## note: check attrib regex AFTER group def e.g.:
104
- ## Group A:
105
- ## Group B: etc.
106
- ## todo/fix - change Group A: to Group A etc.
107
- ## Group B: to Group B
108
- attrib_found = true
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- next
111
- end
112
-
113
- if attrib_found
114
- ## check if line ends with dot
115
- ## if not slurp up lines to the next do!!!
116
- ## logger.debug "skipping key/value line - >#{line}<"
117
- attrib_found = false if line.end_with?( '.' )
118
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
119
- next
120
- end
121
-
122
65
  t, error_messages = @parser.tokenize_with_errors( line )
123
66
 
124
67
  if error_messages.size > 0
@@ -155,15 +98,10 @@ def read( path )
155
98
  pp t if debug?
156
99
 
157
100
  tree << t
158
- end
159
-
160
- ## pp tree
161
- else
162
- pp node
163
- raise ArgumentError, "unsupported (node) type >#{type}<"
164
- end
165
- end # each node
166
- end # read
101
+ end # each line
102
+ end # parse? (or tokenize?)
103
+ end # each para (node)
104
+ end # method read
167
105
  end # class Linter
168
106
 
169
107
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbtok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-15 00:00:00.000000000 Z
11
+ date: 2025-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-parser
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.5.0
19
+ version: 0.5.4
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.5.0
26
+ version: 0.5.4
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sportdb-formats
29
29
  requirement: !ruby/object:Gem::Requirement