fbtok 0.2.0 → 0.2.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0db5fb0d9cbda9ad626f4debbf27751f591c90684455e2e3cabc66162a02e3c6
4
- data.tar.gz: cc4d1d160356785e54b99fb7033e078af9edf63cd60a469ab8bb2ad685603b7f
3
+ metadata.gz: 191b44113dea92aebe49fb5717146c9cd0038afb448ba34c4dd828e6daeb869e
4
+ data.tar.gz: 7367ad2ae027d158371ac63fcbbe53271da84f1dabbd4a4f2111ebd658d0c2c7
5
5
  SHA512:
6
- metadata.gz: d688b40a4c97971eab87594a83ffe7966b245370944ac140950f0e935076d25c4317dfcb526ac0f22470c1027a7bc488812f37f07f6d1d413edb8e7c538c3f2b
7
- data.tar.gz: beefaf087fe66d2ab3ecaae23cc786184e0558b3dd7bcc44e4e131cf70fca200a3f723060c42a2921d5cbef1f2b6513dc567dc693a7dd03bc2adcd8b7ad3a6e2
6
+ metadata.gz: 85d8e883d5fbe0654a5188b3c0d46b1be2f694a96a6fe9d70a6b88ce36d7676200165f82e999ef2b77dd2fc6f453da0dfd30b06e8ed4cc0a622e5a20329cb61e
7
+ data.tar.gz: 947f91db38384711f3fa734f7ef52b51068f01d78b9ecd953ac17caa94afe61e748971341ceafb0bcddf6a120b2d30324d6820ff77a675ded8d494b354614b43
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.0
1
+ ### 0.2.1
2
2
  ### 0.0.1 / 2025-01-02
3
3
 
4
4
  * Everything is new. First release.
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
2
2
 
3
3
 
4
4
  Hoe.spec 'fbtok' do
5
- self.version = '0.2.0'
5
+ self.version = '0.2.1'
6
6
 
7
7
  self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
8
8
  self.description = summary
@@ -21,7 +21,7 @@ Hoe.spec 'fbtok' do
21
21
  self.extra_deps = [
22
22
  # ['sportdb-structs', '>= 0.5.0'],
23
23
  # ['logutils', '>= 0.6.1'],
24
- ['sportdb-parser', '>= 0.5.0'],
24
+ ['sportdb-parser', '>= 0.5.1'],
25
25
  ['sportdb-formats', '>= 2.1.2'],
26
26
  ]
27
27
 
data/bin/fbtok CHANGED
@@ -87,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
87
87
 
88
88
  paths.each_with_index do |path,j|
89
89
  puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
90
- linter.read( path )
90
+ linter.read( path, parse: false ) ## only tokenize (do NOT parse)
91
91
 
92
92
  errors += linter.errors if linter.errors?
93
93
  end
data/bin/fbtree CHANGED
@@ -5,150 +5,6 @@
5
5
 
6
6
  require 'fbtok'
7
7
 
8
-
9
- ###
10
- ## note - Linter for now nested inside Parser - keep? why? why not?
11
- class RaccLinter
12
-
13
- def self.debug=(value) @@debug = value; end
14
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
15
- def debug?() self.class.debug?; end
16
-
17
-
18
-
19
- attr_reader :errors
20
-
21
- def initialize
22
- @errors = []
23
- end
24
-
25
-
26
- def errors?() @errors.size > 0; end
27
-
28
-
29
-
30
- ## note: colon (:) MUST be followed by one (or more) spaces
31
- ## make sure mon feb 12 18:10 will not match
32
- ## allow 1. FC Köln etc.
33
- ## Mainz 05:
34
- ## limit to 30 chars max
35
- ## only allow chars incl. intl buut (NOT ()[]/;)
36
- ##
37
- ## Group A:
38
- ## Group B: - remove colon
39
- ## or lookup first
40
-
41
- ATTRIB_RE = %r{^
42
- [ ]*? # slurp leading spaces
43
- (?<key>[^:|\]\[()\/; -]
44
- [^:|\]\[()\/;]{0,30}
45
- )
46
- [ ]*? # slurp trailing spaces
47
- :[ ]+
48
- (?<value>.+)
49
- [ ]*? # slurp trailing spaces
50
- $
51
- }ix
52
-
53
-
54
- #########
55
- ## parse - false (default) - tokenize (only)
56
- ## - true - tokenize & parse
57
- def read( path )
58
- ## note: every (new) read call - resets errors list to empty
59
- @errors = []
60
-
61
- nodes = SportDb::OutlineReader.read( path )
62
-
63
- ## process nodes
64
- h1 = nil
65
- h2 = nil
66
- orphans = 0 ## track paragraphs's with no heading
67
-
68
- attrib_found = false
69
-
70
-
71
- nodes.each do |node|
72
- type = node[0]
73
-
74
- if type == :h1
75
- h1 = node[1] ## get heading text
76
- puts " = Heading 1 >#{node[1]}<"
77
- elsif type == :h2
78
- if h1.nil?
79
- puts "!! WARN - no heading for subheading; skipping parse"
80
- next
81
- end
82
- h2 = node[1] ## get heading text
83
- puts " == Heading 2 >#{node[1]}<"
84
- elsif type == :p
85
-
86
- if h1.nil?
87
- orphans += 1 ## only warn once
88
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
89
- next
90
- end
91
-
92
- lines = node[1]
93
-
94
- txt = []
95
- lines.each_with_index do |line,i|
96
-
97
- if debug?
98
- puts
99
- puts "line >#{line}<"
100
- end
101
-
102
-
103
- ## skip new (experimental attrib syntax)
104
- if attrib_found == false &&
105
- ATTRIB_RE.match?( line )
106
- ## note: check attrib regex AFTER group def e.g.:
107
- ## Group A:
108
- ## Group B: etc.
109
- ## todo/fix - change Group A: to Group A etc.
110
- ## Group B: to Group B
111
- attrib_found = true
112
- ## logger.debug "skipping key/value line - >#{line}<"
113
- next
114
- end
115
-
116
- if attrib_found
117
- ## check if line ends with dot
118
- ## if not slurp up lines to the next do!!!
119
- ## logger.debug "skipping key/value line - >#{line}<"
120
- attrib_found = false if line.end_with?( '.' )
121
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
122
- next
123
- end
124
-
125
- txt << line
126
- txt << "\n"
127
- end
128
-
129
- ## flatten
130
- txt = txt.join
131
- pp txt if debug?
132
-
133
- parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
134
- tree = parser.parse
135
- pp tree
136
- else
137
- pp node
138
- raise ArgumentError, "unsupported (node) type >#{type}<"
139
- end
140
- end # each node
141
- end # read
142
- end # class RaccLinter
143
-
144
-
145
-
146
-
147
- ###############################################
148
- # start with code
149
-
150
-
151
-
152
8
  args = ARGV
153
9
 
154
10
 
@@ -192,36 +48,19 @@ p args
192
48
 
193
49
 
194
50
 
51
+ SportDb::Parser::Linter.debug = true if opts[:debug]
52
+
53
+ linter = SportDb::Parser::Linter.new
195
54
 
196
- errors = []
197
- linter = RaccLinter.new
198
55
 
199
56
  paths.each_with_index do |path,i|
200
57
 
201
58
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
202
59
 
203
- linter.read( path )
60
+ linter.read( path, parse: true )
204
61
  end
205
62
 
206
- puts "bye"
207
-
208
63
 
64
+ puts "bye"
209
65
 
210
66
 
211
- __END__
212
-
213
- if errors.size > 0
214
- puts
215
- pp errors
216
- puts
217
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
218
- else
219
- puts
220
- puts "OK no parse errors found in #{paths.size} datafile(s)"
221
- end
222
-
223
- ## add errors to rec via rec['errors'] to allow
224
- ## for further processing/reporting
225
- rec['errors'] = errors
226
- end
227
-
data/lib/fbtok/linter.rb CHANGED
@@ -7,7 +7,7 @@ class Parser
7
7
  class Linter
8
8
 
9
9
  def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
10
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
11
  def debug?() self.class.debug?; end
12
12
 
13
13
 
@@ -23,35 +23,10 @@ end
23
23
  def errors?() @errors.size > 0; end
24
24
 
25
25
 
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
26
  #########
52
27
  ## parse - false (default) - tokenize (only)
53
28
  ## - true - tokenize & parse
54
- def read( path )
29
+ def read( path, parse: true )
55
30
  ## note: every (new) read call - resets errors list to empty
56
31
  @errors = []
57
32
 
@@ -62,9 +37,7 @@ def read( path )
62
37
  h2 = nil
63
38
  orphans = 0 ## track paragraphs's with no heading
64
39
 
65
- attrib_found = false
66
-
67
-
40
+
68
41
  nodes.each do |node|
69
42
  type = node[0]
70
43
 
@@ -88,7 +61,29 @@ def read( path )
88
61
 
89
62
  lines = node[1]
90
63
 
64
+
91
65
  tree = []
66
+
67
+ if parse
68
+ ## flatten lines
69
+ txt = []
70
+ lines.each_with_index do |line,i|
71
+ txt << line
72
+ txt << "\n"
73
+ end
74
+ txt = txt.join
75
+
76
+ if debug?
77
+ puts "lines:"
78
+ pp txt
79
+ end
80
+
81
+ ## todo/fix - add/track parse errors!!!!!!
82
+ parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
83
+ tree = parser.parse
84
+ pp tree
85
+
86
+ else ## process for tokenize only
92
87
  lines.each_with_index do |line,i|
93
88
 
94
89
  if debug?
@@ -96,29 +91,6 @@ def read( path )
96
91
  puts "line >#{line}<"
97
92
  end
98
93
 
99
-
100
- ## skip new (experimental attrib syntax)
101
- if attrib_found == false &&
102
- ATTRIB_RE.match?( line )
103
- ## note: check attrib regex AFTER group def e.g.:
104
- ## Group A:
105
- ## Group B: etc.
106
- ## todo/fix - change Group A: to Group A etc.
107
- ## Group B: to Group B
108
- attrib_found = true
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- next
111
- end
112
-
113
- if attrib_found
114
- ## check if line ends with dot
115
- ## if not slurp up lines to the next do!!!
116
- ## logger.debug "skipping key/value line - >#{line}<"
117
- attrib_found = false if line.end_with?( '.' )
118
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
119
- next
120
- end
121
-
122
94
  t, error_messages = @parser.tokenize_with_errors( line )
123
95
 
124
96
  if error_messages.size > 0
@@ -156,8 +128,7 @@ def read( path )
156
128
 
157
129
  tree << t
158
130
  end
159
-
160
- ## pp tree
131
+ end
161
132
  else
162
133
  pp node
163
134
  raise ArgumentError, "unsupported (node) type >#{type}<"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbtok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-15 00:00:00.000000000 Z
11
+ date: 2025-01-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-parser
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.5.0
19
+ version: 0.5.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.5.0
26
+ version: 0.5.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sportdb-formats
29
29
  requirement: !ruby/object:Gem::Requirement