fbtok 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0db5fb0d9cbda9ad626f4debbf27751f591c90684455e2e3cabc66162a02e3c6
4
- data.tar.gz: cc4d1d160356785e54b99fb7033e078af9edf63cd60a469ab8bb2ad685603b7f
3
+ metadata.gz: 191b44113dea92aebe49fb5717146c9cd0038afb448ba34c4dd828e6daeb869e
4
+ data.tar.gz: 7367ad2ae027d158371ac63fcbbe53271da84f1dabbd4a4f2111ebd658d0c2c7
5
5
  SHA512:
6
- metadata.gz: d688b40a4c97971eab87594a83ffe7966b245370944ac140950f0e935076d25c4317dfcb526ac0f22470c1027a7bc488812f37f07f6d1d413edb8e7c538c3f2b
7
- data.tar.gz: beefaf087fe66d2ab3ecaae23cc786184e0558b3dd7bcc44e4e131cf70fca200a3f723060c42a2921d5cbef1f2b6513dc567dc693a7dd03bc2adcd8b7ad3a6e2
6
+ metadata.gz: 85d8e883d5fbe0654a5188b3c0d46b1be2f694a96a6fe9d70a6b88ce36d7676200165f82e999ef2b77dd2fc6f453da0dfd30b06e8ed4cc0a622e5a20329cb61e
7
+ data.tar.gz: 947f91db38384711f3fa734f7ef52b51068f01d78b9ecd953ac17caa94afe61e748971341ceafb0bcddf6a120b2d30324d6820ff77a675ded8d494b354614b43
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.0
1
+ ### 0.2.1
2
2
  ### 0.0.1 / 2025-01-02
3
3
 
4
4
  * Everything is new. First release.
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'hoe'
2
2
 
3
3
 
4
4
  Hoe.spec 'fbtok' do
5
- self.version = '0.2.0'
5
+ self.version = '0.2.1'
6
6
 
7
7
  self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
8
8
  self.description = summary
@@ -21,7 +21,7 @@ Hoe.spec 'fbtok' do
21
21
  self.extra_deps = [
22
22
  # ['sportdb-structs', '>= 0.5.0'],
23
23
  # ['logutils', '>= 0.6.1'],
24
- ['sportdb-parser', '>= 0.5.0'],
24
+ ['sportdb-parser', '>= 0.5.1'],
25
25
  ['sportdb-formats', '>= 2.1.2'],
26
26
  ]
27
27
 
data/bin/fbtok CHANGED
@@ -87,7 +87,7 @@ specs.each_with_index do |(paths, rec),i|
87
87
 
88
88
  paths.each_with_index do |path,j|
89
89
  puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
90
- linter.read( path )
90
+ linter.read( path, parse: false ) ## only tokenize (do NOT parse)
91
91
 
92
92
  errors += linter.errors if linter.errors?
93
93
  end
data/bin/fbtree CHANGED
@@ -5,150 +5,6 @@
5
5
 
6
6
  require 'fbtok'
7
7
 
8
-
9
- ###
10
- ## note - Linter for now nested inside Parser - keep? why? why not?
11
- class RaccLinter
12
-
13
- def self.debug=(value) @@debug = value; end
14
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
15
- def debug?() self.class.debug?; end
16
-
17
-
18
-
19
- attr_reader :errors
20
-
21
- def initialize
22
- @errors = []
23
- end
24
-
25
-
26
- def errors?() @errors.size > 0; end
27
-
28
-
29
-
30
- ## note: colon (:) MUST be followed by one (or more) spaces
31
- ## make sure mon feb 12 18:10 will not match
32
- ## allow 1. FC Köln etc.
33
- ## Mainz 05:
34
- ## limit to 30 chars max
35
- ## only allow chars incl. intl buut (NOT ()[]/;)
36
- ##
37
- ## Group A:
38
- ## Group B: - remove colon
39
- ## or lookup first
40
-
41
- ATTRIB_RE = %r{^
42
- [ ]*? # slurp leading spaces
43
- (?<key>[^:|\]\[()\/; -]
44
- [^:|\]\[()\/;]{0,30}
45
- )
46
- [ ]*? # slurp trailing spaces
47
- :[ ]+
48
- (?<value>.+)
49
- [ ]*? # slurp trailing spaces
50
- $
51
- }ix
52
-
53
-
54
- #########
55
- ## parse - false (default) - tokenize (only)
56
- ## - true - tokenize & parse
57
- def read( path )
58
- ## note: every (new) read call - resets errors list to empty
59
- @errors = []
60
-
61
- nodes = SportDb::OutlineReader.read( path )
62
-
63
- ## process nodes
64
- h1 = nil
65
- h2 = nil
66
- orphans = 0 ## track paragraphs's with no heading
67
-
68
- attrib_found = false
69
-
70
-
71
- nodes.each do |node|
72
- type = node[0]
73
-
74
- if type == :h1
75
- h1 = node[1] ## get heading text
76
- puts " = Heading 1 >#{node[1]}<"
77
- elsif type == :h2
78
- if h1.nil?
79
- puts "!! WARN - no heading for subheading; skipping parse"
80
- next
81
- end
82
- h2 = node[1] ## get heading text
83
- puts " == Heading 2 >#{node[1]}<"
84
- elsif type == :p
85
-
86
- if h1.nil?
87
- orphans += 1 ## only warn once
88
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
89
- next
90
- end
91
-
92
- lines = node[1]
93
-
94
- txt = []
95
- lines.each_with_index do |line,i|
96
-
97
- if debug?
98
- puts
99
- puts "line >#{line}<"
100
- end
101
-
102
-
103
- ## skip new (experimental attrib syntax)
104
- if attrib_found == false &&
105
- ATTRIB_RE.match?( line )
106
- ## note: check attrib regex AFTER group def e.g.:
107
- ## Group A:
108
- ## Group B: etc.
109
- ## todo/fix - change Group A: to Group A etc.
110
- ## Group B: to Group B
111
- attrib_found = true
112
- ## logger.debug "skipping key/value line - >#{line}<"
113
- next
114
- end
115
-
116
- if attrib_found
117
- ## check if line ends with dot
118
- ## if not slurp up lines to the next do!!!
119
- ## logger.debug "skipping key/value line - >#{line}<"
120
- attrib_found = false if line.end_with?( '.' )
121
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
122
- next
123
- end
124
-
125
- txt << line
126
- txt << "\n"
127
- end
128
-
129
- ## flatten
130
- txt = txt.join
131
- pp txt if debug?
132
-
133
- parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
134
- tree = parser.parse
135
- pp tree
136
- else
137
- pp node
138
- raise ArgumentError, "unsupported (node) type >#{type}<"
139
- end
140
- end # each node
141
- end # read
142
- end # class RaccLinter
143
-
144
-
145
-
146
-
147
- ###############################################
148
- # start with code
149
-
150
-
151
-
152
8
  args = ARGV
153
9
 
154
10
 
@@ -192,36 +48,19 @@ p args
192
48
 
193
49
 
194
50
 
51
+ SportDb::Parser::Linter.debug = true if opts[:debug]
52
+
53
+ linter = SportDb::Parser::Linter.new
195
54
 
196
- errors = []
197
- linter = RaccLinter.new
198
55
 
199
56
  paths.each_with_index do |path,i|
200
57
 
201
58
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
202
59
 
203
- linter.read( path )
60
+ linter.read( path, parse: true )
204
61
  end
205
62
 
206
- puts "bye"
207
-
208
63
 
64
+ puts "bye"
209
65
 
210
66
 
211
- __END__
212
-
213
- if errors.size > 0
214
- puts
215
- pp errors
216
- puts
217
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
218
- else
219
- puts
220
- puts "OK no parse errors found in #{paths.size} datafile(s)"
221
- end
222
-
223
- ## add errors to rec via rec['errors'] to allow
224
- ## for further processing/reporting
225
- rec['errors'] = errors
226
- end
227
-
data/lib/fbtok/linter.rb CHANGED
@@ -7,7 +7,7 @@ class Parser
7
7
  class Linter
8
8
 
9
9
  def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
10
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
11
  def debug?() self.class.debug?; end
12
12
 
13
13
 
@@ -23,35 +23,10 @@ end
23
23
  def errors?() @errors.size > 0; end
24
24
 
25
25
 
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
26
  #########
52
27
  ## parse - false - tokenize (only)
53
28
  ## - true (default) - tokenize & parse
54
- def read( path )
29
+ def read( path, parse: true )
55
30
  ## note: every (new) read call - resets errors list to empty
56
31
  @errors = []
57
32
 
@@ -62,9 +37,7 @@ def read( path )
62
37
  h2 = nil
63
38
  orphans = 0 ## track paragraphs with no heading
64
39
 
65
- attrib_found = false
66
-
67
-
40
+
68
41
  nodes.each do |node|
69
42
  type = node[0]
70
43
 
@@ -88,7 +61,29 @@ def read( path )
88
61
 
89
62
  lines = node[1]
90
63
 
64
+
91
65
  tree = []
66
+
67
+ if parse
68
+ ## flatten lines
69
+ txt = []
70
+ lines.each_with_index do |line,i|
71
+ txt << line
72
+ txt << "\n"
73
+ end
74
+ txt = txt.join
75
+
76
+ if debug?
77
+ puts "lines:"
78
+ pp txt
79
+ end
80
+
81
+ ## todo/fix - add/track parse errors!!!!!!
82
+ parser = RaccMatchParser.new( txt ) ## use own parser instance (not shared) - why? why not?
83
+ tree = parser.parse
84
+ pp tree
85
+
86
+ else ## process for tokenize only
92
87
  lines.each_with_index do |line,i|
93
88
 
94
89
  if debug?
@@ -96,29 +91,6 @@ def read( path )
96
91
  puts "line >#{line}<"
97
92
  end
98
93
 
99
-
100
- ## skip new (experimental attrib syntax)
101
- if attrib_found == false &&
102
- ATTRIB_RE.match?( line )
103
- ## note: check attrib regex AFTER group def e.g.:
104
- ## Group A:
105
- ## Group B: etc.
106
- ## todo/fix - change Group A: to Group A etc.
107
- ## Group B: to Group B
108
- attrib_found = true
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- next
111
- end
112
-
113
- if attrib_found
114
- ## check if line ends with dot
115
- ## if not slurp up lines to the next do!!!
116
- ## logger.debug "skipping key/value line - >#{line}<"
117
- attrib_found = false if line.end_with?( '.' )
118
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
119
- next
120
- end
121
-
122
94
  t, error_messages = @parser.tokenize_with_errors( line )
123
95
 
124
96
  if error_messages.size > 0
@@ -156,8 +128,7 @@ def read( path )
156
128
 
157
129
  tree << t
158
130
  end
159
-
160
- ## pp tree
131
+ end
161
132
  else
162
133
  pp node
163
134
  raise ArgumentError, "unsupported (node) type >#{type}<"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbtok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-15 00:00:00.000000000 Z
11
+ date: 2025-01-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-parser
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.5.0
19
+ version: 0.5.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.5.0
26
+ version: 0.5.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sportdb-formats
29
29
  requirement: !ruby/object:Gem::Requirement