sportdb-parser 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3657cedc5125ee2515efa8be4a1838d05b7290523dd893f7eba5b87024e71238
4
- data.tar.gz: caf6d7e909e17fa0dcabf659ab8f5046ca1940d8f7c1c6f5312e485dc0089384
3
+ metadata.gz: 9ebb468318f2b87c33ca66afb6c46611ce5f420258e0c41b40a2cbfabcff7a49
4
+ data.tar.gz: 0cf1d511f3e936d73531442d1ca6bef94d90a50ae65346b5b57347d4d294dc77
5
5
  SHA512:
6
- metadata.gz: 4063565aada304a1eb96009b6fe542392f41a55d4ad4d21b5de156004bd69a055c5f86b076bed1defbe50423c8c891dd538931ea6ca9b8ec41e237c23e699219
7
- data.tar.gz: 91f6476810cb6617dfcd703ada57592cd38b87f3b4b9fc6fd4468a9457ff0e6ae6337a4e4f5c782e1b80f5f6b6015d5ce26ed6330915cd67a5fb6606f665017f
6
+ metadata.gz: e992ab97d7ae18c514de14078d30eb36adc40f5044242ce9ace089fb88b104c61b29ff86a2aa8101bb7257c3ff2ce32c6150439ff855e195bee1b26032bb0d9d
7
+ data.tar.gz: 25e66e45e7daf2783bc6507a3cb2c660d9153eab9530210ef51ef6e0d5d3fc531e5891897be3b0492b0ad7ea5fe3d406a0a3dd0559549b85518360d442ed4d8b
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.1
1
+ ### 0.3.0
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -2,11 +2,10 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
- bin/fbt
5
+ bin/fbtok
6
6
  lib/sportdb/parser.rb
7
7
  lib/sportdb/parser/lang.rb
8
8
  lib/sportdb/parser/linter.rb
9
- lib/sportdb/parser/opts.rb
10
9
  lib/sportdb/parser/outline_reader.rb
11
10
  lib/sportdb/parser/parser.rb
12
11
  lib/sportdb/parser/token-date.rb
data/bin/{fbt → fbtok} RENAMED
@@ -1,36 +1,25 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  ## tip: to test run:
4
- ## ruby -I ./lib bin/fbt
4
+ ## ruby -I ./lib bin/fbtok
5
5
 
6
- ## our own code
7
6
  require 'sportdb/parser'
8
7
 
9
8
 
9
+ require 'optparse' ## check - already auto-required in cocos? keep? why? why not?
10
10
 
11
- require 'optparse'
12
11
 
13
- ##
14
- ## read textfile
15
- ## and dump tokens
16
- ##
17
- ## fbt ../openfootball/.../euro.txt
12
+ args=ARGV
18
13
 
19
14
 
15
+ opts = {
16
+ debug: true,
17
+ metal: false,
18
+ }
20
19
 
21
-
22
- args = ARGV
23
- opts = { debug: false,
24
- metal: false }
25
-
26
- parser = OptionParser.new do |parser|
20
+ parser = OptionParser.new do |parser|
27
21
  parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
28
22
 
29
- ##
30
- ## check if git has a offline option?? (use same)
31
- ## check for other tools - why? why not?
32
-
33
-
34
23
  parser.on( "--verbose", "--debug",
35
24
  "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
36
25
  opts[:debug] = debug
@@ -50,29 +39,12 @@ puts "ARGV:"
50
39
  p args
51
40
 
52
41
 
53
-
54
-
55
-
56
- paths = if args.empty?
57
- [
58
- '../../../openfootball/euro/2021--europe/euro.txt',
59
- '../../../openfootball/euro/2024--germany/euro.txt',
60
- ]
61
- else
62
- ## check for directories
63
- ## and auto-expand
64
-
65
- SportDb::Parser::Opts.expand_args( args )
66
- end
67
-
68
-
69
-
70
42
  SportDb::Parser::Linter.debug = true if opts[:debug]
71
43
 
72
44
  linter = SportDb::Parser::Linter.new
73
-
74
45
  errors = []
75
46
 
47
+ paths = args
76
48
  paths.each_with_index do |path,i|
77
49
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
78
50
  linter.read( path, parse: !opts[:metal] )
@@ -90,5 +62,6 @@ else
90
62
  puts "OK no parse errors found in #{paths.size} datafile(s)"
91
63
  end
92
64
 
65
+
93
66
  puts "bye"
94
67
 
@@ -5,11 +5,11 @@ class Parser
5
5
  ###
6
6
  ## note - Linter for now nested inside Parser - keep? why? why not?
7
7
  class Linter
8
-
8
+
9
9
  def self.debug=(value) @@debug = value; end
10
10
  def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
11
+ def debug?() self.class.debug?; end
12
+
13
13
 
14
14
 
15
15
  attr_reader :errors
@@ -35,7 +35,7 @@ def errors?() @errors.size > 0; end
35
35
  ## Group B: - remove colon
36
36
  ## or lookup first
37
37
 
38
- ATTRIB_RE = %r{^
38
+ ATTRIB_RE = %r{^
39
39
  [ ]*? # slurp leading spaces
40
40
  (?<key>[^:|\]\[()\/; -]
41
41
  [^:|\]\[()\/;]{0,30}
@@ -50,12 +50,12 @@ def errors?() @errors.size > 0; end
50
50
 
51
51
  #########
52
52
  ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
53
+ ## - true - tokenize & parse
54
54
  def read( path, parse: false )
55
55
  ## note: every (new) read call - resets errors list to empty
56
56
  @errors = []
57
57
 
58
- nodes = OutlineReader.read( path )
58
+ nodes = OutlineReader.read( path )
59
59
 
60
60
  ## process nodes
61
61
  h1 = nil
@@ -66,7 +66,7 @@ def read( path, parse: false )
66
66
 
67
67
  nodes.each do |node|
68
68
  type = node[0]
69
-
69
+
70
70
  if type == :h1
71
71
  h1 = node[1] ## get heading text
72
72
  puts
@@ -74,14 +74,14 @@ def read( path, parse: false )
74
74
  elsif type == :p
75
75
 
76
76
  if h1.nil?
77
- orphans += 1 ## only warn once
77
+ orphans += 1 ## only warn once
78
78
  puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
79
79
  next
80
80
  end
81
81
 
82
82
  lines = node[1]
83
83
 
84
- tree = []
84
+ tree = []
85
85
  lines.each_with_index do |line,i|
86
86
 
87
87
  if debug?
@@ -91,10 +91,10 @@ def read( path, parse: false )
91
91
 
92
92
 
93
93
  ## skip new (experimental attrib syntax)
94
- if attrib_found == false &&
94
+ if attrib_found == false &&
95
95
  ATTRIB_RE.match?( line )
96
96
  ## note: check attrib regex AFTER group def e.g.:
97
- ## Group A:
97
+ ## Group A:
98
98
  ## Group B: etc.
99
99
  ## todo/fix - change Group A: to Group A etc.
100
100
  ## Group B: to Group B
@@ -107,17 +107,17 @@ def read( path, parse: false )
107
107
  ## check if line ends with dot
108
108
  ## if not slurp up lines to the next dot!!!
109
109
  ## logger.debug "skipping key/value line - >#{line}<"
110
- attrib_found = false if line.end_with?( '.' )
110
+ attrib_found = false if line.end_with?( '.' )
111
111
  # logger.debug "skipping key/value line (cont.) - >#{line}<"
112
112
  next
113
- end
114
-
113
+ end
114
+
115
115
  t, error_messages = if parse
116
116
  @parser.parse_with_errors( line )
117
117
  else
118
- @parser.tokenize_with_errors( line )
118
+ @parser.tokenize_with_errors( line )
119
119
  end
120
-
120
+
121
121
 
122
122
  if error_messages.size > 0
123
123
  ## add to "global" error list
@@ -134,7 +134,7 @@ def read( path, parse: false )
134
134
 
135
135
  tree << t
136
136
  end
137
-
137
+
138
138
  ## pp tree
139
139
  else
140
140
  pp node
@@ -146,4 +146,4 @@ end # class Linter
146
146
 
147
147
 
148
148
  end # class Parser
149
- end # module SportDb
149
+ end # module SportDb
@@ -155,6 +155,35 @@ DATE_RE = Regexp.union(
155
155
  )
156
156
 
157
157
 
158
+ ##
159
+ ## add a date parser helper
160
+ def self.parse_date( str, start: )
161
+ if m=DATE_RE.match( str )
162
+
163
+ year = m[:year].to_i(10) if m[:year]
164
+ month = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
165
+ day = m[:day].to_i(10) if m[:day]
166
+ wday = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
167
+
168
+ if year.nil? ## try to calculate year
169
+ year = if month > start.month ||
170
+ (month == start.month && day >= start.day)
171
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
172
+ start.year
173
+ else
174
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
175
+ start.year+1
176
+ end
177
+ end
178
+ Date.new( year,month,day )
179
+ else
180
+ puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
181
+ exit 1
182
+ end
183
+ end
184
+
185
+
186
+
158
187
  ###
159
188
  # date duration
160
189
  # use - or + as separator
@@ -1,12 +1,12 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
4
-
3
+
4
+
5
5
  ## note - do NOT allow single alpha text for now
6
- ## add later?? A - B C - D - why?
6
+ ## add later?? A - B C - D - why?
7
7
  ## opt 1) one alpha
8
- ## (?<text_i> [a-z]) # only allow single letter text (not numbers!!)
9
-
8
+ ## (?<text_i> [a-z]) # only allow single letter text (not numbers!!)
9
+
10
10
  ## opt 2) more than one alphanum
11
11
 
12
12
 
@@ -26,19 +26,19 @@ class Parser
26
26
 
27
27
 
28
28
  TEXT_RE = %r{
29
- ## must start with alpha (allow unicode letters!!)
30
- (?<text>
31
- ## positive lookbehind
29
+ ## must start with alpha (allow unicode letters!!)
30
+ (?<text>
31
+ ## positive lookbehind
32
32
  ## (MUST be fixed number of chars - no quantifier e.g. +? etc.)
33
33
  (?<=[ ,;@|\[\]]
34
34
  |^
35
35
  )
36
- (?:
36
+ (?:
37
37
  # opt 1 - start with alpha
38
38
  \p{L}+ ## all unicode letters (e.g. [a-z])
39
39
  |
40
40
 
41
- # opt 2 - start with num!! - allow special case (e.g. 1. FC)
41
+ # opt 2 - start with num!! - allow special case (e.g. 1. FC)
42
42
  \d+ # check for num lookahead (MUST be space or dot)
43
43
  ## MUST be followed by (optional dot) and
44
44
  ## required space !!!
@@ -46,69 +46,79 @@ TEXT_RE = %r{
46
46
  \.? ## optional dot
47
47
  [ ]? ## make space optional too - why? why not?
48
48
  ## yes - eg. 1st, 2nd, 5th etc.
49
- \p{L}+
49
+ \p{L}+
50
50
  )
51
-
51
+
52
52
  (?:(?: (?:[ ]
53
53
  (?!vs?\.?[ ]) ## note - exclude (v[ ]/vs[ ]/v.[ ]/vs.[ ])
54
- )
54
+ )
55
55
  | # only single spaces allowed inline!!!
56
- [-]
56
+ [-]
57
57
  )?
58
58
  (?:
59
59
  \p{L} |
60
- [&/']
60
+ [&/']
61
61
  |
62
62
  (?:
63
- \d+
64
- (?![0-9.:h'/+-])
63
+ \d+
64
+ (?![0-9.:h'/+-])
65
65
  ## negative lookahead for numbers
66
66
  ## note - include digits itself!!!
67
- )|
68
- \.
69
- )
67
+ )|
68
+ \.
69
+ )
70
70
  )* ## must NOT end with space or dash(-)
71
71
  ## todo/fix - possible in regex here
72
72
  ## only end in alphanum a-z0-9 (not dot or & ???)
73
73
 
74
-
74
+
75
75
  ## allow optional at the end
76
76
  ## tag or year
77
- ## make it and in the future - why? why not?
78
- ##
77
+ ## make it and in the future - why? why not?
78
+ ##
79
+ ## change - fix
80
+ ## do NOT use (A) for amateur
81
+ ## use A or A. with NO ()!!!
79
82
  ## (A) - allow with predefined alpha only for now
80
83
  ## e.g. (A) - amateur a team or b?
84
+ ### same for U21 or U9 etc
85
+ ## use with NO ()!!! - why? why not?
81
86
  ## or U21 U9 etc. - why? why not?
82
87
  ## or etc.
83
88
  ## (1879-1893) or allow years e.g. (1879-1893)
84
- ###
85
- (?:
86
- [ ]
87
- \( (?:
88
- A|B|
89
- U\d{1,2}
90
- )
91
- \)
92
- )?
89
+ ###
90
+ ## add allow country code three to five letters for now
91
+ ## change to generic 1 to 5 - why? why not?
92
+ ## e.g. (A), (I),
93
+ ## (AUT)
94
+ ## (TRNC) five? for UEFA code for northern cyprus
95
+ ## change to 1 to 4 - why? why not?
96
+ ## check - fix possible for upper case only here
97
+ ## inline for this group only?
93
98
  (?:
94
- [ ]
99
+ [ ]
95
100
  \(
96
101
  \d{4}-\d{4}
97
102
  \)
98
- )?
99
-
103
+ )?
104
+ (?:
105
+ [ ]+ ## allow more than one space - why? why not?
106
+ \( (?:
107
+ [A-Z]{1,5}
108
+ )
109
+ \)
110
+ )?
100
111
  ## add lookahead/lookbehind
101
- ## must be space!!!
112
+ ## must be space!!!
102
113
  ## (or comma or start/end of string)
103
114
  ## kind of \b !!!
104
115
  ## positive lookahead
105
116
  (?=[ ,;@|\[\]]
106
117
  |$
107
118
  )
108
- )
119
+ )
109
120
  }ix
110
121
 
111
122
 
112
123
  end # class Parser
113
- end # module SportDb
114
-
124
+ end # module SportDb
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 2
7
- PATCH = 1
6
+ MINOR = 3
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -24,11 +24,11 @@ require_relative 'parser/lang'
24
24
  require_relative 'parser/parser'
25
25
 
26
26
 
27
- ## more
27
+ ####
28
+ ## todo/check - move outline reader upstream to cocos - why? why not?
29
+ ## use read_outline(), parse_outline() - why? why not?
28
30
  require_relative 'parser/outline_reader'
29
31
  require_relative 'parser/linter'
30
- require_relative 'parser/opts'
31
-
32
32
 
33
33
  ###
34
34
  # make parser api (easily) available - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-24 00:00:00.000000000 Z
11
+ date: 2024-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -75,7 +75,7 @@ dependencies:
75
75
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
76
  email: gerald.bauer@gmail.com
77
77
  executables:
78
- - fbt
78
+ - fbtok
79
79
  extensions: []
80
80
  extra_rdoc_files:
81
81
  - CHANGELOG.md
@@ -86,11 +86,10 @@ files:
86
86
  - Manifest.txt
87
87
  - README.md
88
88
  - Rakefile
89
- - bin/fbt
89
+ - bin/fbtok
90
90
  - lib/sportdb/parser.rb
91
91
  - lib/sportdb/parser/lang.rb
92
92
  - lib/sportdb/parser/linter.rb
93
- - lib/sportdb/parser/opts.rb
94
93
  - lib/sportdb/parser/outline_reader.rb
95
94
  - lib/sportdb/parser/parser.rb
96
95
  - lib/sportdb/parser/token-date.rb
@@ -1,70 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
7
- class Opts
8
-
9
- SEASON_RE = %r{ (?:
10
- \d{4}-\d{2}
11
- | \d{4}(--[a-z0-9_-]+)?
12
- )
13
- }x
14
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
15
-
16
-
17
- ## note: if pattern includes directory add here
18
- ## (otherwise move to more "generic" datafile) - why? why not?
19
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
20
- #{SEASON}
21
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
22
- }x
23
-
24
-
25
- def self.find( path )
26
- datafiles = []
27
-
28
- ## note: normalize path - use File.expand_path ??
29
- ## change all backslash to slash for now
30
- ## path = path.gsub( "\\", '/' )
31
- path = File.expand_path( path )
32
-
33
- ## check all txt files
34
- ## note: incl. files starting with dot (.)) as candidates
35
- ## (normally excluded with just *)
36
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
37
- ## pp candidates
38
- candidates.each do |candidate|
39
- datafiles << candidate if MATCH_RE.match( candidate )
40
- end
41
-
42
- ## pp datafiles
43
- datafiles
44
- end
45
-
46
-
47
- def self.expand_args( args )
48
- paths = []
49
-
50
- args.each do |arg|
51
- ## check if directory
52
- if Dir.exist?( arg )
53
- datafiles = find( arg )
54
- puts
55
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
56
- pp datafiles
57
- paths += datafiles
58
- else
59
- ## assume it's a file
60
- paths << arg
61
- end
62
- end
63
-
64
- paths
65
- end
66
- end # class Opts
67
-
68
-
69
- end # class Parser
70
- end # module SportDb