sportdb-parser 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3657cedc5125ee2515efa8be4a1838d05b7290523dd893f7eba5b87024e71238
4
- data.tar.gz: caf6d7e909e17fa0dcabf659ab8f5046ca1940d8f7c1c6f5312e485dc0089384
3
+ metadata.gz: 9ebb468318f2b87c33ca66afb6c46611ce5f420258e0c41b40a2cbfabcff7a49
4
+ data.tar.gz: 0cf1d511f3e936d73531442d1ca6bef94d90a50ae65346b5b57347d4d294dc77
5
5
  SHA512:
6
- metadata.gz: 4063565aada304a1eb96009b6fe542392f41a55d4ad4d21b5de156004bd69a055c5f86b076bed1defbe50423c8c891dd538931ea6ca9b8ec41e237c23e699219
7
- data.tar.gz: 91f6476810cb6617dfcd703ada57592cd38b87f3b4b9fc6fd4468a9457ff0e6ae6337a4e4f5c782e1b80f5f6b6015d5ce26ed6330915cd67a5fb6606f665017f
6
+ metadata.gz: e992ab97d7ae18c514de14078d30eb36adc40f5044242ce9ace089fb88b104c61b29ff86a2aa8101bb7257c3ff2ce32c6150439ff855e195bee1b26032bb0d9d
7
+ data.tar.gz: 25e66e45e7daf2783bc6507a3cb2c660d9153eab9530210ef51ef6e0d5d3fc531e5891897be3b0492b0ad7ea5fe3d406a0a3dd0559549b85518360d442ed4d8b
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.1
1
+ ### 0.3.0
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -2,11 +2,10 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
- bin/fbt
5
+ bin/fbtok
6
6
  lib/sportdb/parser.rb
7
7
  lib/sportdb/parser/lang.rb
8
8
  lib/sportdb/parser/linter.rb
9
- lib/sportdb/parser/opts.rb
10
9
  lib/sportdb/parser/outline_reader.rb
11
10
  lib/sportdb/parser/parser.rb
12
11
  lib/sportdb/parser/token-date.rb
data/bin/{fbt → fbtok} RENAMED
@@ -1,36 +1,25 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  ## tip: to test run:
4
- ## ruby -I ./lib bin/fbt
4
+ ## ruby -I ./lib bin/fbtok
5
5
 
6
- ## our own code
7
6
  require 'sportdb/parser'
8
7
 
9
8
 
9
+ require 'optparse' ## check - already auto-required in cocos? keep? why? why not?
10
10
 
11
- require 'optparse'
12
11
 
13
- ##
14
- ## read textfile
15
- ## and dump tokens
16
- ##
17
- ## fbt ../openfootball/.../euro.txt
12
+ args=ARGV
18
13
 
19
14
 
15
+ opts = {
16
+ debug: true,
17
+ metal: false,
18
+ }
20
19
 
21
-
22
- args = ARGV
23
- opts = { debug: false,
24
- metal: false }
25
-
26
- parser = OptionParser.new do |parser|
20
+ parser = OptionParser.new do |parser|
27
21
  parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
28
22
 
29
- ##
30
- ## check if git has a offline option?? (use same)
31
- ## check for other tools - why? why not?
32
-
33
-
34
23
  parser.on( "--verbose", "--debug",
35
24
  "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
36
25
  opts[:debug] = debug
@@ -50,29 +39,12 @@ puts "ARGV:"
50
39
  p args
51
40
 
52
41
 
53
-
54
-
55
-
56
- paths = if args.empty?
57
- [
58
- '../../../openfootball/euro/2021--europe/euro.txt',
59
- '../../../openfootball/euro/2024--germany/euro.txt',
60
- ]
61
- else
62
- ## check for directories
63
- ## and auto-expand
64
-
65
- SportDb::Parser::Opts.expand_args( args )
66
- end
67
-
68
-
69
-
70
42
  SportDb::Parser::Linter.debug = true if opts[:debug]
71
43
 
72
44
  linter = SportDb::Parser::Linter.new
73
-
74
45
  errors = []
75
46
 
47
+ paths = args
76
48
  paths.each_with_index do |path,i|
77
49
  puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
78
50
  linter.read( path, parse: !opts[:metal] )
@@ -90,5 +62,6 @@ else
90
62
  puts "OK no parse errors found in #{paths.size} datafile(s)"
91
63
  end
92
64
 
65
+
93
66
  puts "bye"
94
67
 
@@ -5,11 +5,11 @@ class Parser
5
5
  ###
6
6
  ## note - Linter for now nested inside Parser - keep? why? why not?
7
7
  class Linter
8
-
8
+
9
9
  def self.debug=(value) @@debug = value; end
10
10
  def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
11
+ def debug?() self.class.debug?; end
12
+
13
13
 
14
14
 
15
15
  attr_reader :errors
@@ -35,7 +35,7 @@ def errors?() @errors.size > 0; end
35
35
  ## Group B: - remove colon
36
36
  ## or lookup first
37
37
 
38
- ATTRIB_RE = %r{^
38
+ ATTRIB_RE = %r{^
39
39
  [ ]*? # slurp leading spaces
40
40
  (?<key>[^:|\]\[()\/; -]
41
41
  [^:|\]\[()\/;]{0,30}
@@ -50,12 +50,12 @@ def errors?() @errors.size > 0; end
50
50
 
51
51
  #########
52
52
  ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
53
+ ## - true - tokenize & parse
54
54
  def read( path, parse: false )
55
55
  ## note: every (new) read call - resets errors list to empty
56
56
  @errors = []
57
57
 
58
- nodes = OutlineReader.read( path )
58
+ nodes = OutlineReader.read( path )
59
59
 
60
60
  ## process nodes
61
61
  h1 = nil
@@ -66,7 +66,7 @@ def read( path, parse: false )
66
66
 
67
67
  nodes.each do |node|
68
68
  type = node[0]
69
-
69
+
70
70
  if type == :h1
71
71
  h1 = node[1] ## get heading text
72
72
  puts
@@ -74,14 +74,14 @@ def read( path, parse: false )
74
74
  elsif type == :p
75
75
 
76
76
  if h1.nil?
77
- orphans += 1 ## only warn once
77
+ orphans += 1 ## only warn once
78
78
  puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
79
79
  next
80
80
  end
81
81
 
82
82
  lines = node[1]
83
83
 
84
- tree = []
84
+ tree = []
85
85
  lines.each_with_index do |line,i|
86
86
 
87
87
  if debug?
@@ -91,10 +91,10 @@ def read( path, parse: false )
91
91
 
92
92
 
93
93
  ## skip new (experimental attrib syntax)
94
- if attrib_found == false &&
94
+ if attrib_found == false &&
95
95
  ATTRIB_RE.match?( line )
96
96
  ## note: check attrib regex AFTER group def e.g.:
97
- ## Group A:
97
+ ## Group A:
98
98
  ## Group B: etc.
99
99
  ## todo/fix - change Group A: to Group A etc.
100
100
  ## Group B: to Group B
@@ -107,17 +107,17 @@ def read( path, parse: false )
107
107
  ## check if line ends with dot
108
108
  ## if not slurp up lines to the next do!!!
109
109
  ## logger.debug "skipping key/value line - >#{line}<"
110
- attrib_found = false if line.end_with?( '.' )
110
+ attrib_found = false if line.end_with?( '.' )
111
111
  # logger.debug "skipping key/value line (cont.) - >#{line}<"
112
112
  next
113
- end
114
-
113
+ end
114
+
115
115
  t, error_messages = if parse
116
116
  @parser.parse_with_errors( line )
117
117
  else
118
- @parser.tokenize_with_errors( line )
118
+ @parser.tokenize_with_errors( line )
119
119
  end
120
-
120
+
121
121
 
122
122
  if error_messages.size > 0
123
123
  ## add to "global" error list
@@ -134,7 +134,7 @@ def read( path, parse: false )
134
134
 
135
135
  tree << t
136
136
  end
137
-
137
+
138
138
  ## pp tree
139
139
  else
140
140
  pp node
@@ -146,4 +146,4 @@ end # class Linter
146
146
 
147
147
 
148
148
  end # class Parser
149
- end # module SportDb
149
+ end # module SportDb
@@ -155,6 +155,35 @@ DATE_RE = Regexp.union(
155
155
  )
156
156
 
157
157
 
158
+ ##
159
+ ## add a date parser helper
160
+ def self.parse_date( str, start: )
161
+ if m=DATE_RE.match( str )
162
+
163
+ year = m[:year].to_i(10) if m[:year]
164
+ month = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
165
+ day = m[:day].to_i(10) if m[:day]
166
+ wday = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
167
+
168
+ if year.nil? ## try to calculate year
169
+ year = if month > start.month ||
170
+ (month == start.month && day >= start.day)
171
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
172
+ start.year
173
+ else
174
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
175
+ start.year+1
176
+ end
177
+ end
178
+ Date.new( year,month,day )
179
+ else
180
+ puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
181
+ exit 1
182
+ end
183
+ end
184
+
185
+
186
+
158
187
  ###
159
188
  # date duration
160
189
  # use - or + as separator
@@ -1,12 +1,12 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
4
-
3
+
4
+
5
5
  ## note - do NOT allow single alpha text for now
6
- ## add later?? A - B C - D - why?
6
+ ## add later?? A - B C - D - why?
7
7
  ## opt 1) one alpha
8
- ## (?<text_i> [a-z]) # only allow single letter text (not numbers!!)
9
-
8
+ ## (?<text_i> [a-z]) # only allow single letter text (not numbers!!)
9
+
10
10
  ## opt 2) more than one alphanum
11
11
 
12
12
 
@@ -26,19 +26,19 @@ class Parser
26
26
 
27
27
 
28
28
  TEXT_RE = %r{
29
- ## must start with alpha (allow unicode letters!!)
30
- (?<text>
31
- ## positive lookbehind
29
+ ## must start with alpha (allow unicode letters!!)
30
+ (?<text>
31
+ ## positive lookbehind
32
32
  ## (MUST be fixed number of chars - no quantifier e.g. +? etc.)
33
33
  (?<=[ ,;@|\[\]]
34
34
  |^
35
35
  )
36
- (?:
36
+ (?:
37
37
  # opt 1 - start with alpha
38
38
  \p{L}+ ## all unicode letters (e.g. [a-z])
39
39
  |
40
40
 
41
- # opt 2 - start with num!! - allow special case (e.g. 1. FC)
41
+ # opt 2 - start with num!! - allow special case (e.g. 1. FC)
42
42
  \d+ # check for num lookahead (MUST be space or dot)
43
43
  ## MUST be followed by (optional dot) and
44
44
  ## required space !!!
@@ -46,69 +46,79 @@ TEXT_RE = %r{
46
46
  \.? ## optional dot
47
47
  [ ]? ## make space optional too - why? why not?
48
48
  ## yes - eg. 1st, 2nd, 5th etc.
49
- \p{L}+
49
+ \p{L}+
50
50
  )
51
-
51
+
52
52
  (?:(?: (?:[ ]
53
53
  (?!vs?\.?[ ]) ## note - exclude (v[ ]/vs[ ]/v.[ ]/vs.[ ])
54
- )
54
+ )
55
55
  | # only single spaces allowed inline!!!
56
- [-]
56
+ [-]
57
57
  )?
58
58
  (?:
59
59
  \p{L} |
60
- [&/']
60
+ [&/']
61
61
  |
62
62
  (?:
63
- \d+
64
- (?![0-9.:h'/+-])
63
+ \d+
64
+ (?![0-9.:h'/+-])
65
65
  ## negative lookahead for numbers
66
66
  ## note - include digits itself!!!
67
- )|
68
- \.
69
- )
67
+ )|
68
+ \.
69
+ )
70
70
  )* ## must NOT end with space or dash(-)
71
71
  ## todo/fix - possible in regex here
72
72
  ## only end in alphanum a-z0-9 (not dot or & ???)
73
73
 
74
-
74
+
75
75
  ## allow optional at the end
76
76
  ## tag or year
77
- ## make it and in the future - why? why not?
78
- ##
77
+ ## make it and in the future - why? why not?
78
+ ##
79
+ ## change - fix
80
+ ## do NOT use (A) for amateur
81
+ ## use A or A. with NO ()!!!
79
82
  ## (A) - allow with predefined alpha only for now
80
83
  ## e.g. (A) - amateur a team or b?
84
+ ### same for U21 or U9 etc
85
+ ## use with NO ()!!! - why? why not?
81
86
  ## or U21 U9 etc. - why? why not?
82
87
  ## or etc.
83
88
  ## (1879-1893) or allow years e.g. (1879-1893)
84
- ###
85
- (?:
86
- [ ]
87
- \( (?:
88
- A|B|
89
- U\d{1,2}
90
- )
91
- \)
92
- )?
89
+ ###
90
+ ## add allow country code three to five letters for now
91
+ ## change to generic 1 to 5 - why? why not?
92
+ ## e.g. (A), (I),
93
+ ## (AUT)
94
+ ## (TRNC) five? for UEFA code for northern cyprus
95
+ ## change to 1 to 4 - why? why not?
96
+ ## check - fix possible for upper case only here
97
+ ## inline for this group only?
93
98
  (?:
94
- [ ]
99
+ [ ]
95
100
  \(
96
101
  \d{4}-\d{4}
97
102
  \)
98
- )?
99
-
103
+ )?
104
+ (?:
105
+ [ ]+ ## allow more than once space - why? why not?
106
+ \( (?:
107
+ [A-Z]{1,5}
108
+ )
109
+ \)
110
+ )?
100
111
  ## add lookahead/lookbehind
101
- ## must be space!!!
112
+ ## must be space!!!
102
113
  ## (or comma or start/end of string)
103
114
  ## kind of \b !!!
104
115
  ## positive lookahead
105
116
  (?=[ ,;@|\[\]]
106
117
  |$
107
118
  )
108
- )
119
+ )
109
120
  }ix
110
121
 
111
122
 
112
123
  end # class Parser
113
- end # module SportDb
114
-
124
+ end # module SportDb
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 2
7
- PATCH = 1
6
+ MINOR = 3
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -24,11 +24,11 @@ require_relative 'parser/lang'
24
24
  require_relative 'parser/parser'
25
25
 
26
26
 
27
- ## more
27
+ ####
28
+ ## todo/check - move outline reader upstream to cocos - why? why not?
29
+ ## use read_outline(), parse_outline() - why? why not?
28
30
  require_relative 'parser/outline_reader'
29
31
  require_relative 'parser/linter'
30
- require_relative 'parser/opts'
31
-
32
32
 
33
33
  ###
34
34
  # make parser api (easily) available - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-24 00:00:00.000000000 Z
11
+ date: 2024-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -75,7 +75,7 @@ dependencies:
75
75
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
76
  email: gerald.bauer@gmail.com
77
77
  executables:
78
- - fbt
78
+ - fbtok
79
79
  extensions: []
80
80
  extra_rdoc_files:
81
81
  - CHANGELOG.md
@@ -86,11 +86,10 @@ files:
86
86
  - Manifest.txt
87
87
  - README.md
88
88
  - Rakefile
89
- - bin/fbt
89
+ - bin/fbtok
90
90
  - lib/sportdb/parser.rb
91
91
  - lib/sportdb/parser/lang.rb
92
92
  - lib/sportdb/parser/linter.rb
93
- - lib/sportdb/parser/opts.rb
94
93
  - lib/sportdb/parser/outline_reader.rb
95
94
  - lib/sportdb/parser/parser.rb
96
95
  - lib/sportdb/parser/token-date.rb
@@ -1,70 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
7
- class Opts
8
-
9
- SEASON_RE = %r{ (?:
10
- \d{4}-\d{2}
11
- | \d{4}(--[a-z0-9_-]+)?
12
- )
13
- }x
14
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
15
-
16
-
17
- ## note: if pattern includes directory add here
18
- ## (otherwise move to more "generic" datafile) - why? why not?
19
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
20
- #{SEASON}
21
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
22
- }x
23
-
24
-
25
- def self.find( path )
26
- datafiles = []
27
-
28
- ## note: normalize path - use File.expand_path ??
29
- ## change all backslash to slash for now
30
- ## path = path.gsub( "\\", '/' )
31
- path = File.expand_path( path )
32
-
33
- ## check all txt files
34
- ## note: incl. files starting with dot (.)) as candidates
35
- ## (normally excluded with just *)
36
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
37
- ## pp candidates
38
- candidates.each do |candidate|
39
- datafiles << candidate if MATCH_RE.match( candidate )
40
- end
41
-
42
- ## pp datafiles
43
- datafiles
44
- end
45
-
46
-
47
- def self.expand_args( args )
48
- paths = []
49
-
50
- args.each do |arg|
51
- ## check if directory
52
- if Dir.exist?( arg )
53
- datafiles = find( arg )
54
- puts
55
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
56
- pp datafiles
57
- paths += datafiles
58
- else
59
- ## assume it's a file
60
- paths << arg
61
- end
62
- end
63
-
64
- paths
65
- end
66
- end # class Opts
67
-
68
-
69
- end # class Parser
70
- end # module SportDb