sportdb-parser 0.3.8 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eadc79627364072a1d05801fec096aca27e914f214639c4bbe6bbffca2acab0d
4
- data.tar.gz: f052a6d668246082d9fbcc5b90c71b440048080dc423f9428a4560591e797080
3
+ metadata.gz: 820e285e928c8a4067faeee17a31201059e1a8c507430731c79fb7a2772d90a8
4
+ data.tar.gz: c7f83fcc6170f1b73280b175e95f2ebd6b262c3e6f6ec7615109bad554ed3c19
5
5
  SHA512:
6
- metadata.gz: 394f74c596ad1a624d626757972911eee7e0b009df66d5cac7ed1508cb3b056ece6f7fbc1d3c310468d5d065ae22ba18a28d7f8992f333767f20279eb5ce2f78
7
- data.tar.gz: ecaa4a25b3552e69013dea0cc9c87a91b3409cd02e86302f4387e658de6cdfadba2c82c7c31935d4f7c62aaab187a25ce6e85f9dc4239faa346838f5be8dce7b
6
+ metadata.gz: 2d0c8adc9e7406dcdfa9b96e1e9c5c9167cf7ba633830842775923f059bdf1b1bf833901020bb4e63eb786470cef2828265224e2a81bd8de090668d86b34d8ea
7
+ data.tar.gz: e591914f764fa0f2d27e00eda67673fc673734291b88f992bb8d1fe933974536536dde3ccf475522040f7ba9a2e257e8d70776796cf7a44e2ad8cf690ce50961
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.3.8
1
+ ### 0.4.0
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -2,17 +2,13 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
- bin/fbtok
6
5
  config/rounds_de.txt
7
6
  config/rounds_en.txt
8
7
  config/rounds_es.txt
9
8
  config/rounds_misc.txt
10
9
  config/rounds_pt.txt
11
10
  lib/sportdb/parser.rb
12
- lib/sportdb/parser/fbtok/main.rb
13
11
  lib/sportdb/parser/lang.rb
14
- lib/sportdb/parser/linter.rb
15
- lib/sportdb/parser/opts.rb
16
12
  lib/sportdb/parser/outline_reader.rb
17
13
  lib/sportdb/parser/parser.rb
18
14
  lib/sportdb/parser/token-date.rb
data/config/rounds_de.txt CHANGED
@@ -10,17 +10,32 @@ Spiele # in 2017/uy.1.txt -- double check if missing something
10
10
  # in 1960-61/it.1.txt
11
11
 
12
12
 
13
+ 10. Runde
14
+
15
+
13
16
  Zwischenrunde
14
17
 
15
18
  Sechzehntelfinale
16
19
  Platzierungsspiel
17
20
 
18
21
  Qualifikation
22
+ Qualifikation Copa Lib.
19
23
  Qual. 3. Runde
20
24
 
25
+
21
26
  2. Aufstieg Halbfinale
22
27
  2. Aufstieg Finale
23
28
 
29
+
30
+ Halbfinale Gruppe A
31
+ Halbfinale Gruppe B
32
+
33
+ Entscheidungsspiele Abstieg
34
+
35
+ Trostrunde Finale
36
+ Trostrunde Halbfinale
37
+
38
+
24
39
  Playoff-Runde
25
40
  Relegation
26
41
  Aufstieg
@@ -43,6 +58,7 @@ Direkter Abstieg
43
58
  7. Platz
44
59
  9. Platz
45
60
  11. Platz
61
+ 13. Platz
46
62
 
47
63
  5.-8. Platz Playoffs
48
64
  9.-12. Platz Playoffs
@@ -53,11 +69,27 @@ Entscheidung 1. Runde
53
69
  Entscheidung 2. Runde
54
70
 
55
71
 
56
- Zwischenrunde Gr. B ## move to group_de - why? why not?
57
72
  1. Runde Gruppe 1
58
73
  1. Runde Gruppe 2
59
74
 
60
75
 
76
+ Zwischenrunde Gr. A ## move to group_de - why? why not?
77
+ Zwischenrunde Gr. B
78
+ Zwischenrunde Gr. C
79
+ Zwischenrunde Gr. D
80
+
81
+
82
+ Vorrunde Gr. A
83
+ Vorrunde Gr. B
84
+ Vorrunde Gr. C
85
+ Vorrunde Gr. D
86
+ Vorrunde Gr. E
87
+ Vorrunde Gr. F
88
+ Vorrunde Gr. G
89
+ Vorrunde Gr. H
90
+
91
+
92
+
61
93
  ### todo/fix
62
94
  ### move to group - why? why not?
63
95
  Gruppe 1
data/config/rounds_en.txt CHANGED
@@ -13,3 +13,10 @@ Wildcard
13
13
  Elimination Final
14
14
  Quadrangular
15
15
 
16
+ Major Semi-Final
17
+ Minor Semi-Final
18
+
19
+
20
+ ## keep weirdo matchday ??
21
+ Matchday 0 ## in 2003-04/az.1.txt
22
+
data/config/rounds_es.txt CHANGED
@@ -11,9 +11,13 @@ Final Segunda Ronda
11
11
  Gran Final
12
12
 
13
13
  Interzone
14
- Zone A
14
+ Zona A
15
15
  Zona B
16
16
 
17
17
  Final de Grupos
18
18
  Repechaje
19
19
 
20
+ Copa Libertadores
21
+ Copa Sudamericana
22
+
23
+
@@ -16,3 +16,10 @@ Replay 1e ronde
16
16
  Replay 2e ronde
17
17
  Replay halve finale
18
18
  Replay finale
19
+
20
+ Beslissingswedstrijd
21
+ Groep 15
22
+ Groep 17
23
+ Groep 18
24
+ Groep 19
25
+ Groep 20
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 3
7
- PATCH = 8
6
+ MINOR = 4
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -3,9 +3,6 @@ require 'cocos'
3
3
  require 'season/formats' # e.g. Season() support machinery
4
4
 
5
5
 
6
- ## more stdlibs
7
- require 'optparse' ## check - already auto-required in cocos? keep? why? why not?
8
-
9
6
 
10
7
 
11
8
  ####
@@ -35,10 +32,6 @@ require_relative 'parser/parser'
35
32
  require_relative 'parser/outline_reader'
36
33
 
37
34
 
38
- require_relative 'parser/opts'
39
- require_relative 'parser/linter'
40
- require_relative 'parser/fbtok/main'
41
-
42
35
 
43
36
  ###
44
37
  # make parser api (easily) available - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-13 00:00:00.000000000 Z
11
+ date: 2025-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -64,18 +64,17 @@ dependencies:
64
64
  requirements:
65
65
  - - "~>"
66
66
  - !ruby/object:Gem::Version
67
- version: '4.1'
67
+ version: '4.2'
68
68
  type: :development
69
69
  prerelease: false
70
70
  version_requirements: !ruby/object:Gem::Requirement
71
71
  requirements:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
- version: '4.1'
74
+ version: '4.2'
75
75
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
76
  email: gerald.bauer@gmail.com
77
- executables:
78
- - fbtok
77
+ executables: []
79
78
  extensions: []
80
79
  extra_rdoc_files:
81
80
  - CHANGELOG.md
@@ -91,17 +90,13 @@ files:
91
90
  - Manifest.txt
92
91
  - README.md
93
92
  - Rakefile
94
- - bin/fbtok
95
93
  - config/rounds_de.txt
96
94
  - config/rounds_en.txt
97
95
  - config/rounds_es.txt
98
96
  - config/rounds_misc.txt
99
97
  - config/rounds_pt.txt
100
98
  - lib/sportdb/parser.rb
101
- - lib/sportdb/parser/fbtok/main.rb
102
99
  - lib/sportdb/parser/lang.rb
103
- - lib/sportdb/parser/linter.rb
104
- - lib/sportdb/parser/opts.rb
105
100
  - lib/sportdb/parser/outline_reader.rb
106
101
  - lib/sportdb/parser/parser.rb
107
102
  - lib/sportdb/parser/token-date.rb
@@ -131,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
126
  - !ruby/object:Gem::Version
132
127
  version: '0'
133
128
  requirements: []
134
- rubygems_version: 3.4.10
129
+ rubygems_version: 3.5.22
135
130
  signing_key:
136
131
  specification_version: 4
137
132
  summary: sportdb-parser - football.txt match parser (& tokenizer)
data/bin/fbtok DELETED
@@ -1,13 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ## tip: to test run (from the directory that holds ./lib and ./bin) use:
4
- ## ruby -I ./lib bin/fbtok
5
-
6
- require 'sportdb/parser' ## loads the parser library (incl. the Fbtok command-line tool)
7
-
8
-
9
- Fbtok.main( ARGV ) ## pass the command-line arguments on as-is
10
-
11
-
12
- puts "bye"
13
-
@@ -1,141 +0,0 @@
1
-
2
###
## Fbtok - command-line tool for checking match datafiles
##   via the SportDb::Parser::Linter (tokenize or tokenize & parse)
module Fbtok

  ## Entry point of the command-line tool.
  ##
  ##  args - command-line arguments (default: ARGV);
  ##         note: recognized options get removed in-place
  ##
  ##  Prints progress and results to stdout; returns nil.
  def self.main( args=ARGV )
    opts = parse_options( args )

    puts "OPTS:"
    p opts
    puts "ARGV:"
    p args

    ## todo/check - use packs or projects or such
    ##                instead of specs - why? why not?
    specs = collect_specs( args, file: opts[:file] )

    SportDb::Parser::Linter.debug = true    if opts[:debug]

    linter = SportDb::Parser::Linter.new

    specs.each do |paths, rec|
      errors = lint_paths( linter, paths, parse: !opts[:metal] )

      puts
      if errors.empty?
        puts "OK no parse errors found in #{paths.size} datafile(s)"
      else
        pp errors
        puts
        puts "!! #{errors.size} parse error(s) in #{paths.size} datafile(s)"
      end

      ## add errors to rec via rec['errors'] to allow
      ##   for further processing/reporting
      rec['errors'] = errors
    end

    ## generate a report only if the --file option is used
    return unless opts[:file]

    puts
    puts "SUMMARY:"
    puts summary_report( specs )

    # maybe write out in the future?
    # basedir  = File.dirname( opts[:file] )
    # basename = File.basename( opts[:file], File.extname( opts[:file] ))
  end


  ## Parse the command-line options (removing them from args);
  ##   returns the options hash with the keys :debug, :metal and :file.
  def self.parse_options( args )
    opts = {
      debug: true,
      metal: false,
      file:  nil,
    }

    cli = OptionParser.new do |o|
      o.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"

      o.on( "-q", "--quiet",
            "less debug output/messages - default is (#{!opts[:debug]})" ) do
        opts[:debug] = false
      end
      o.on( "--verbose", "--debug",
            "turn on verbose / debug output (default: #{opts[:debug]})" ) do
        opts[:debug] = true
      end

      o.on( "--metal",
            "turn off typed parse tree; show to the metal tokens"+
            " (default: #{opts[:metal]})" ) do
        opts[:metal] = true
      end

      o.on( "-f FILE", "--file FILE",
            "read datafiles (pathspecs) via .csv file") do |file|
        opts[:file] = file
        ## note: for batch (massive) processing auto-set debug (verbose output) to false (as default)
        opts[:debug] = false
      end
    end
    cli.parse!( args )

    opts
  end

  ## Build the list of [paths, rec] pairs to check.
  ##   with file    - one pair per record of the .csv file
  ##                    (pathspecs are relative to the .csv file)
  ##   without file - a single pair with the (expanded) args
  ##                    or the built-in sample datafiles if args is empty
  def self.collect_specs( args, file: )
    if file
      recs = read_csv( file )
      pp recs
      ## note - make pathspecs relative to passed in file arg!!!
      basedir = File.dirname( file )
      recs.map do |rec|
        [SportDb::Parser::Opts.find( rec['path'], dir: basedir ), rec]
      end
    else
      paths = if args.empty?
                [
                  '../../../openfootball/euro/2021--europe/euro.txt',
                  '../../../openfootball/euro/2024--germany/euro.txt',
                ]
              else
                ## check for directories
                ##   and auto-expand
                SportDb::Parser::Opts.expand_args( args )
              end
      [[paths, {}]]
    end
  end

  ## Read every path with the linter; returns all collected errors.
  def self.lint_paths( linter, paths, parse: )
    errors = []
    paths.each_with_index do |path, j|
      puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
      linter.read( path, parse: parse )

      ## note: the linter resets its error list on every read
      errors.concat( linter.errors )   if linter.errors?
    end
    errors
  end

  ## Format the summary report (one entry per dataset) as a string.
  def self.summary_report( specs )
    buf = String.new
    buf << "# fbtok summary report - #{specs.size} dataset(s)\n\n"

    specs.each do |paths, rec|
      errors = rec['errors']

      buf << (errors.empty? ? " OK " : "!! #{errors.size} ERROR(S) ")
      buf << "%-20s" % rec['path']
      buf << " - #{paths.size} datafile(s)"
      buf << "\n"

      next if errors.empty?

      buf << errors.pretty_inspect
      buf << "\n"
    end

    buf
  end

  private_class_method :parse_options, :collect_specs,
                       :lint_paths, :summary_report
end  # module Fbtok
@@ -1,156 +0,0 @@
1
-
2
module SportDb
class Parser

###
## note - Linter for now nested inside Parser - keep? why? why not?
##
##  Reads a datafile outline and runs every text line through
##   the tokenizer (or the tokenizer & parser) collecting all reported errors.
class Linter

  def self.debug=(value) @@debug = value; end
  def self.debug?() @@debug ||= false; end  ## note: default is FALSE
  def debug?()  self.class.debug?; end


  ## errors of the last read call -
  ##   a list of triplet tuples (file / msg / line text)
  attr_reader :errors

  def initialize
    @errors = []
    @parser = Parser.new   ## use own parser instance (not shared) - why? why not?
  end


  def errors?() !@errors.empty?; end


  ## attribute (key: value) line
  ##  note: colon (:) MUST be followed by one (or more) spaces
  ##      make sure mon feb 12 18:10 will not match
  ##        allow 1. FC Köln etc.
  ##               Mainz 05:
  ##           limit to 30 chars max
  ##          only allow chars incl. intl but (NOT ()[]/;)
  ##
  ##   Group A:
  ##   Group B:   - remove colon
  ##     or lookup first
  ATTRIB_RE = %r{^
                   [ ]*? # slurp leading spaces
                   (?<key>[^:|\]\[()\/; -]
                          [^:|\]\[()\/;]{0,30}
                   )
                   [ ]*? # slurp trailing spaces
                   :[ ]+
                   (?<value>.+)
                   [ ]*? # slurp trailing spaces
                   $
                }ix


  #########
  ## Check the datafile in path.
  ##
  ##  parse - false (default) - tokenize (only)
  ##        - true            - tokenize & parse
  ##
  ##  The errors found are available via errors / errors?.
  ##  Raises ArgumentError on a node type other than :h1, :h2 or :p.
  def read( path, parse: false )
    ## note: every (new) read call - resets errors list to empty
    @errors = []

    heading = nil     ## text of the last level 1 heading seen (if any)
    orphans = 0       ## track paragraphs with no heading

    ## note: the attribute state is shared by all paragraphs -
    ##         an unterminated attribute carries over into the next paragraph
    attrib_found = false

    OutlineReader.read( path ).each do |node|
      type = node[0]

      case type
      when :h1
        heading = node[1]    ## get heading text
        puts " = Heading 1 >#{node[1]}<"
      when :h2
        if heading.nil?
          puts "!! WARN - no heading for subheading; skipping parse"
          next
        end
        puts " == Heading 2 >#{node[1]}<"
      when :p
        if heading.nil?
          orphans += 1    ## note: warns on every orphan (with running count)
          puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
          next
        end

        node[1].each do |line|
          if debug?
            puts
            puts "line >#{line}<"
          end

          ## skip new (experimental attrib syntax)
          ## note: check attrib regex AFTER group def e.g.:
          ##         Group A:
          ##         Group B: etc.
          ##        todo/fix - change Group A: to Group A etc.
          ##                          Group B: to Group B
          ## NOTE(review): the first attribute line is NOT checked
          ##   for a trailing dot - a one-line attribute ending in a dot
          ##   still skips the lines that follow up to the next dot; confirm intended
          if !attrib_found && ATTRIB_RE.match?( line )
            attrib_found = true
            next
          end

          if attrib_found
            ## attribute continuation line -
            ##   keep skipping up to (and incl.) the line ending with a dot
            attrib_found = false    if line.end_with?( '.' )
            next
          end

          lint_line( path, line, parse: parse )
        end
      else
        pp node
        raise ArgumentError, "unsupported (node) type >#{type}<"
      end
    end  # each node
  end  # read


  private

  ## Tokenize (or tokenize & parse) a single line and
  ##   add all reported messages to the "global" error list.
  def lint_line( path, line, parse: )
    result, error_messages = if parse
                               @parser.parse_with_errors( line )
                             else
                               @parser.tokenize_with_errors( line )
                             end

    ## make a triplet tuple (file / msg / line text)
    error_messages.each { |msg| @errors << [path, msg, line] }

    pp result   if debug?
  end
end  # class Linter


end   # class Parser
end   # module SportDb
@@ -1,81 +0,0 @@
1
-
2
module SportDb
class Parser


###
## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
##
##  Helpers for turning command-line path arguments into lists of datafiles.
class Opts

  ## season directory e.g. 2024-25, 2024 or 2024--germany
  SEASON_RE = %r{ (?:
                       \d{4}-\d{2}
                     | \d{4}(--[a-z0-9_-]+)?
                  )
                }x
  SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?


  ## note: if pattern includes directory add here
  ##     (otherwise move to more "generic" datafile) - why? why not?
  ##   update - note include/allow dot (.) too
  ##               e.g. 2024-25/at.1.txt
  ##                       change to at_1 or uefa_cl or such - why? why not?
  MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
                 #{SEASON}
                 /[a-z0-9_.-]+\.txt$ ## txt e.g /1-premierleague.txt
               }x


  ## Find all match datafiles (that is, .txt files
  ##   located directly inside a season directory) below path.
  ##
  ##  path - directory to search (recursively)
  ##  dir  - optional base directory used to resolve a relative path
  ##
  ##  Returns a list of absolute paths.
  def self.find( path, dir: nil )
    ## check - rename dir
    ##          use root_dir or work_dir or cd or such - why? why not?

    ## note: normalize path - File.expand_path changes all backslash to slash too
    root = if dir
             File.expand_path( path, File.expand_path( dir ))
           else
             File.expand_path( path )
           end

    ## check all txt files
    ## note: incl. files starting with dot (.)) as candidates
    ##     (normally excluded with just *)
    ## note: pass root via base: (NOT as part of the pattern);
    ##     otherwise glob metacharacters ([]{}*?) in the directory name
    ##     get interpreted as a pattern and nothing gets found
    Dir.glob( '**/{*,.*}.txt', base: root )
       .map    { |candidate| File.join( root, candidate ) }
       .select { |candidate| MATCH_RE.match?( candidate ) }
  end


  ## Expand all directory arguments into their match datafiles;
  ##   any other argument is assumed to be a file and passed along as-is.
  def self.expand_args( args )
    args.flat_map do |arg|
      next [arg]   unless Dir.exist?( arg )   ## assume it's a file

      datafiles = find( arg )
      puts
      puts " found #{datafiles.size} match txt datafiles in #{arg}"
      pp datafiles
      datafiles
    end
  end
end  # class Opts


end   # class Parser
end   # module SportDb