sportdb-parser 0.3.9 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9187500dd6b826499a59de1fc84b98adcc79fbb60fbdeff02cd810c17170d4e4
4
- data.tar.gz: 628a1145d613f71301673c33ed416605b84a65e6c1c795bb8e9997b85ab21f09
3
+ metadata.gz: 820e285e928c8a4067faeee17a31201059e1a8c507430731c79fb7a2772d90a8
4
+ data.tar.gz: c7f83fcc6170f1b73280b175e95f2ebd6b262c3e6f6ec7615109bad554ed3c19
5
5
  SHA512:
6
- metadata.gz: dfbc14a3a63460efc6fbf2f33a9c4b40173b127784ec3885be1968b46eb0036610362c3022db5c584db4d6ed782e51f0952cd605100f4ff58c371a9332037044
7
- data.tar.gz: 8c40d210b66c06c3166583f146b433c9c6436dec010960d18a0f95edb6048838a67f27b5cbbae8bb5f0dbe6e76d8ed732505163eab10efb8582c9625dbf3c5a8
6
+ metadata.gz: 2d0c8adc9e7406dcdfa9b96e1e9c5c9167cf7ba633830842775923f059bdf1b1bf833901020bb4e63eb786470cef2828265224e2a81bd8de090668d86b34d8ea
7
+ data.tar.gz: e591914f764fa0f2d27e00eda67673fc673734291b88f992bb8d1fe933974536536dde3ccf475522040f7ba9a2e257e8d70776796cf7a44e2ad8cf690ce50961
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.3.9
1
+ ### 0.4.0
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -2,17 +2,13 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
- bin/fbtok
6
5
  config/rounds_de.txt
7
6
  config/rounds_en.txt
8
7
  config/rounds_es.txt
9
8
  config/rounds_misc.txt
10
9
  config/rounds_pt.txt
11
10
  lib/sportdb/parser.rb
12
- lib/sportdb/parser/fbtok/main.rb
13
11
  lib/sportdb/parser/lang.rb
14
- lib/sportdb/parser/linter.rb
15
- lib/sportdb/parser/opts.rb
16
12
  lib/sportdb/parser/outline_reader.rb
17
13
  lib/sportdb/parser/parser.rb
18
14
  lib/sportdb/parser/token-date.rb
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 3
7
- PATCH = 9
6
+ MINOR = 4
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -3,9 +3,6 @@ require 'cocos'
3
3
  require 'season/formats' # e.g. Season() support machinery
4
4
 
5
5
 
6
- ## more stdlibs
7
- require 'optparse' ## check - already auto-required in cocos? keep? why? why not?
8
-
9
6
 
10
7
 
11
8
  ####
@@ -35,10 +32,6 @@ require_relative 'parser/parser'
35
32
  require_relative 'parser/outline_reader'
36
33
 
37
34
 
38
- require_relative 'parser/opts'
39
- require_relative 'parser/linter'
40
- require_relative 'parser/fbtok/main'
41
-
42
35
 
43
36
  ###
44
37
  # make parser api (easily) available - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-13 00:00:00.000000000 Z
11
+ date: 2025-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -64,18 +64,17 @@ dependencies:
64
64
  requirements:
65
65
  - - "~>"
66
66
  - !ruby/object:Gem::Version
67
- version: '4.1'
67
+ version: '4.2'
68
68
  type: :development
69
69
  prerelease: false
70
70
  version_requirements: !ruby/object:Gem::Requirement
71
71
  requirements:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
- version: '4.1'
74
+ version: '4.2'
75
75
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
76
  email: gerald.bauer@gmail.com
77
- executables:
78
- - fbtok
77
+ executables: []
79
78
  extensions: []
80
79
  extra_rdoc_files:
81
80
  - CHANGELOG.md
@@ -91,17 +90,13 @@ files:
91
90
  - Manifest.txt
92
91
  - README.md
93
92
  - Rakefile
94
- - bin/fbtok
95
93
  - config/rounds_de.txt
96
94
  - config/rounds_en.txt
97
95
  - config/rounds_es.txt
98
96
  - config/rounds_misc.txt
99
97
  - config/rounds_pt.txt
100
98
  - lib/sportdb/parser.rb
101
- - lib/sportdb/parser/fbtok/main.rb
102
99
  - lib/sportdb/parser/lang.rb
103
- - lib/sportdb/parser/linter.rb
104
- - lib/sportdb/parser/opts.rb
105
100
  - lib/sportdb/parser/outline_reader.rb
106
101
  - lib/sportdb/parser/parser.rb
107
102
  - lib/sportdb/parser/token-date.rb
@@ -131,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
126
  - !ruby/object:Gem::Version
132
127
  version: '0'
133
128
  requirements: []
134
- rubygems_version: 3.4.10
129
+ rubygems_version: 3.5.22
135
130
  signing_key:
136
131
  specification_version: 4
137
132
  summary: sportdb-parser - football.txt match parser (& tokenizer)
data/bin/fbtok DELETED
@@ -1,13 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ## tip: to test run:
4
- ## ruby -I ./lib bin/fbtok
5
-
6
- require 'sportdb/parser'
7
-
8
-
9
- Fbtok.main( ARGV )
10
-
11
-
12
- puts "bye"
13
-
@@ -1,141 +0,0 @@
1
-
2
- module Fbtok
3
- def self.main( args=ARGV )
4
-
5
- opts = {
6
- debug: true,
7
- metal: false,
8
- file: nil,
9
- }
10
-
11
- parser = OptionParser.new do |parser|
12
- parser.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"
13
-
14
-
15
- parser.on( "-q", "--quiet",
16
- "less debug output/messages - default is (#{!opts[:debug]})" ) do |debug|
17
- opts[:debug] = false
18
- end
19
- parser.on( "--verbose", "--debug",
20
- "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
21
- opts[:debug] = true
22
- end
23
-
24
- parser.on( "--metal",
25
- "turn off typed parse tree; show to the metal tokens"+
26
- " (default: #{opts[:metal]})" ) do |metal|
27
- opts[:metal] = true
28
- end
29
-
30
- parser.on( "-f FILE", "--file FILE",
31
- "read datafiles (pathspecs) via .csv file") do |file|
32
- opts[:file] = file
33
- ## note: for batch (massive) processing auto-set debug (verbose output) to false (as default)
34
- opts[:debug] = false
35
- end
36
- end
37
- parser.parse!( args )
38
-
39
- puts "OPTS:"
40
- p opts
41
- puts "ARGV:"
42
- p args
43
-
44
-
45
- ## todo/check - use packs or projects or such
46
- ## instead of specs - why? why not?
47
- specs = []
48
- if opts[:file]
49
- recs = read_csv( opts[:file] )
50
- pp recs
51
- ## note - make pathspecs relative to passed in file arg!!!
52
- basedir = File.dirname( opts[:file] )
53
- recs.each do |rec|
54
- paths = SportDb::Parser::Opts.find( rec['path'], dir: basedir )
55
- specs << [paths, rec]
56
- end
57
- else
58
- paths = if args.empty?
59
- [
60
- '../../../openfootball/euro/2021--europe/euro.txt',
61
- '../../../openfootball/euro/2024--germany/euro.txt',
62
- ]
63
- else
64
- ## check for directories
65
- ## and auto-expand
66
- SportDb::Parser::Opts.expand_args( args )
67
- end
68
- specs << [paths, {}]
69
- end
70
-
71
-
72
- SportDb::Parser::Linter.debug = true if opts[:debug]
73
-
74
- linter = SportDb::Parser::Linter.new
75
-
76
-
77
- specs.each_with_index do |(paths, rec),i|
78
- errors = []
79
-
80
- paths.each_with_index do |path,j|
81
- puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
82
- linter.read( path, parse: !opts[:metal] )
83
-
84
- errors += linter.errors if linter.errors?
85
- end
86
-
87
- if errors.size > 0
88
- puts
89
- pp errors
90
- puts
91
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
92
- else
93
- puts
94
- puts "OK no parse errors found in #{paths.size} datafile(s)"
95
- end
96
-
97
- ## add errors to rec via rec['errors'] to allow
98
- ## for further processing/reporting
99
- rec['errors'] = errors
100
- end
101
-
102
-
103
- ###
104
- ## generate a report if --file option used
105
- if opts[:file]
106
-
107
- buf = String.new
108
-
109
- buf << "# fbtok summary report - #{specs.size} dataset(s)\n\n"
110
-
111
- specs.each_with_index do |(paths, rec),i|
112
- errors = rec['errors']
113
-
114
- if errors.size > 0
115
- buf << "!! #{errors.size} ERROR(S) "
116
- else
117
- buf << " OK "
118
- end
119
- buf << "%-20s" % rec['path']
120
- buf << " - #{paths.size} datafile(s)"
121
- buf << "\n"
122
-
123
- if errors.size > 0
124
- buf << errors.pretty_inspect
125
- buf << "\n"
126
- end
127
- end
128
-
129
- puts
130
- puts "SUMMARY:"
131
- puts buf
132
-
133
- # maybe write out in the future?
134
- # basedir = File.dirname( opts[:file] )
135
- # basename = File.basename( opts[:file], File.extname( opts[:file] ))
136
- end
137
-
138
-
139
-
140
- end # method self.main
141
- end # module Fbtok
@@ -1,156 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Linter for now nested inside Parser - keep? why? why not?
7
- class Linter
8
-
9
- def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
13
-
14
-
15
- attr_reader :errors
16
-
17
- def initialize
18
- @errors = []
19
- @parser = Parser.new ## use own parser instance (not shared) - why? why not?
20
- end
21
-
22
-
23
- def errors?() @errors.size > 0; end
24
-
25
-
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
- #########
52
- ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
54
- def read( path, parse: false )
55
- ## note: every (new) read call - resets errors list to empty
56
- @errors = []
57
-
58
- nodes = OutlineReader.read( path )
59
-
60
- ## process nodes
61
- h1 = nil
62
- h2 = nil
63
- orphans = 0 ## track paragraphs's with no heading
64
-
65
- attrib_found = false
66
-
67
-
68
- nodes.each do |node|
69
- type = node[0]
70
-
71
- if type == :h1
72
- h1 = node[1] ## get heading text
73
- puts " = Heading 1 >#{node[1]}<"
74
- elsif type == :h2
75
- if h1.nil?
76
- puts "!! WARN - no heading for subheading; skipping parse"
77
- next
78
- end
79
- h2 = node[1] ## get heading text
80
- puts " == Heading 2 >#{node[1]}<"
81
- elsif type == :p
82
-
83
- if h1.nil?
84
- orphans += 1 ## only warn once
85
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
86
- next
87
- end
88
-
89
- lines = node[1]
90
-
91
- tree = []
92
- lines.each_with_index do |line,i|
93
-
94
- if debug?
95
- puts
96
- puts "line >#{line}<"
97
- end
98
-
99
-
100
- ## skip new (experimental attrib syntax)
101
- if attrib_found == false &&
102
- ATTRIB_RE.match?( line )
103
- ## note: check attrib regex AFTER group def e.g.:
104
- ## Group A:
105
- ## Group B: etc.
106
- ## todo/fix - change Group A: to Group A etc.
107
- ## Group B: to Group B
108
- attrib_found = true
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- next
111
- end
112
-
113
- if attrib_found
114
- ## check if line ends with dot
115
- ## if not slurp up lines to the next do!!!
116
- ## logger.debug "skipping key/value line - >#{line}<"
117
- attrib_found = false if line.end_with?( '.' )
118
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
119
- next
120
- end
121
-
122
- t, error_messages = if parse
123
- @parser.parse_with_errors( line )
124
- else
125
- @parser.tokenize_with_errors( line )
126
- end
127
-
128
-
129
- if error_messages.size > 0
130
- ## add to "global" error list
131
- ## make a triplet tuple (file / msg / line text)
132
- error_messages.each do |msg|
133
- @errors << [ path,
134
- msg,
135
- line
136
- ]
137
- end
138
- end
139
-
140
- pp t if debug?
141
-
142
- tree << t
143
- end
144
-
145
- ## pp tree
146
- else
147
- pp node
148
- raise ArgumentError, "unsupported (node) type >#{type}<"
149
- end
150
- end # each node
151
- end # read
152
- end # class Linter
153
-
154
-
155
- end # class Parser
156
- end # module SportDb
@@ -1,81 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
-
6
- ###
7
- ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
8
- class Opts
9
-
10
- SEASON_RE = %r{ (?:
11
- \d{4}-\d{2}
12
- | \d{4}(--[a-z0-9_-]+)?
13
- )
14
- }x
15
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
16
-
17
-
18
- ## note: if pattern includes directory add here
19
- ## (otherwise move to more "generic" datafile) - why? why not?
20
- ## update - note include/allow dot (.) too
21
- ## e.g. 2024-25/at.1.txt
22
- ## change to at_1 or uefa_cl or such - why? why not?
23
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
24
- #{SEASON}
25
- /[a-z0-9_.-]+\.txt$ ## txt e.g /1-premierleague.txt
26
- }x
27
-
28
-
29
- def self.find( path, dir: nil )
30
- ## check - rename dir
31
- ## use root_dir or work_dir or cd or such - why? why not?
32
-
33
- datafiles = []
34
-
35
- ## note: normalize path - use File.expand_path ??
36
- ## change all backslash to slash for now
37
- ## path = path.gsub( "\\", '/' )
38
- path = if dir
39
- File.expand_path( path, File.expand_path( dir ))
40
- else
41
- File.expand_path( path )
42
- end
43
-
44
- ## check all txt files
45
- ## note: incl. files starting with dot (.)) as candidates
46
- ## (normally excluded with just *)
47
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
48
- ## pp candidates
49
- candidates.each do |candidate|
50
- datafiles << candidate if MATCH_RE.match( candidate )
51
- end
52
-
53
- ## pp datafiles
54
- datafiles
55
- end
56
-
57
-
58
- def self.expand_args( args )
59
- paths = []
60
-
61
- args.each do |arg|
62
- ## check if directory
63
- if Dir.exist?( arg )
64
- datafiles = find( arg )
65
- puts
66
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
67
- pp datafiles
68
- paths += datafiles
69
- else
70
- ## assume it's a file
71
- paths << arg
72
- end
73
- end
74
-
75
- paths
76
- end
77
- end # class Opts
78
-
79
-
80
- end # class Parser
81
- end # module SportDb