sportdb-parser 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3657cedc5125ee2515efa8be4a1838d05b7290523dd893f7eba5b87024e71238
4
- data.tar.gz: caf6d7e909e17fa0dcabf659ab8f5046ca1940d8f7c1c6f5312e485dc0089384
3
+ metadata.gz: 0c9225b21f400b9f9cced2052c3062f41a091ed81d3d4239164c9652f53ebc6e
4
+ data.tar.gz: f7250eaa21324962df27e7cdd397857afa570c610f00c80c31e5105e40964002
5
5
  SHA512:
6
- metadata.gz: 4063565aada304a1eb96009b6fe542392f41a55d4ad4d21b5de156004bd69a055c5f86b076bed1defbe50423c8c891dd538931ea6ca9b8ec41e237c23e699219
7
- data.tar.gz: 91f6476810cb6617dfcd703ada57592cd38b87f3b4b9fc6fd4468a9457ff0e6ae6337a4e4f5c782e1b80f5f6b6015d5ce26ed6330915cd67a5fb6606f665017f
6
+ metadata.gz: 471c938c233d8f81d7a0fd5e4470a27a52486906764816b6c35ea3d88e19650c81302fd5ff9ee30b85d3a8e9f81ada8eef20b49bd3de924c7238acb106ba6082
7
+ data.tar.gz: 24d1cf3846404859ad7e751895325b256321d43e2881413fda6325c744ca0c31b52ef2032a9dfc8e56e67d7a06df54a6d2780a297982440b8e40b7055fe06c26
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.1
1
+ ### 0.2.2
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -2,12 +2,8 @@ CHANGELOG.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
- bin/fbt
6
5
  lib/sportdb/parser.rb
7
6
  lib/sportdb/parser/lang.rb
8
- lib/sportdb/parser/linter.rb
9
- lib/sportdb/parser/opts.rb
10
- lib/sportdb/parser/outline_reader.rb
11
7
  lib/sportdb/parser/parser.rb
12
8
  lib/sportdb/parser/token-date.rb
13
9
  lib/sportdb/parser/token-score.rb
@@ -155,6 +155,35 @@ DATE_RE = Regexp.union(
155
155
  )
156
156
 
157
157
 
158
+ ##
159
+ ## add a date parser helper
160
+ def self.parse_date( str, start: )
161
+ if m=DATE_RE.match( str )
162
+
163
+ year = m[:year].to_i(10) if m[:year]
164
+ month = MONTH_MAP[ m[:month_name].downcase ] if m[:month_name]
165
+ day = m[:day].to_i(10) if m[:day]
166
+ wday = DAY_MAP[ m[:day_name].downcase ] if m[:day_name]
167
+
168
+ if year.nil? ## try to calculate year
169
+ year = if month > start.month ||
170
+ (month == start.month && day >= start.day)
171
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
172
+ start.year
173
+ else
174
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
175
+ start.year+1
176
+ end
177
+ end
178
+ Date.new( year,month,day )
179
+ else
180
+ puts "!! ERROR - unexpected date format; cannot parse >#{str}<"
181
+ exit 1
182
+ end
183
+ end
184
+
185
+
186
+
158
187
  ###
159
188
  # date duration
160
189
  # use - or + as separator
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 2
7
- PATCH = 1
7
+ PATCH = 2
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -24,11 +24,6 @@ require_relative 'parser/lang'
24
24
  require_relative 'parser/parser'
25
25
 
26
26
 
27
- ## more
28
- require_relative 'parser/outline_reader'
29
- require_relative 'parser/linter'
30
- require_relative 'parser/opts'
31
-
32
27
 
33
28
  ###
34
29
  # make parser api (easily) available - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-24 00:00:00.000000000 Z
11
+ date: 2024-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -74,8 +74,7 @@ dependencies:
74
74
  version: '4.1'
75
75
  description: sportdb-parser - football.txt match parser (& tokenizer)
76
76
  email: gerald.bauer@gmail.com
77
- executables:
78
- - fbt
77
+ executables: []
79
78
  extensions: []
80
79
  extra_rdoc_files:
81
80
  - CHANGELOG.md
@@ -86,12 +85,8 @@ files:
86
85
  - Manifest.txt
87
86
  - README.md
88
87
  - Rakefile
89
- - bin/fbt
90
88
  - lib/sportdb/parser.rb
91
89
  - lib/sportdb/parser/lang.rb
92
- - lib/sportdb/parser/linter.rb
93
- - lib/sportdb/parser/opts.rb
94
- - lib/sportdb/parser/outline_reader.rb
95
90
  - lib/sportdb/parser/parser.rb
96
91
  - lib/sportdb/parser/token-date.rb
97
92
  - lib/sportdb/parser/token-score.rb
data/bin/fbt DELETED
@@ -1,94 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ## tip: to test run:
4
- ## ruby -I ./lib bin/fbt
5
-
6
- ## our own code
7
- require 'sportdb/parser'
8
-
9
-
10
-
11
- require 'optparse'
12
-
13
- ##
14
- ## read textfile
15
- ## and dump tokens
16
- ##
17
- ## fbt ../openfootball/.../euro.txt
18
-
19
-
20
-
21
-
22
- args = ARGV
23
- opts = { debug: false,
24
- metal: false }
25
-
26
- parser = OptionParser.new do |parser|
27
- parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
28
-
29
- ##
30
- ## check if git has a offline option?? (use same)
31
- ## check for other tools - why? why not?
32
-
33
-
34
- parser.on( "--verbose", "--debug",
35
- "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
36
- opts[:debug] = debug
37
- end
38
-
39
- parser.on( "--metal",
40
- "turn off typed parse tree; show to the metal tokens"+
41
- " (default: #{opts[:metal]})" ) do |metal|
42
- opts[:metal] = metal
43
- end
44
- end
45
- parser.parse!( args )
46
-
47
- puts "OPTS:"
48
- p opts
49
- puts "ARGV:"
50
- p args
51
-
52
-
53
-
54
-
55
-
56
- paths = if args.empty?
57
- [
58
- '../../../openfootball/euro/2021--europe/euro.txt',
59
- '../../../openfootball/euro/2024--germany/euro.txt',
60
- ]
61
- else
62
- ## check for directories
63
- ## and auto-expand
64
-
65
- SportDb::Parser::Opts.expand_args( args )
66
- end
67
-
68
-
69
-
70
- SportDb::Parser::Linter.debug = true if opts[:debug]
71
-
72
- linter = SportDb::Parser::Linter.new
73
-
74
- errors = []
75
-
76
- paths.each_with_index do |path,i|
77
- puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
78
- linter.read( path, parse: !opts[:metal] )
79
-
80
- errors += linter.errors if linter.errors?
81
- end
82
-
83
- if errors.size > 0
84
- puts
85
- pp errors
86
- puts
87
- puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
88
- else
89
- puts
90
- puts "OK no parse errors found in #{paths.size} datafile(s)"
91
- end
92
-
93
- puts "bye"
94
-
@@ -1,149 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Linter for now nested inside Parser - keep? why? why not?
7
- class Linter
8
-
9
- def self.debug=(value) @@debug = value; end
10
- def self.debug?() @@debug ||= false; end ## note: default is FALSE
11
- def debug?() self.class.debug?; end
12
-
13
-
14
-
15
- attr_reader :errors
16
-
17
- def initialize
18
- @errors = []
19
- @parser = Parser.new ## use own parser instance (not shared) - why? why not?
20
- end
21
-
22
-
23
- def errors?() @errors.size > 0; end
24
-
25
-
26
-
27
- ## note: colon (:) MUST be followed by one (or more) spaces
28
- ## make sure mon feb 12 18:10 will not match
29
- ## allow 1. FC Köln etc.
30
- ## Mainz 05:
31
- ## limit to 30 chars max
32
- ## only allow chars incl. intl buut (NOT ()[]/;)
33
- ##
34
- ## Group A:
35
- ## Group B: - remove colon
36
- ## or lookup first
37
-
38
- ATTRIB_RE = %r{^
39
- [ ]*? # slurp leading spaces
40
- (?<key>[^:|\]\[()\/; -]
41
- [^:|\]\[()\/;]{0,30}
42
- )
43
- [ ]*? # slurp trailing spaces
44
- :[ ]+
45
- (?<value>.+)
46
- [ ]*? # slurp trailing spaces
47
- $
48
- }ix
49
-
50
-
51
- #########
52
- ## parse - false (default) - tokenize (only)
53
- ## - true - tokenize & parse
54
- def read( path, parse: false )
55
- ## note: every (new) read call - resets errors list to empty
56
- @errors = []
57
-
58
- nodes = OutlineReader.read( path )
59
-
60
- ## process nodes
61
- h1 = nil
62
- orphans = 0 ## track paragraphs's with no heading
63
-
64
- attrib_found = false
65
-
66
-
67
- nodes.each do |node|
68
- type = node[0]
69
-
70
- if type == :h1
71
- h1 = node[1] ## get heading text
72
- puts
73
- puts " = Heading 1 >#{node[1]}<"
74
- elsif type == :p
75
-
76
- if h1.nil?
77
- orphans += 1 ## only warn once
78
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
79
- next
80
- end
81
-
82
- lines = node[1]
83
-
84
- tree = []
85
- lines.each_with_index do |line,i|
86
-
87
- if debug?
88
- puts
89
- puts "line >#{line}<"
90
- end
91
-
92
-
93
- ## skip new (experimental attrib syntax)
94
- if attrib_found == false &&
95
- ATTRIB_RE.match?( line )
96
- ## note: check attrib regex AFTER group def e.g.:
97
- ## Group A:
98
- ## Group B: etc.
99
- ## todo/fix - change Group A: to Group A etc.
100
- ## Group B: to Group B
101
- attrib_found = true
102
- ## logger.debug "skipping key/value line - >#{line}<"
103
- next
104
- end
105
-
106
- if attrib_found
107
- ## check if line ends with dot
108
- ## if not slurp up lines to the next do!!!
109
- ## logger.debug "skipping key/value line - >#{line}<"
110
- attrib_found = false if line.end_with?( '.' )
111
- # logger.debug "skipping key/value line (cont.) - >#{line}<"
112
- next
113
- end
114
-
115
- t, error_messages = if parse
116
- @parser.parse_with_errors( line )
117
- else
118
- @parser.tokenize_with_errors( line )
119
- end
120
-
121
-
122
- if error_messages.size > 0
123
- ## add to "global" error list
124
- ## make a triplet tuple (file / msg / line text)
125
- error_messages.each do |msg|
126
- @errors << [ path,
127
- msg,
128
- line
129
- ]
130
- end
131
- end
132
-
133
- pp t if debug?
134
-
135
- tree << t
136
- end
137
-
138
- ## pp tree
139
- else
140
- pp node
141
- raise ArgumentError, "unsupported (node) type >#{type}<"
142
- end
143
- end # each node
144
- end # read
145
- end # class Linter
146
-
147
-
148
- end # class Parser
149
- end # module SportDb
@@ -1,70 +0,0 @@
1
-
2
- module SportDb
3
- class Parser
4
-
5
- ###
6
- ## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
7
- class Opts
8
-
9
- SEASON_RE = %r{ (?:
10
- \d{4}-\d{2}
11
- | \d{4}(--[a-z0-9_-]+)?
12
- )
13
- }x
14
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
15
-
16
-
17
- ## note: if pattern includes directory add here
18
- ## (otherwise move to more "generic" datafile) - why? why not?
19
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
20
- #{SEASON}
21
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
22
- }x
23
-
24
-
25
- def self.find( path )
26
- datafiles = []
27
-
28
- ## note: normalize path - use File.expand_path ??
29
- ## change all backslash to slash for now
30
- ## path = path.gsub( "\\", '/' )
31
- path = File.expand_path( path )
32
-
33
- ## check all txt files
34
- ## note: incl. files starting with dot (.)) as candidates
35
- ## (normally excluded with just *)
36
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
37
- ## pp candidates
38
- candidates.each do |candidate|
39
- datafiles << candidate if MATCH_RE.match( candidate )
40
- end
41
-
42
- ## pp datafiles
43
- datafiles
44
- end
45
-
46
-
47
- def self.expand_args( args )
48
- paths = []
49
-
50
- args.each do |arg|
51
- ## check if directory
52
- if Dir.exist?( arg )
53
- datafiles = find( arg )
54
- puts
55
- puts " found #{datafiles.size} match txt datafiles in #{arg}"
56
- pp datafiles
57
- paths += datafiles
58
- else
59
- ## assume it's a file
60
- paths << arg
61
- end
62
- end
63
-
64
- paths
65
- end
66
- end # class Opts
67
-
68
-
69
- end # class Parser
70
- end # module SportDb
@@ -1,97 +0,0 @@
1
-
2
-
3
- module SportDb
4
-
5
- class OutlineReader
6
-
7
- def self.debug=(value) @@debug = value; end
8
- def self.debug?() @@debug ||= false; end
9
- def debug?() self.class.debug?; end
10
-
11
-
12
-
13
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
- txt = File.open( path, 'r:utf-8' ) {|f| f.read }
15
- parse( txt )
16
- end
17
-
18
- def self.parse( txt )
19
- new( txt ).parse
20
- end
21
-
22
- def initialize( txt )
23
- @txt = txt
24
- end
25
-
26
- ## note: skip "decorative" only heading e.g. ========
27
- ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
28
- HEADING_BLANK_RE = %r{\A
29
- ={1,}
30
- \z}x
31
-
32
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
33
- HEADING_RE = %r{\A
34
- (?<marker>={1,}) ## 1. leading ======
35
- [ ]*
36
- (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
37
- [ ]*
38
- =* ## 3. (optional) trailing ====
39
- \z}x
40
-
41
- def parse
42
- outline=[] ## outline structure
43
- start_para = true ## start new para(graph) on new text line?
44
-
45
- @txt.each_line do |line|
46
- line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
47
-
48
- if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
49
- start_para = true
50
- next
51
- end
52
-
53
- break if line == '__END__'
54
-
55
- next if line.start_with?( '#' ) ## skip comments too
56
- ## strip inline (until end-of-line) comments too
57
- ## e.g Eupen | KAS Eupen ## [de]
58
- ## => Eupen | KAS Eupen
59
- ## e.g bq Bonaire, BOE # CONCACAF
60
- ## => bq Bonaire, BOE
61
- line = line.sub( /#.*/, '' ).strip
62
- pp line if debug?
63
-
64
- ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
65
- next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
66
-
67
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
68
- if m=HEADING_RE.match( line )
69
- start_para = true
70
-
71
- heading_marker = m[:marker]
72
- heading_level = heading_marker.length ## count number of = for heading level
73
- heading = m[:text].strip
74
-
75
- puts "heading #{heading_level} >#{heading}<" if debug?
76
- outline << [:"h#{heading_level}", heading]
77
- else ## assume it's a (plain/regular) text line
78
- if start_para
79
- outline << [:p, [line]]
80
- start_para = false
81
- else
82
- node = outline[-1] ## get last entry
83
- if node[0] == :p ## assert it's a p(aragraph) node!!!
84
- node[1] << line ## add line to p(aragraph)
85
- else
86
- puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
87
- pp node
88
- exit 1
89
- end
90
- end
91
- end
92
- end
93
- outline
94
- end # method read
95
- end # class OutlineReader
96
-
97
- end # module SportDb