sportdb-quick 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +13 -0
- data/README.md +5 -0
- data/Rakefile +32 -0
- data/bin/fbt +121 -0
- data/lib/sportdb/quick/linter.rb +149 -0
- data/lib/sportdb/quick/match_parser.rb +735 -0
- data/lib/sportdb/quick/opts.rb +70 -0
- data/lib/sportdb/quick/outline_reader.rb +97 -0
- data/lib/sportdb/quick/quick_league_outline_reader.rb +85 -0
- data/lib/sportdb/quick/quick_match_reader.rb +95 -0
- data/lib/sportdb/quick/version.rb +24 -0
- data/lib/sportdb/quick.rb +31 -0
- metadata +137 -0
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Quick
|
4
|
+
|
5
|
+
###
|
6
|
+
## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
|
7
|
+
## Command-line option helpers: locate match datafiles below a directory
##   and expand a mixed list of file / directory arguments into file paths.
class Opts

  ## season directory name
  ##   e.g. 2024-25  or  2024  or  2018--russia
  SEASON_RE = %r{ (?:
                      \d{4}-\d{2}
                    | \d{4}(--[a-z0-9_-]+)?
                  )
                }x
  SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?


  ## note: if pattern includes directory add here
  ##     (otherwise move to more "generic" datafile) - why? why not?
  MATCH_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
                   #{SEASON}
                   /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
                }x


  ## Recursively collect all match datafiles below a directory.
  ##
  ##  path - directory to search (relative or absolute)
  ##
  ##  returns an array of absolute paths of all .txt files
  ##    (incl. files starting with a dot) that match MATCH_RE,
  ##    that is, files sitting in a season directory.
  def self.find( path )
    ## note: normalize path - always work with the absolute path
    root = File.expand_path( path )

    ## check all txt files
    ## note: incl. files starting with dot (.)) as candidates
    ##     (normally excluded with just *)
    ## note: pass along the directory via base: (and NOT inline in the pattern);
    ##     otherwise glob metachars in the directory name e.g. [] {} * ?
    ##     get treated as pattern and nothing is found
    candidates = Dir.glob( '**/{*,.*}.txt', base: root )

    candidates.map    { |candidate| File.join( root, candidate ) }
              .select { |candidate| MATCH_RE.match?( candidate ) }
  end


  ## Expand command-line arguments into a flat list of paths.
  ##
  ##  args - array of file or directory names
  ##
  ##  Every directory gets replaced by the match datafiles found inside
  ##    (a summary gets printed to stdout);
  ##    everything else is assumed to be a file and passed along as is.
  def self.expand_args( args )
    args.flat_map do |arg|
      if Dir.exist?( arg )
        datafiles = find( arg )
        puts
        puts " found #{datafiles.size} match txt datafiles in #{arg}"
        pp datafiles
        datafiles
      else
        [arg]    ## assume it's a file
      end
    end
  end
end   # class Opts
|
67
|
+
|
68
|
+
|
69
|
+
end # module Quick
|
70
|
+
end # module SportDb
|
@@ -0,0 +1,97 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
|
5
|
+
## Line-oriented reader turning text into a flat outline, that is,
##   an array of nodes:
##     [:h1, "text"], [:h2, "text"], ...  - for headings (level = number of leading =)
##     [:p, ["line 1", "line 2", ...]]    - for paragraphs (runs of text lines)
##
##  Blank lines end the current paragraph, comments (#) get stripped and
##    everything after an __END__ line gets ignored.
class OutlineReader

  def self.debug=(value) @@debug = value; end
  def self.debug?()      @@debug ||= false; end
  def debug?()  self.class.debug?; end


  ## Read the (utf-8) text file at path and return the parsed outline.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    ## note: use bom|utf-8 to drop a leading byte order mark (BOM) if present;
    ##   otherwise the mark sticks to the first line and
    ##   a heading in the first line is no longer recognized
    txt = File.open( path, 'r:bom|utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parse the text and return the outline (array of nodes).
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end

  ## note: skip "decorative" only heading e.g. ========
  ##  todo/check:  find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
  HEADING_BLANK_RE = %r{\A
                         ={1,}
                        \z}x

  ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
  HEADING_RE = %r{\A
                   (?<marker>={1,})    ## 1. leading ======
                     [ ]*
                   (?<text>[^=]+)      ## 2. text (note: for now no "inline" = allowed)
                     [ ]*
                     =*                ## 3. (optional) trailing ====
                  \z}x

  ## Walk the text line-by-line and build up the outline.
  ##
  ##  returns an array of [:h<level>, text] and [:p, lines] nodes
  def parse
    outline    = []     ## outline structure
    start_para = true   ## start new para(graph) on new text line?

    @txt.each_line do |line|
      line = line.strip      ## todo/fix: keep leading and trailing spaces - why? why not?

      if line.empty?    ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
        start_para = true
        next
      end

      break if line == '__END__'

      next if line.start_with?( '#' )   ## skip comments too
      ## strip inline (until end-of-line) comments too
      ##  e.g Eupen | KAS Eupen ## [de]
      ##   => Eupen | KAS Eupen
      ##  e.g bq   Bonaire,  BOE        # CONCACAF
      ##   => bq   Bonaire,  BOE
      line = line.sub( /#.*/, '' ).strip
      pp line    if debug?

      ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
      next if HEADING_BLANK_RE.match( line )  # skip "decorative" only heading e.g. ========

      ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
      if m=HEADING_RE.match( line )
        start_para = true

        heading_marker = m[:marker]
        heading_level  = heading_marker.length   ## count number of = for heading level
        heading        = m[:text].strip

        puts "heading #{heading_level} >#{heading}<"    if debug?
        outline << [:"h#{heading_level}", heading]
      else    ## assume it's a (plain/regular) text line
        if start_para
          outline << [:p, [line]]
          start_para = false
        else
          node = outline[-1]    ## get last entry
          if node[0] == :p      ##  assert it's a p(aragraph) node!!!
            node[1] << line     ## add line to p(aragraph)
          else
            puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
            pp node
            exit 1
          end
        end
      end
    end
    outline
  end # method parse
end # class OutlineReader
|
96
|
+
|
97
|
+
end # module SportDb
|
@@ -0,0 +1,85 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
|
5
|
+
## shared "higher-level" outline reader
|
6
|
+
### quick version WITHOUT any validation/mapping !!!
|
7
|
+
|
8
|
+
## Outline reader splitting text into league sec(tion)s;
##   every heading 1 starts a new section and
##   all following paragraph lines get collected (as is) in the section.
class QuickLeagueOutlineReader

  ## Read the (utf-8) text file at path and return the parsed sections.
  def self.read( path )
    ## note: use bom|utf-8 to drop a leading byte order mark (BOM) if present;
    ##   otherwise a heading in the first line is no longer recognized
    txt = File.open( path, 'r:bom|utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parse the text and return the sections.
  def self.parse( txt )
    new( txt ).parse
  end


  def initialize( txt )
    @txt = txt
  end

  ## Build the sections from the outline.
  ##
  ##  returns an array of hashes with the keys
  ##     :league, :season, :stage (nil if not present) and :lines
  ##
  ##  note: prints an error and exits (with 1) on a heading 1 without
  ##          league and season or on any node other than heading 1 or paragraph;
  ##        lines before the first heading get skipped (with a warning)
  def parse
    secs = []     # sec(tion)s
    OutlineReader.parse( @txt ).each do |node|
      case node[0]
      when :h1
        ## check for league (and stage) and season
        heading = node[1]
        values  = split_league( heading )
        ## note: values is empty if the heading holds separators only (e.g. = ,)
        ##         report as error (and do NOT crash with a NoMethodError on nil)
        m = values[0]&.match( LEAGUE_SEASON_HEADING_RE )
        if m
          puts "league >#{m[:league]}<, season >#{m[:season]}<"

          secs << { league: m[:league],
                    season: m[:season],
                    stage:  values[1],     ## note: defaults to nil if not present
                    lines:  []
                  }
        else
          puts "** !!! ERROR - cannot match league and season in heading; season missing?"
          pp heading
          exit 1
        end
      when :p   ## paragraph with (text) lines
        lines = node[1]
        ## note: skip lines if no heading seen
        if secs.empty?
          puts "** !!! WARN - skipping lines (no heading):"
          pp lines
        else
          ## todo/check: unroll paragraphs into lines or pass along paragraphs - why? why not?
          secs[-1][:lines] += lines
        end
      else
        puts "** !!! ERROR - unknown line type; for now only heading 1 for leagues supported; sorry:"
        pp node
        exit 1
      end
    end
    secs
  end # method parse


  ## split into league + season
  ##  e.g. Österr. Bundesliga 2015/16   ## or 2015-16
  ##       World Cup 2018
  LEAGUE_SEASON_HEADING_RE = %r{^
                                 (?<league>.+?)     ## non-greedy
                                    \s+
                                 (?<season>\d{4}
                                    (?:[\/-]\d{1,4})?     ## optional 2nd year in season
                                 )
                                $}x

  ## Split the heading into its (whitespace-stripped) parts.
  ##
  ##  returns an array of strings - league with season first, followed by stage (if present)
  def split_league( str )   ## todo/check: rename to parse_league(s) - why? why not?
    ## split into league / stage / ... e.g.
    ##  => Österr. Bundesliga 2018/19, Regular Season
    ##  => Österr. Bundesliga 2018/19, Championship Round
    ##  etc.
    str.split( /[,<>‹›]/ )     ## note: allow , > < or › ‹ for now
       .map( &:strip )         ## remove all (leading and trailing) whitespaces
  end
end # class QuickLeagueOutlineReader
|
85
|
+
end # module SportDb
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#####
|
2
|
+
## quick match reader for datafiles with league outlines
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
## Quick match reader for datafiles with league outlines;
##   note: only one league and one season allowed per datafile (in quick style).
class QuickMatchReader

  ## Read the (utf-8) text file at path and return the parsed matches.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    ## note: use bom|utf-8 to drop a leading byte order mark (BOM) if present;
    ##   otherwise a heading in the first line is no longer recognized
    txt = File.open( path, 'r:bom|utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parse the text and return the matches.
  def self.parse( txt )
    new( txt ).parse
  end


  include Logging

  def initialize( txt )
    @txt = txt
  end

  ## Parse all league sec(tion)s and collect the matches.
  ##
  ##  returns the matches of the one and only league and season
  ##
  ##  note: prints an error and exits (with 1)
  ##          if the text holds not exactly one league or not exactly one season
  def parse
    data = {}   # return data hash with leagues
                #    and seasons
                #    for now merge stage into matches

    secs = QuickLeagueOutlineReader.parse( @txt )
    pp secs

    secs.each do |sec|   ## sec(tion)s
      season = Season.parse( sec[:season] )   ## convert (str) to season obj!!!
      league = sec[:league]
      stage  = sec[:stage]
      lines  = sec[:lines]

      ## note: start on jan/1 for seasons where season.year? holds and on jul/1 otherwise
      start = if season.year?
                Date.new( season.start_year, 1, 1 )
              else
                Date.new( season.start_year, 7, 1 )
              end


      parser = MatchParser.new( lines,
                                start )   ## note: keep season start_at date for now (no need for more specific stage date need for now)

      auto_conf_teams, matches, rounds, groups = parser.parse

      puts ">>> #{auto_conf_teams.size} teams:"
      pp auto_conf_teams
      puts ">>> #{matches.size} matches:"
      ## pp matches
      puts ">>> #{rounds.size} rounds:"
      pp rounds
      puts ">>> #{groups.size} groups:"
      pp groups

      ## note: pass along stage (if present): stage - optional from heading!!!!
      ##   NOTE(review): relies on update changing the match in place
      ##     (the return value was never used) - confirm against the match struct
      if stage
        matches.each { |match| match.update( stage: stage ) }
      end

      data[ league ] ||= {}
      data[ league ][ season.key ] ||= []

      data[ league ][ season.key ] += matches
      ## note - skip teams, rounds, and groups for now
    end

    ## check - only one league and one season
    ##          allowed in quick style


    leagues = data.keys
    if leagues.size != 1
      puts "!! (QUICK) PARSE ERROR - expected one league only; got #{leagues.size}:"
      pp leagues
      exit 1
    end

    seasons = data[ leagues[0] ].keys
    if seasons.size != 1
      puts "!! (QUICK) PARSE ERROR - expected one #{leagues[0]} season only; got #{seasons.size}:"
      pp seasons
      exit 1
    end

    data[ leagues[0] ][ seasons[0] ]
  end # method parse

end # class QuickMatchReader
|
94
|
+
end # module SportDb
|
95
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Module
|
4
|
+
## Version info for the sportdb-quick gem.
module Quick
  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
  MINOR = 0
  PATCH = 1
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  class << self
    ## version string e.g. 0.0.1
    def version
      VERSION
    end

    ## one-line banner with gem version, ruby version / release date / platform and gem root
    def banner
      "sportdb-quick/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
    end

    ## absolute path of the directory four levels up from this file
    def root
      top = 4.times.reduce( __FILE__ ) { |name, _| File.dirname( name ) }
      File.expand_path( top )
    end
  end
end  # module Quick
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'sportdb/structs' # deps: score-formats
|
2
|
+
# season-formats
|
3
|
+
# alphabets
|
4
|
+
require 'sportdb/parser' # deps: cocos
|
5
|
+
# season-formats
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
require 'logutils'
|
10
|
+
module SportDb
|
11
|
+
## logging machinery shortcut; use LogUtils for now
|
12
|
+
Logging = LogUtils::Logging
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
## our own code
|
18
|
+
require_relative 'quick/version'
|
19
|
+
require_relative 'quick/opts'
|
20
|
+
require_relative 'quick/linter'
|
21
|
+
require_relative 'quick/outline_reader'
|
22
|
+
|
23
|
+
require_relative 'quick/match_parser'
|
24
|
+
|
25
|
+
require_relative 'quick/quick_league_outline_reader'
|
26
|
+
require_relative 'quick/quick_match_reader'
|
27
|
+
|
28
|
+
|
29
|
+
puts SportDb::Module::Quick.banner # say hello
|
30
|
+
|
31
|
+
|
metadata
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sportdb-quick
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-08-27 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sportdb-parser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: sportdb-structs
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: logutils
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.6.1
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.6.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rdoc
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '4.0'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '7'
|
65
|
+
type: :development
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '4.0'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '7'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: hoe
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '4.1'
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '4.1'
|
89
|
+
description: sportdb-quick - football.txt (quick) match parsers and more
|
90
|
+
email: gerald.bauer@gmail.com
|
91
|
+
executables:
|
92
|
+
- fbt
|
93
|
+
extensions: []
|
94
|
+
extra_rdoc_files:
|
95
|
+
- CHANGELOG.md
|
96
|
+
- Manifest.txt
|
97
|
+
- README.md
|
98
|
+
files:
|
99
|
+
- CHANGELOG.md
|
100
|
+
- Manifest.txt
|
101
|
+
- README.md
|
102
|
+
- Rakefile
|
103
|
+
- bin/fbt
|
104
|
+
- lib/sportdb/quick.rb
|
105
|
+
- lib/sportdb/quick/linter.rb
|
106
|
+
- lib/sportdb/quick/match_parser.rb
|
107
|
+
- lib/sportdb/quick/opts.rb
|
108
|
+
- lib/sportdb/quick/outline_reader.rb
|
109
|
+
- lib/sportdb/quick/quick_league_outline_reader.rb
|
110
|
+
- lib/sportdb/quick/quick_match_reader.rb
|
111
|
+
- lib/sportdb/quick/version.rb
|
112
|
+
homepage: https://github.com/sportdb/sport.db
|
113
|
+
licenses:
|
114
|
+
- Public Domain
|
115
|
+
metadata: {}
|
116
|
+
post_install_message:
|
117
|
+
rdoc_options:
|
118
|
+
- "--main"
|
119
|
+
- README.md
|
120
|
+
require_paths:
|
121
|
+
- lib
|
122
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: 3.1.0
|
127
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
requirements: []
|
133
|
+
rubygems_version: 3.4.10
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: sportdb-quick - football.txt (quick) match parsers and more
|
137
|
+
test_files: []
|