sportdb-formats 2.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,152 +0,0 @@
1
-
2
- module SportDb
3
- module Import
4
-
5
-
6
- class LeagueReader
7
-
8
## shortcut - delegates to Import.world
##   (used by parse for looking up countries via world.countries)
def world
  Import.world
end
9
-
10
-
11
## read the text file at path (as utf-8) and pass the text on to parse;
##   returns whatever parse returns
def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
  parse( File.read( path, encoding: 'utf-8' ) )
end
15
-
16
## convenience helper - build a reader for the text and run it;
##   returns the result of the instance-level parse
def self.parse( txt )
  reader = new( txt )
  reader.parse
end
19
-
20
-
21
-
22
- include Logging
23
-
24
## txt - the text to parse; stored as-is (read later by parse)
def initialize( txt )
  @txt = txt
end
27
-
28
## Parse the text in @txt into an array of League records.
##
##  Walks the nodes returned by OutlineReader.parse( @txt ):
##   - heading nodes (only heading 1 supported for now) set the context
##       for the leagues that follow, that is,
##         national teams, international (clubs) or a country
##   - paragraph nodes hold (text) lines, that is,
##         a league line (key followed by name) or
##         a continuation line (starting with a pipe) with
##           alternative names for the league before
##
##  returns an array of League records (empty if no league lines found)
##
##  note: on errors (unsupported heading level, unknown country,
##          alternative names without league, missing key, unknown node type)
##        prints a message and exits with status 1
def parse
  recs     = []
  last_rec = nil

  country  = nil      # last country
  intl     = false    # is international (league/tournament/cup/competition)
  clubs    = true     # or clubs|national teams

  OutlineReader.parse( @txt ).each do |node|
    type = node[0]

    if [:h1,:h2,:h3,:h4,:h5,:h6].include?( type )
      heading_level = type[1].to_i    ## e.g. :h1 => 1
      heading       = node[1]

      logger.debug "heading #{heading_level} >#{heading}<"

      if heading_level != 1
        puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
        pp node   ## note: report the node - there is no (text) line in scope for headings
        exit 1
      end

      ## map to country or international / int'l or national teams
      if heading =~ /national team/i             ## national team tournament
        country = nil
        intl    = true
        clubs   = false
      elsif heading =~ /international|int'l/i    ## int'l club tournament
        country = nil
        intl    = true
        clubs   = true
      else
        ## assume country in heading; allow all "formats" supported by parse e.g.
        ##   Österreich • Austria (at)
        ##   Österreich • Austria
        ##   Austria
        ##   Deutschland (de) • Germany
        country = world.countries.parse( heading )
        intl    = false
        clubs   = true

        ## check country code - MUST exist for now!!!!
        if country.nil?
          puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
          exit 1
        end
      end
    elsif type == :p   ## paragraph with (text) lines
      node[1].each do |line|
        if line.start_with?( '|' )
          ## assume continuation with line of alternative names
          ##  note: skip leading pipe
          values = line[1..-1].split( '|' )              # names - allow/use pipe(|)
          values = values.map { |value| _norm( value ) } ## squish/strip etc.

          logger.debug "alt_names: #{values.join( '|' )}"

          ## note: without a league before there is nothing to add the names to
          if last_rec.nil?
            puts "** !!! ERROR !!! [league reader] - alternative names without (canonical) league name before:"
            pp line
            exit 1
          end

          last_rec.alt_names += values
        elsif (m = /^(?<key>[a-z0-9][a-z0-9.]*)[ ]+(?<name>.+)$/.match( line ))
          ## assume "regular" line starting with id/key
          ##  (todo/check: use a more "strict"/better regex capture pattern!!!)
          league_key  = m[:key]
          ## 1) strip (commercial) sponsor markers/tags e.g $$
          ## 2) strip and squish (white)spaces
          league_name = _norm( m[:name] )

          logger.debug "key: >#{league_key}<, name: >#{league_name}<"

          ## prepend country key/code if country present
          ##   todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
          ##     why? lets you "overwrite" key if desired - use it - why? why not?
          league_key = "#{country.key}.#{league_key}"   if country

          rec = League.new( key:     league_key,
                            name:    league_name,
                            country: country,
                            intl:    intl,
                            clubs:   clubs )
          recs << rec
          last_rec = rec
        else
          puts "** !!! ERROR !!! missing key for (canonical) league name"
          pp line
          exit 1
        end
      end  # each line
    else
      puts "** !!! ERROR !!! [league reader] - unknown line type:"
      pp node
      exit 1
    end
  end

  recs
end  # method parse
130
-
131
-
132
-
133
- #######################
134
- ### helpers
135
-
136
## norm(alize) helper
##   1) remove all dollars ($$$) - only extra clean-up for now
##   2) squish (spaces) and remove leading and trailing spaces (via _squish)
def _norm( str )
  without_dollars = str.delete( '$' )
  _squish( without_dollars )
end
143
-
144
## squish helper - collapse every run of spaces, tabs and
##   non-breaking spaces (\u00a0) into a single space,
##   then strip leading and trailing whitespace
def _squish( str )
  collapsed = str.gsub( /[ \t\u00a0]+/ ) { ' ' }
  collapsed.strip
end
147
-
148
-
149
- end # class LeagueReader
150
-
151
- end ## module Import
152
- end ## module SportDb
@@ -1,132 +0,0 @@
1
- module SportDb
2
-
3
- class ConfParser
4
-
5
## convenience helper - build a parser for the lines (or text) and run it;
##   returns the result of the instance-level parse
def self.parse( lines )
  new( lines ).parse
end
9
-
10
- include Logging ## e.g. logger#debug, logger#info, etc.
11
-
12
## preprocess text - returns an array of (stripped) lines with
##   comments and empty lines stripped / removed
def _read_lines( txt )   ## todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
  stripped = txt.each_line.map { |line| line.strip }

  ### skip empty lines and comments
  kept = stripped.reject { |line| line.empty? || line.start_with?( '#' ) }

  ### cut-off end-of line comments too
  kept.map { |line| line.sub( /#.*/, '' ).strip }
end
24
-
25
-
26
## lines - an array of lines or (for convenience) a string;
##   a string gets split into lines via _read_lines
##     (note: removes/strips empty lines and comments);
##   anything else is stored as-is
##  todo/check: change to text instead of array of lines - why? why not?
def initialize( lines )
  @lines = if lines.is_a?( String )
             _read_lines( lines )
           else
             lines
           end
end
32
-
33
-
34
-
35
## (optional) country code (for teams) matcher e.g.
##    Rapid Wien › AUT   or   Arsenal, ENG
##  note: allows <>‹›, as separators;
##        with the (i)gnore case flag lower case letters match too
COUNTRY_RE = %r{ [<>‹›,]
                 [ ]*
                 (?<country>[A-Z]{2,4})   ## todo/check: allow one-letter (motor vehicle plates) or 5 letter possible?
                 \b}xi


## standings table row regex matcher e.g.
##    1  Manchester City         38  32  4  2 106-27 100
## or 1. Manchester City         38  32  4  2 106:27 100
TABLE_RE = %r{ ^
               (?:
                 (?<rank>\d+)\.?
                  |
                 [-]
               )
               [ ]+
               (?<team>.+?)    ## note: let's use non-greedy (MINIMUM length) match for now
               [ ]+
               (?<pld>\d+)     ## (pl)aye(d)
               [ ]+
               (?<w>\d+)       ## (w)ins
               [ ]+
               (?<d>\d+)       ## (d)raws
               [ ]+
               (?<l>\d+)       ## (l)ost
               [ ]+
               (?<gf>\d+)      ## (g)oal (f)or
               [ ]*
               [:-]            ## note: allow 10-10 or 10:10 or 10 - 10 or 10 : 10 etc.
               [ ]*
               (?<ga>\d+)      ## (g)oal (a)gainst
               (?:             ## allow optional (g)oal (d)ifference
                 [ ]+
                 (?<gd>[±+-]?\d+)   ## (g)oal (d)ifference
               )?
               [ ]+
               (?<pts>\d+)     ## (p)oin(ts)
               (?:             ## allow optional deductions e.g. [-7]
                 [ ]+
                 \[(?<deduction>-\d+)\]
               )?
             $}x


## Convert the lines in @lines into a hash of teams (keyed by team name).
##
##  Every line is either
##    - a decorative line (dashes and spaces only) - skipped
##    - a standings table row (see TABLE_RE) - adds :rank (if present)
##        and :standing (hash with :pld, :w, :d, :l, :gf, :ga, :pts
##        and optional :gd and :deduction - all integers)
##    - or else the (stripped) line is the team name
##  An (optional) country code (see COUNTRY_RE) gets cut out of the line first
##    and stored as :country.
##
##  returns a hash e.g.
##     { "Arsenal" => { country: "ENG", rank: 8, standing: { pld: 38, ... }}}
##
##  note: a team listed more than once gets its record merged / overwritten
def parse
  teams = {}     ## convert lines to teams

  @lines.each do |line|
    next if line =~ /^[ -]+$/   ## skip decorative lines with dash only (e.g. ---- or - - - -) etc.

    ## quick hack - check for/extract (optional) country code (for teams) first
    ##   allow as separators <>‹›,  NOTE: includes (,) comma for now too
    country = nil
    if (m = COUNTRY_RE.match( line ))
      country = m[:country]
      ## cut out the match by position;
      ##   note: sub( m[0], '' ) removes the FIRST occurrence of the matched text,
      ##          which might be earlier in the line than the match itself
      ##           e.g. in "Bayern Munich, GERMANY, GER"
      line = m.pre_match + m.post_match
    end

    if (m = TABLE_RE.match( line ))
      logger.debug "  matching table entry >#{line}<"

      ## note: always convert with base 10 - otherwise a leading zero
      ##         gets read as an octal prefix e.g. "08" raises and "010" is 8
      name = m[:team]
      rank = m[:rank] ? Integer( m[:rank], 10 ) : nil

      standing = {
        pld: Integer( m[:pld], 10 ),
        w:   Integer( m[:w], 10 ),
        d:   Integer( m[:d], 10 ),
        l:   Integer( m[:l], 10 ),
        gf:  Integer( m[:gf], 10 ),
        ga:  Integer( m[:ga], 10 ),
      }
      standing[ :gd ]        = Integer( m[:gd].gsub( /[±+]/, '' ), 10 )   if m[:gd]
      standing[ :pts ]       = Integer( m[:pts], 10 )
      standing[ :deduction ] = Integer( m[:deduction], 10 )               if m[:deduction]

      ## todo/fix: track double usage - why? why not? report/raise error/exception on duplicates?
      team = teams[ name ] ||= {}
      team[ :country ]  = country    if country
      team[ :rank ]     = rank       if rank
      team[ :standing ] = standing
    else
      ## assume team is full line
      name = line.strip    # note: strip leading and trailing spaces

      team = teams[ name ] ||= {}
      team[ :country ] = country   if country
    end
  end

  teams
end  # method parse
130
-
131
- end # class ConfParser
132
- end # module SportDb