sportdb-formats 1.1.5 → 1.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: df02092c7b7825d8f490db2a084ee9ea597c1d03
4
- data.tar.gz: 1a4668e864faae56d438ba834738ca73b3768810
3
+ metadata.gz: 0c8eb774dbb161f38c9cc4daffb32c8c4ea3d80c
4
+ data.tar.gz: 3e5b51d861148bba7c2f04c4f8acd30a8be486e8
5
5
  SHA512:
6
- metadata.gz: ad775203cc25f042c5b3008833512945ea5345bd9d2cf64787d87c92b163248d6785a52d0731ae2f61d06eb378c0bd8ca9cfa2e91c1dc5da710c9ccfe6bc8de4
7
- data.tar.gz: 41fea36796164be0deff4b297b9a7cece1344f9a16ef2fbec184d1f104c353a59ee30e221db4b6c9ccce7ba9009aafb8051593f510f8c81a1fa72d7fb797f5a1
6
+ metadata.gz: 3eee51ac8a01998082b1b00e6397ded3224dac3dad299a001a812f62c83e579246ced7af214b54c3f964211f5faec1cb08e708dd4237b28d7ac1782807de104f
7
+ data.tar.gz: d257a4f169babec6caf2bd34583a8e01fa1cfb76f3f72d0d13c4f38aadc3ffc2082591c73ed976e9ae7e0bbe08383c0f2f278d1358dc2219d4283912e315bff5
@@ -1,142 +1,142 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class CountryReader
9
-
10
-
11
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
- parse( txt )
14
- end
15
-
16
- def self.parse( txt )
17
- new( txt ).parse
18
- end
19
-
20
-
21
- def initialize( txt )
22
- @txt = txt
23
- end
24
-
25
- def parse
26
- countries = []
27
- last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
-
29
- OutlineReader.parse( @txt ).each do |node|
30
-
31
- node_type = node[0]
32
-
33
- if [:h1, :h2].include?( node_type )
34
- ## skip headings (and headings) for now too
35
- elsif node_type == :p ## paragraph
36
- lines = node[1]
37
- lines.each do |line|
38
- if line.start_with?( '|' )
39
- ## assume continuation with line of alternative names
40
- ## note: skip leading pipe
41
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
- ## strip and squish (white)spaces
43
- # e.g. East Germany (-1989) => East Germany (-1989)
44
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
- last_country.alt_names += values
46
- elsif line =~ /^-[ ]*(\d{4})
47
- [ ]+
48
- (.+)$
49
- /x ## check for historic lines e.g. -1989
50
- year = $1.to_i
51
- parts = $2.split( /=>|⇒/ )
52
- values = parts[0].split( ',' )
53
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
-
55
- name = values[0]
56
- code = values[1]
57
-
58
- last_country = country = Country.new( name: "#{name} (-#{year})",
59
- code: code )
60
- ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
-
62
- countries << country
63
- ## todo/fix: add reference to country today (in parts[1] !!!!)
64
- else
65
- ## assume "regular" line
66
- ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
- ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
- if line =~ /^([a-z]{2,4})
69
- [ ]+
70
- (.+)$/x
71
- key = $1
72
- values = $2.split( ',' )
73
- ## strip and squish (white)spaces
74
- # e.g. East Germany (-1989) => East Germany (-1989)
75
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
-
77
- ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
- ## e.g. England › UK => England
79
- ## Puerto Rico › US => Puerto Rico
80
- geos = split_geo( values[0] )
81
- name = geos[0] ## note: ignore all other geos for now
82
-
83
- ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
- code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
- values[1]
86
- else
87
- if values[1]
88
- puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
- else
90
- puts "** !!! ERROR !!! missing code for (canonical) country name"
91
- end
92
- exit 1
93
- end
94
-
95
- tags = if values[2] ## check if tags presents
96
- split_tags( values[2] )
97
- else
98
- []
99
- end
100
-
101
- last_country = country = Country.new( key: key,
102
- name: name,
103
- code: code,
104
- tags: tags )
105
- countries << country
106
- else
107
- puts "** !! ERROR - missing key for (canonical) country name"
108
- exit 1
109
- end
110
- end
111
- end # each line
112
- else
113
- puts "** !! ERROR - unknown node type / (input) source line:"
114
- pp node
115
- exit 1
116
- end
117
- end # each node
118
-
119
- countries
120
- end # method parse
121
-
122
-
123
-
124
- #######################################
125
- ## helpers
126
- def split_tags( str )
127
- tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
- tags = tags.map { |tag| tag.strip }
129
- tags
130
- end
131
-
132
- def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
- ## split into geo tree
134
- geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
- geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
- geos
137
- end
138
-
139
- end # class CountryReader
140
-
141
- end # module Import
142
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class CountryReader
9
+
10
+
11
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
+ parse( txt )
14
+ end
15
+
16
+ def self.parse( txt )
17
+ new( txt ).parse
18
+ end
19
+
20
+
21
+ def initialize( txt )
22
+ @txt = txt
23
+ end
24
+
25
+ def parse
26
+ countries = []
27
+ last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
+
29
+ OutlineReader.parse( @txt ).each do |node|
30
+
31
+ node_type = node[0]
32
+
33
+ if [:h1, :h2].include?( node_type )
34
+ ## skip headings (and headings) for now too
35
+ elsif node_type == :p ## paragraph
36
+ lines = node[1]
37
+ lines.each do |line|
38
+ if line.start_with?( '|' )
39
+ ## assume continuation with line of alternative names
40
+ ## note: skip leading pipe
41
+ values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
+ ## strip and squish (white)spaces
43
+ # e.g. East Germany (-1989) => East Germany (-1989)
44
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
+ last_country.alt_names += values
46
+ elsif line =~ /^-[ ]*(\d{4})
47
+ [ ]+
48
+ (.+)$
49
+ /x ## check for historic lines e.g. -1989
50
+ year = $1.to_i
51
+ parts = $2.split( /=>|⇒/ )
52
+ values = parts[0].split( ',' )
53
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
+
55
+ name = values[0]
56
+ code = values[1]
57
+
58
+ last_country = country = Country.new( name: "#{name} (-#{year})",
59
+ code: code )
60
+ ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
+
62
+ countries << country
63
+ ## todo/fix: add reference to country today (in parts[1] !!!!)
64
+ else
65
+ ## assume "regular" line
66
+ ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
+ ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
+ if line =~ /^([a-z]{2,4})
69
+ [ ]+
70
+ (.+)$/x
71
+ key = $1
72
+ values = $2.split( ',' )
73
+ ## strip and squish (white)spaces
74
+ # e.g. East Germany (-1989) => East Germany (-1989)
75
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
+
77
+ ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
+ ## e.g. England › UK => England
79
+ ## Puerto Rico › US => Puerto Rico
80
+ geos = split_geo( values[0] )
81
+ name = geos[0] ## note: ignore all other geos for now
82
+
83
+ ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
+ code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
+ values[1]
86
+ else
87
+ if values[1]
88
+ puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
+ else
90
+ puts "** !!! ERROR !!! missing code for (canonical) country name"
91
+ end
92
+ exit 1
93
+ end
94
+
95
+ tags = if values[2] ## check if tags presents
96
+ split_tags( values[2] )
97
+ else
98
+ []
99
+ end
100
+
101
+ last_country = country = Country.new( key: key,
102
+ name: name,
103
+ code: code,
104
+ tags: tags )
105
+ countries << country
106
+ else
107
+ puts "** !! ERROR - missing key for (canonical) country name"
108
+ exit 1
109
+ end
110
+ end
111
+ end # each line
112
+ else
113
+ puts "** !! ERROR - unknown node type / (input) source line:"
114
+ pp node
115
+ exit 1
116
+ end
117
+ end # each node
118
+
119
+ countries
120
+ end # method parse
121
+
122
+
123
+
124
+ #######################################
125
+ ## helpers
126
+ def split_tags( str )
127
+ tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
+ tags = tags.map { |tag| tag.strip }
129
+ tags
130
+ end
131
+
132
+ def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
+ ## split into geo tree
134
+ geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
+ geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
+ geos
137
+ end
138
+
139
+ end # class CountryReader
140
+
141
+ end # module Import
142
+ end # module SportDb
@@ -1,59 +1,59 @@
1
- # encoding: utf-8
2
-
3
-
4
- module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
-
6
- def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
- ## note: always assume utf-8 for now!!!
8
- File.open( path, 'r:utf-8') {|f| f.read }
9
- end
10
-
11
-
12
- ########################
13
- ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
- class Bundle
15
- def initialize( path )
16
- @path = path
17
- @buf = String.new('')
18
- end
19
-
20
- def <<(value)
21
- if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
- datafiles = value
23
- datafiles.each do |datafile|
24
- text = Datafile.read( datafile )
25
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
- @buf << text
28
- @buf << "\n\n"
29
- end
30
- else ## assume string (e.g. header, comments, etc.)
31
- text = value
32
- @buf << text
33
- @buf << "\n\n"
34
- end
35
- end
36
- alias_method :write, :<<
37
-
38
- ## todo/fix/check: write only on close? or write on every write and use close for close?
39
- def close
40
- File.open( @path, 'w:utf-8' ) do |f|
41
- f.write @buf
42
- end
43
- end
44
- end # class Bundle
45
-
46
-
47
- def self.write_bundle( path, datafiles:, header: nil )
48
- bundle = Bundle.new( path )
49
- bundle.write( header ) if header
50
- datafiles.each do |datafile|
51
- text = read( datafile )
52
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
- bundle.write( text )
55
- end
56
- bundle.close
57
- end
58
-
59
- end # module Datafile
1
+ # encoding: utf-8
2
+
3
+
4
+ module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
+
6
+ def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
+ ## note: always assume utf-8 for now!!!
8
+ File.open( path, 'r:utf-8') {|f| f.read }
9
+ end
10
+
11
+
12
+ ########################
13
+ ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
+ class Bundle
15
+ def initialize( path )
16
+ @path = path
17
+ @buf = String.new('')
18
+ end
19
+
20
+ def <<(value)
21
+ if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
+ datafiles = value
23
+ datafiles.each do |datafile|
24
+ text = Datafile.read( datafile )
25
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
+ @buf << text
28
+ @buf << "\n\n"
29
+ end
30
+ else ## assume string (e.g. header, comments, etc.)
31
+ text = value
32
+ @buf << text
33
+ @buf << "\n\n"
34
+ end
35
+ end
36
+ alias_method :write, :<<
37
+
38
+ ## todo/fix/check: write only on close? or write on every write and use close for close?
39
+ def close
40
+ File.open( @path, 'w:utf-8' ) do |f|
41
+ f.write @buf
42
+ end
43
+ end
44
+ end # class Bundle
45
+
46
+
47
+ def self.write_bundle( path, datafiles:, header: nil )
48
+ bundle = Bundle.new( path )
49
+ bundle.write( header ) if header
50
+ datafiles.each do |datafile|
51
+ text = read( datafile )
52
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
+ bundle.write( text )
55
+ end
56
+ bundle.close
57
+ end
58
+
59
+ end # module Datafile
@@ -1,141 +1,141 @@
1
- module SportDb
2
- module Import
3
-
4
-
5
-
6
- class EventIndex
7
-
8
- def self.build( path )
9
- pack = Package.new( path ) ## lets us use direcotry or zip archive
10
-
11
- recs = []
12
- pack.each_seasons do |entry|
13
- recs += EventInfoReader.parse( entry.read )
14
- end
15
- recs
16
-
17
- index = new
18
- index.add( recs )
19
- index
20
- end
21
-
22
-
23
-
24
- attr_reader :events
25
- def initialize
26
- @events = []
27
- @leagues = {}
28
- end
29
-
30
- def add( recs )
31
- @events += recs ## add to "linear" records
32
-
33
- recs.each do |rec|
34
- league = rec.league
35
- season = rec.season
36
-
37
- seasons = @leagues[ league.key ] ||= {}
38
- seasons[season.key] = rec
39
- end
40
- ## build search index by leagues (and season)
41
- end
42
-
43
- def find_by( league:, season: )
44
- league_key = league.is_a?( String ) ? league : league.key
45
- season_key = season.is_a?( String ) ? season : season.key
46
-
47
- seasons = @leagues[ league_key ]
48
- if seasons
49
- seasons[ season_key ]
50
- else
51
- nil
52
- end
53
- end # method find_by
54
- end ## class EventIndex
55
-
56
-
57
-
58
- class SeasonIndex
59
- def initialize( *args )
60
- @leagues = {} ## use a league hash by years for now; change later
61
-
62
- if args.size == 1 && args[0].is_a?( EventIndex )
63
- ## convenience setup/hookup
64
- ## (auto-)add all events from event index
65
- add( args[0].events )
66
- else
67
- pp args
68
- raise ArgumentError.new( 'unsupported arguments' )
69
- end
70
- end
71
-
72
- def add( recs )
73
- ## use a lookup index by year for now
74
- ## todo - find something better/more generic for searching/matching date periods!!!
75
- recs.each do |rec|
76
- league = rec.league
77
- season = rec.season
78
-
79
- years = @leagues[ league.key ] ||= {}
80
- if season.year?
81
- years[season.start_year] ||= []
82
- years[season.start_year] << rec
83
- else
84
- years[season.start_year] ||= []
85
- years[season.end_year] ||= []
86
- years[season.start_year] << rec
87
- years[season.end_year] << rec
88
- end
89
- end
90
- end # method add
91
-
92
- def find_by( date:, league: )
93
- date = Date.strptime( date, '%Y-%m-%d' ) if date.is_a?( String )
94
- league_key = league.is_a?( String ) ? league : league.key
95
-
96
- years = @leagues[ league_key ]
97
- if years
98
- year = years[ date.year ]
99
- if year
100
- season_key = nil
101
- year.each do |event|
102
- ## todo/check: rename/use between? instead of include? - why? why not?
103
- if event.include?( date )
104
- season_key = event.season.key
105
- break
106
- end
107
- end
108
- if season_key.nil?
109
- puts "!! WARN: date >#{date}< out-of-seasons for year #{date.year} in league #{league_key}:"
110
- year.each do |event|
111
- puts " #{event.season.key} | #{event.start_date} - #{event.end_date}"
112
- end
113
- ## retry again and pick season with "overflow" at the end (date is great end_date)
114
- year.each do |event|
115
- if date > event.end_date
116
- diff_in_days = date.to_date.jd - event.end_date.to_date.jd
117
- puts " +#{diff_in_days} days - adding overflow to #{event.season.key} ending on #{event.end_date} ++ #{date}"
118
- season_key = event.season.key
119
- break
120
- end
121
- end
122
- ## exit now for sure - if still empty!!!!
123
- if season_key.nil?
124
- puts "!! ERROR: CANNOT auto-fix / (auto-)append date at the end of an event; check season setup - sorry"
125
- exit 1
126
- end
127
- end
128
- season_key
129
- else
130
- nil ## no year defined / found for league
131
- end
132
- else
133
- nil ## no league defined / found
134
- end
135
- end # method find
136
-
137
- end # class SeasonIndex
138
-
139
-
140
- end # module Import
141
- end # module SportDb
1
+ module SportDb
2
+ module Import
3
+
4
+
5
+
6
+ class EventIndex
7
+
8
+ def self.build( path )
9
+ pack = Package.new( path ) ## lets us use direcotry or zip archive
10
+
11
+ recs = []
12
+ pack.each_seasons do |entry|
13
+ recs += EventInfoReader.parse( entry.read )
14
+ end
15
+ recs
16
+
17
+ index = new
18
+ index.add( recs )
19
+ index
20
+ end
21
+
22
+
23
+
24
+ attr_reader :events
25
+ def initialize
26
+ @events = []
27
+ @leagues = {}
28
+ end
29
+
30
+ def add( recs )
31
+ @events += recs ## add to "linear" records
32
+
33
+ recs.each do |rec|
34
+ league = rec.league
35
+ season = rec.season
36
+
37
+ seasons = @leagues[ league.key ] ||= {}
38
+ seasons[season.key] = rec
39
+ end
40
+ ## build search index by leagues (and season)
41
+ end
42
+
43
+ def find_by( league:, season: )
44
+ league_key = league.is_a?( String ) ? league : league.key
45
+ season_key = season.is_a?( String ) ? season : season.key
46
+
47
+ seasons = @leagues[ league_key ]
48
+ if seasons
49
+ seasons[ season_key ]
50
+ else
51
+ nil
52
+ end
53
+ end # method find_by
54
+ end ## class EventIndex
55
+
56
+
57
+
58
+ class SeasonIndex
59
+ def initialize( *args )
60
+ @leagues = {} ## use a league hash by years for now; change later
61
+
62
+ if args.size == 1 && args[0].is_a?( EventIndex )
63
+ ## convenience setup/hookup
64
+ ## (auto-)add all events from event index
65
+ add( args[0].events )
66
+ else
67
+ pp args
68
+ raise ArgumentError.new( 'unsupported arguments' )
69
+ end
70
+ end
71
+
72
+ def add( recs )
73
+ ## use a lookup index by year for now
74
+ ## todo - find something better/more generic for searching/matching date periods!!!
75
+ recs.each do |rec|
76
+ league = rec.league
77
+ season = rec.season
78
+
79
+ years = @leagues[ league.key ] ||= {}
80
+ if season.year?
81
+ years[season.start_year] ||= []
82
+ years[season.start_year] << rec
83
+ else
84
+ years[season.start_year] ||= []
85
+ years[season.end_year] ||= []
86
+ years[season.start_year] << rec
87
+ years[season.end_year] << rec
88
+ end
89
+ end
90
+ end # method add
91
+
92
+ def find_by( date:, league: )
93
+ date = Date.strptime( date, '%Y-%m-%d' ) if date.is_a?( String )
94
+ league_key = league.is_a?( String ) ? league : league.key
95
+
96
+ years = @leagues[ league_key ]
97
+ if years
98
+ year = years[ date.year ]
99
+ if year
100
+ season_key = nil
101
+ year.each do |event|
102
+ ## todo/check: rename/use between? instead of include? - why? why not?
103
+ if event.include?( date )
104
+ season_key = event.season.key
105
+ break
106
+ end
107
+ end
108
+ if season_key.nil?
109
+ puts "!! WARN: date >#{date}< out-of-seasons for year #{date.year} in league #{league_key}:"
110
+ year.each do |event|
111
+ puts " #{event.season.key} | #{event.start_date} - #{event.end_date}"
112
+ end
113
+ ## retry again and pick season with "overflow" at the end (date is great end_date)
114
+ year.each do |event|
115
+ if date > event.end_date
116
+ diff_in_days = date.to_date.jd - event.end_date.to_date.jd
117
+ puts " +#{diff_in_days} days - adding overflow to #{event.season.key} ending on #{event.end_date} ++ #{date}"
118
+ season_key = event.season.key
119
+ break
120
+ end
121
+ end
122
+ ## exit now for sure - if still empty!!!!
123
+ if season_key.nil?
124
+ puts "!! ERROR: CANNOT auto-fix / (auto-)append date at the end of an event; check season setup - sorry"
125
+ exit 1
126
+ end
127
+ end
128
+ season_key
129
+ else
130
+ nil ## no year defined / found for league
131
+ end
132
+ else
133
+ nil ## no league defined / found
134
+ end
135
+ end # method find
136
+
137
+ end # class SeasonIndex
138
+
139
+
140
+ end # module Import
141
+ end # module SportDb