sportdb-formats 1.1.5 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: df02092c7b7825d8f490db2a084ee9ea597c1d03
4
- data.tar.gz: 1a4668e864faae56d438ba834738ca73b3768810
3
+ metadata.gz: 0c8eb774dbb161f38c9cc4daffb32c8c4ea3d80c
4
+ data.tar.gz: 3e5b51d861148bba7c2f04c4f8acd30a8be486e8
5
5
  SHA512:
6
- metadata.gz: ad775203cc25f042c5b3008833512945ea5345bd9d2cf64787d87c92b163248d6785a52d0731ae2f61d06eb378c0bd8ca9cfa2e91c1dc5da710c9ccfe6bc8de4
7
- data.tar.gz: 41fea36796164be0deff4b297b9a7cece1344f9a16ef2fbec184d1f104c353a59ee30e221db4b6c9ccce7ba9009aafb8051593f510f8c81a1fa72d7fb797f5a1
6
+ metadata.gz: 3eee51ac8a01998082b1b00e6397ded3224dac3dad299a001a812f62c83e579246ced7af214b54c3f964211f5faec1cb08e708dd4237b28d7ac1782807de104f
7
+ data.tar.gz: d257a4f169babec6caf2bd34583a8e01fa1cfb76f3f72d0d13c4f38aadc3ffc2082591c73ed976e9ae7e0bbe08383c0f2f278d1358dc2219d4283912e315bff5
@@ -1,142 +1,142 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class CountryReader
9
-
10
-
11
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
- parse( txt )
14
- end
15
-
16
- def self.parse( txt )
17
- new( txt ).parse
18
- end
19
-
20
-
21
- def initialize( txt )
22
- @txt = txt
23
- end
24
-
25
- def parse
26
- countries = []
27
- last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
-
29
- OutlineReader.parse( @txt ).each do |node|
30
-
31
- node_type = node[0]
32
-
33
- if [:h1, :h2].include?( node_type )
34
- ## skip headings (and headings) for now too
35
- elsif node_type == :p ## paragraph
36
- lines = node[1]
37
- lines.each do |line|
38
- if line.start_with?( '|' )
39
- ## assume continuation with line of alternative names
40
- ## note: skip leading pipe
41
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
- ## strip and squish (white)spaces
43
- # e.g. East Germany (-1989) => East Germany (-1989)
44
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
- last_country.alt_names += values
46
- elsif line =~ /^-[ ]*(\d{4})
47
- [ ]+
48
- (.+)$
49
- /x ## check for historic lines e.g. -1989
50
- year = $1.to_i
51
- parts = $2.split( /=>|⇒/ )
52
- values = parts[0].split( ',' )
53
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
-
55
- name = values[0]
56
- code = values[1]
57
-
58
- last_country = country = Country.new( name: "#{name} (-#{year})",
59
- code: code )
60
- ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
-
62
- countries << country
63
- ## todo/fix: add reference to country today (in parts[1] !!!!)
64
- else
65
- ## assume "regular" line
66
- ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
- ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
- if line =~ /^([a-z]{2,4})
69
- [ ]+
70
- (.+)$/x
71
- key = $1
72
- values = $2.split( ',' )
73
- ## strip and squish (white)spaces
74
- # e.g. East Germany (-1989) => East Germany (-1989)
75
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
-
77
- ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
- ## e.g. England › UK => England
79
- ## Puerto Rico › US => Puerto Rico
80
- geos = split_geo( values[0] )
81
- name = geos[0] ## note: ignore all other geos for now
82
-
83
- ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
- code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
- values[1]
86
- else
87
- if values[1]
88
- puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
- else
90
- puts "** !!! ERROR !!! missing code for (canonical) country name"
91
- end
92
- exit 1
93
- end
94
-
95
- tags = if values[2] ## check if tags presents
96
- split_tags( values[2] )
97
- else
98
- []
99
- end
100
-
101
- last_country = country = Country.new( key: key,
102
- name: name,
103
- code: code,
104
- tags: tags )
105
- countries << country
106
- else
107
- puts "** !! ERROR - missing key for (canonical) country name"
108
- exit 1
109
- end
110
- end
111
- end # each line
112
- else
113
- puts "** !! ERROR - unknown node type / (input) source line:"
114
- pp node
115
- exit 1
116
- end
117
- end # each node
118
-
119
- countries
120
- end # method parse
121
-
122
-
123
-
124
- #######################################
125
- ## helpers
126
- def split_tags( str )
127
- tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
- tags = tags.map { |tag| tag.strip }
129
- tags
130
- end
131
-
132
- def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
- ## split into geo tree
134
- geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
- geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
- geos
137
- end
138
-
139
- end # class CountryReader
140
-
141
- end # module Import
142
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
## Reads a plain-text country listing and turns it into Country records.
##
## The text is first split into outline nodes by OutlineReader; headings
## (:h1, :h2) are skipped and every line of a paragraph (:p) node is one of:
##
##   at   Austria, AUT, fifa | uefa    -- "regular" line: key, name, code, (optional) tags
##   | Österreich | Autriche           -- alternative names for the preceding country
##   -1989  East Germany, GDR => ...   -- historic country (ended in given year)
##
## Note: input errors are reported with puts followed by exit 1.
class CountryReader

  ## historic line e.g. -1989 East Germany, GDR => Germany
  HISTORIC_LINE_RE = /^-[ ]*(\d{4})[ ]+(.+)$/
  ## regular line starting with a (lowercase) key of two to four letters
  ##   note: allow country codes up to 4 (!!) e.g. Northern Cyprus
  COUNTRY_LINE_RE  = /^([a-z]{2,4})[ ]+(.+)$/
  ## three (or four)-letter all up-case code
  ##   (values are cut from a single line and never hold a newline,
  ##    thus, anchoring the whole string matches the same inputs)
  CODE_RE          = /\A[A-Z]{3,4}\z/


  ## Reads the file at path (always as utf-8) and parses its text.
  def self.read( path )
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parses txt and returns an array of Country records.
  def self.parse( txt )
    new( txt ).parse
  end


  def initialize( txt )
    @txt = txt
  end

  ## Returns all countries in input order.
  ##   Exits (status 1) on unknown outline nodes or malformed lines.
  def parse
    countries = []

    OutlineReader.parse( @txt ).each do |node|
      case node[0]
      when :h1, :h2
        ## skip headings for now
      when :p   ## paragraph - node[1] holds the lines
        node[1].each { |line| parse_line( line, countries ) }
      else
        puts "** !! ERROR - unknown node type / (input) source line:"
        pp node
        exit 1
      end
    end

    countries
  end # method parse


  #######################################
  ## helpers

  ## Splits a tag list; allows pipe (|) and (<>‹›) as dividers.
  def split_tags( str )
    str.split( /[|<>‹›]/ ).map { |tag| tag.strip }
  end

  ## Splits a name into its geo tree e.g. England › UK => ["England", "UK"];
  ##   allows > < or › ‹ as dividers.
  def split_geo( str )
    str.split( /[<>‹›]/ ).map { |geo| geo.strip }
  end


  private

  ## Dispatches a single paragraph line; new records get appended to countries.
  ##   note: the last record in countries is always the "current" country
  ##         that alternative name lines get attached to
  def parse_line( line, countries )
    if line.start_with?( '|' )
      add_alt_names( line, countries.last )
    elsif (m = HISTORIC_LINE_RE.match( line ))
      countries << build_historic_country( m[1].to_i, m[2] )
    elsif (m = COUNTRY_LINE_RE.match( line ))
      countries << build_country( m[1], m[2] )
    else
      puts "** !! ERROR - missing key for (canonical) country name"
      exit 1
    end
  end

  ## Adds the pipe-separated names of line (leading pipe skipped) to country.
  def add_alt_names( line, country )
    ## note: a continuation line upfront (before any country) has nothing
    ##        to attach to - report it like all other input errors
    if country.nil?
      puts "** !! ERROR - alternative names line without preceding country >#{line}<"
      exit 1
    end

    country.alt_names += squish_all( line[1..-1].split( '|' ) )
  end

  ## Builds a historic country; the name gets the end year appended
  ##   e.g. East Germany (-1989).
  ##   note: the name without year is NOT added to alt_names
  ## todo/fix: add reference to country today (text after => or ⇒)
  def build_historic_country( year, rest )
    parts  = rest.split( /=>|⇒/ )
    values = squish_all( parts[0].split( ',' ) )

    Country.new( name: "#{values[0]} (-#{year})",
                 code: values[1] )
  end

  ## Builds a country from the comma-separated values (name, code, tags)
  ##   following the key; exits if the code is missing or ill-formed.
  def build_country( key, rest )
    values = squish_all( rest.split( ',' ) )

    ## note: remove "overlords" from geo-tree marked territories
    ##   e.g. England › UK  => England; all other geos get ignored for now
    name = split_geo( values[0] )[0]

    code = values[1]
    unless code && code =~ CODE_RE
      if code
        puts "** !!! ERROR !!! wrong code format >#{code}<; expected three (or four)-letter all up-case"
      else
        puts "** !!! ERROR !!! missing code for (canonical) country name"
      end
      exit 1
    end

    tags = values[2] ? split_tags( values[2] ) : []

    Country.new( key:  key,
                 name: name,
                 code: code,
                 tags: tags )
  end

  ## Strips and squishes (white)spaces of every value
  ##   e.g. East   Germany (-1989)  => East Germany (-1989)
  def squish_all( values )
    values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
  end

end # class CountryReader
140
+
141
+ end # module Import
142
+ end # module SportDb
@@ -1,59 +1,59 @@
1
- # encoding: utf-8
2
-
3
-
4
- module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
-
6
- def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
- ## note: always assume utf-8 for now!!!
8
- File.open( path, 'r:utf-8') {|f| f.read }
9
- end
10
-
11
-
12
- ########################
13
- ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
- class Bundle
15
- def initialize( path )
16
- @path = path
17
- @buf = String.new('')
18
- end
19
-
20
- def <<(value)
21
- if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
- datafiles = value
23
- datafiles.each do |datafile|
24
- text = Datafile.read( datafile )
25
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
- @buf << text
28
- @buf << "\n\n"
29
- end
30
- else ## assume string (e.g. header, comments, etc.)
31
- text = value
32
- @buf << text
33
- @buf << "\n\n"
34
- end
35
- end
36
- alias_method :write, :<<
37
-
38
- ## todo/fix/check: write only on close? or write on every write and use close for close?
39
- def close
40
- File.open( @path, 'w:utf-8' ) do |f|
41
- f.write @buf
42
- end
43
- end
44
- end # class Bundle
45
-
46
-
47
- def self.write_bundle( path, datafiles:, header: nil )
48
- bundle = Bundle.new( path )
49
- bundle.write( header ) if header
50
- datafiles.each do |datafile|
51
- text = read( datafile )
52
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
- bundle.write( text )
55
- end
56
- bundle.close
57
- end
58
-
59
- end # module Datafile
1
+ # encoding: utf-8
2
+
3
+
4
## Helpers for reading datafiles and for bundling many datafiles into one.
##   note: keep Datafile in its own top-level module/namespace for now
module Datafile

  ## Returns the content of the file at path.
  ##   note: always assume utf-8 for now!!!
  def self.read( path )
    File.open( path, 'r:utf-8' ) { |f| f.read }
  end


  ########################
  ## Collects text in memory; nothing gets written before close.
  class Bundle
    ## path is the output file that gets (over)written on close.
    def initialize( path )
      @path = path
      @buf  = String.new('')
    end

    ## Appends value followed by a blank line (\n\n).
    ##
    ## An Array is taken as a list of datafile paths - every file gets read
    ## and everything from the first __END__ marker on gets cut off;
    ## anything else is taken as text (e.g. header, comments, etc.).
    ##
    ## Returns self, so appends can get chained e.g. bundle << header << paths.
    ##   (returning the buffer or the passed-in array would make a chained <<
    ##    skip the separator or change the caller's array)
    def <<( value )
      if value.is_a?( Array )
        value.each { |datafile| append( read_datafile( datafile ) ) }
      else
        append( value )
      end
      self
    end
    alias_method :write, :<<

    ## Writes all collected text (as utf-8) to the output path.
    ## todo/fix/check: write only on close? or write on every write and use close for close?
    def close
      File.open( @path, 'w:utf-8' ) { |f| f.write( @buf ) }
    end

    private

    def append( text )
      @buf << text << "\n\n"
    end

    ## note: add/allow support for __END__; use m-multiline flag
    ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
    def read_datafile( path )
      Datafile.read( path ).sub( /__END__.*/m, '' )
    end
  end # class Bundle


  ## Writes all datafiles (with an optional header upfront) into a single
  ## file at path; returns the result of Bundle#close.
  def self.write_bundle( path, datafiles:, header: nil )
    bundle = Bundle.new( path )
    bundle.write( header )  if header
    ## pass along every datafile as a one-element list
    ##   to reuse the bundle's datafile reading (incl. __END__ handling)
    datafiles.each { |datafile| bundle.write( [datafile] ) }
    bundle.close
  end

end # module Datafile
@@ -1,141 +1,141 @@
1
- module SportDb
2
- module Import
3
-
4
-
5
-
6
- class EventIndex
7
-
8
- def self.build( path )
9
- pack = Package.new( path ) ## lets us use direcotry or zip archive
10
-
11
- recs = []
12
- pack.each_seasons do |entry|
13
- recs += EventInfoReader.parse( entry.read )
14
- end
15
- recs
16
-
17
- index = new
18
- index.add( recs )
19
- index
20
- end
21
-
22
-
23
-
24
- attr_reader :events
25
- def initialize
26
- @events = []
27
- @leagues = {}
28
- end
29
-
30
- def add( recs )
31
- @events += recs ## add to "linear" records
32
-
33
- recs.each do |rec|
34
- league = rec.league
35
- season = rec.season
36
-
37
- seasons = @leagues[ league.key ] ||= {}
38
- seasons[season.key] = rec
39
- end
40
- ## build search index by leagues (and season)
41
- end
42
-
43
- def find_by( league:, season: )
44
- league_key = league.is_a?( String ) ? league : league.key
45
- season_key = season.is_a?( String ) ? season : season.key
46
-
47
- seasons = @leagues[ league_key ]
48
- if seasons
49
- seasons[ season_key ]
50
- else
51
- nil
52
- end
53
- end # method find_by
54
- end ## class EventIndex
55
-
56
-
57
-
58
- class SeasonIndex
59
- def initialize( *args )
60
- @leagues = {} ## use a league hash by years for now; change later
61
-
62
- if args.size == 1 && args[0].is_a?( EventIndex )
63
- ## convenience setup/hookup
64
- ## (auto-)add all events from event index
65
- add( args[0].events )
66
- else
67
- pp args
68
- raise ArgumentError.new( 'unsupported arguments' )
69
- end
70
- end
71
-
72
- def add( recs )
73
- ## use a lookup index by year for now
74
- ## todo - find something better/more generic for searching/matching date periods!!!
75
- recs.each do |rec|
76
- league = rec.league
77
- season = rec.season
78
-
79
- years = @leagues[ league.key ] ||= {}
80
- if season.year?
81
- years[season.start_year] ||= []
82
- years[season.start_year] << rec
83
- else
84
- years[season.start_year] ||= []
85
- years[season.end_year] ||= []
86
- years[season.start_year] << rec
87
- years[season.end_year] << rec
88
- end
89
- end
90
- end # method add
91
-
92
- def find_by( date:, league: )
93
- date = Date.strptime( date, '%Y-%m-%d' ) if date.is_a?( String )
94
- league_key = league.is_a?( String ) ? league : league.key
95
-
96
- years = @leagues[ league_key ]
97
- if years
98
- year = years[ date.year ]
99
- if year
100
- season_key = nil
101
- year.each do |event|
102
- ## todo/check: rename/use between? instead of include? - why? why not?
103
- if event.include?( date )
104
- season_key = event.season.key
105
- break
106
- end
107
- end
108
- if season_key.nil?
109
- puts "!! WARN: date >#{date}< out-of-seasons for year #{date.year} in league #{league_key}:"
110
- year.each do |event|
111
- puts " #{event.season.key} | #{event.start_date} - #{event.end_date}"
112
- end
113
- ## retry again and pick season with "overflow" at the end (date is great end_date)
114
- year.each do |event|
115
- if date > event.end_date
116
- diff_in_days = date.to_date.jd - event.end_date.to_date.jd
117
- puts " +#{diff_in_days} days - adding overflow to #{event.season.key} ending on #{event.end_date} ++ #{date}"
118
- season_key = event.season.key
119
- break
120
- end
121
- end
122
- ## exit now for sure - if still empty!!!!
123
- if season_key.nil?
124
- puts "!! ERROR: CANNOT auto-fix / (auto-)append date at the end of an event; check season setup - sorry"
125
- exit 1
126
- end
127
- end
128
- season_key
129
- else
130
- nil ## no year defined / found for league
131
- end
132
- else
133
- nil ## no league defined / found
134
- end
135
- end # method find
136
-
137
- end # class SeasonIndex
138
-
139
-
140
- end # module Import
141
- end # module SportDb
1
+ module SportDb
2
+ module Import
3
+
4
+
5
+
6
## Index of event (info) records with lookup by league and season.
class EventIndex

  ## Builds an index from all season entries of the package at path.
  ##   Package lets us use a directory or zip archive.
  def self.build( path )
    pack = Package.new( path )

    recs = []
    pack.each_seasons do |entry|
      ## note: append in place - no need for a new array copy for every entry
      recs.concat( EventInfoReader.parse( entry.read ) )
    end

    index = new
    index.add( recs )
    index
  end


  ## all records in the order added (the "linear" list)
  attr_reader :events

  def initialize
    @events  = []
    @leagues = {}   ## lookup by league key, then by season key
  end

  ## Adds recs to the linear list and to the lookup by league (and season).
  ##   note: a later record for the same league and season replaces the
  ##         earlier one in the lookup (but both stay in events)
  def add( recs )
    @events += recs

    recs.each do |rec|
      seasons = @leagues[ rec.league.key ] ||= {}
      seasons[ rec.season.key ] = rec
    end
  end

  ## Returns the record for league and season or nil if none found.
  ##   league and season may get passed in as key (string)
  ##   or as an object responding to key.
  def find_by( league:, season: )
    league_key = league.is_a?( String ) ? league : league.key
    season_key = season.is_a?( String ) ? season : season.key

    seasons = @leagues[ league_key ]
    seasons ? seasons[ season_key ] : nil
  end # method find_by
end ## class EventIndex
55
+
56
+
57
+
58
## Index for finding the season (key) of a league by date.
##   Uses a league hash by years for now; change later.
class SeasonIndex
  ## Pass in no arguments to start with an empty index or
  ## an EventIndex to (auto-)add all its events.
  ##   Raises ArgumentError for anything else.
  def initialize( *args )
    @leagues = {}

    if args.empty?
      ## nothing to add (yet) - use add
    elsif args.size == 1 && args[0].is_a?( EventIndex )
      ## convenience setup/hookup
      add( args[0].events )
    else
      raise ArgumentError, "unsupported arguments: #{args.inspect}"
    end
  end

  ## Adds the records to the lookup by league and year.
  ##   A calendar year season gets listed under its year;
  ##   all others under the start and the end year.
  ## todo - find something better/more generic for searching/matching date periods!!!
  def add( recs )
    recs.each do |rec|
      season = rec.season
      years  = @leagues[ rec.league.key ] ||= {}

      season_years = if season.year?
                       [season.start_year]
                     else
                       ## note: uniq avoids listing a record twice
                       ##       if start and end year are the same
                       [season.start_year, season.end_year].uniq
                     end

      season_years.each { |year| (years[ year ] ||= []) << rec }
    end
  end # method add

  ## Returns the season key for date in league or
  ## nil if the league or the year of the date is unknown.
  ##
  ## date may get passed in as a string (YYYY-MM-DD);
  ## league as key (string) or as an object responding to key.
  ##
  ## If no event of the year includes the date, a warning gets printed and
  ## the first event that ended before the date gets picked ("overflow");
  ## if there is none, an error gets printed followed by exit 1.
  def find_by( date:, league: )
    date       = Date.strptime( date, '%Y-%m-%d' )   if date.is_a?( String )
    league_key = league.is_a?( String ) ? league : league.key

    years = @leagues[ league_key ]
    return nil  unless years      ## no league defined / found

    events = years[ date.year ]
    return nil  unless events     ## no year defined / found for league

    ## todo/check: rename/use between? instead of include? - why? why not?
    event = events.find { |candidate| candidate.include?( date ) }
    return event.season.key  if event

    puts "!! WARN: date >#{date}< out-of-seasons for year #{date.year} in league #{league_key}:"
    events.each do |candidate|
      puts " #{candidate.season.key} | #{candidate.start_date} - #{candidate.end_date}"
    end

    ## retry and pick season with "overflow" at the end (date is greater than end_date)
    overflow = events.find { |candidate| date > candidate.end_date }
    if overflow.nil?
      puts "!! ERROR: CANNOT auto-fix / (auto-)append date at the end of an event; check season setup - sorry"
      exit 1
    end

    diff_in_days = date.to_date.jd - overflow.end_date.to_date.jd
    puts " +#{diff_in_days} days - adding overflow to #{overflow.season.key} ending on #{overflow.end_date} ++ #{date}"
    overflow.season.key
  end # method find

end # class SeasonIndex
138
+
139
+
140
+ end # module Import
141
+ end # module SportDb