sportdb-formats 1.1.6 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +53 -9
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +130 -13
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 0c8eb774dbb161f38c9cc4daffb32c8c4ea3d80c
4
- data.tar.gz: 3e5b51d861148bba7c2f04c4f8acd30a8be486e8
2
+ SHA256:
3
+ metadata.gz: a9f1a3d4643a73600d020fd3e34356df44f869eda7a0e1e06479819142f22a57
4
+ data.tar.gz: 1ae30d054bb50a52785bc13c6f3d568129d28cc5c0f10b88bae964ea88263c94
5
5
  SHA512:
6
- metadata.gz: 3eee51ac8a01998082b1b00e6397ded3224dac3dad299a001a812f62c83e579246ced7af214b54c3f964211f5faec1cb08e708dd4237b28d7ac1782807de104f
7
- data.tar.gz: d257a4f169babec6caf2bd34583a8e01fa1cfb76f3f72d0d13c4f38aadc3ffc2082591c73ed976e9ae7e0bbe08383c0f2f278d1358dc2219d4283912e315bff5
6
+ metadata.gz: 752dad1c1f27f0ced1b410232d1b187551eda65e7e1a3be55000b3fa00b669bcfc943da7df1c858eafbabc83b05e9914c86d95e8fc14dc6f4fd7b72bcb1b73da
7
+ data.tar.gz: b92e4b47d4570f54ebd04e3153b05038df6bc5e1f622a45e87d034af38aa5fb31ab7c6d81f4d391a1c99370e5948f13e3f4dff6a00e069d73429e490ec61de91
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 1.2.1
2
+
1
3
  ### 0.0.1 / 2019-10-28
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -3,16 +3,15 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/sportdb/formats.rb
6
- lib/sportdb/formats/country/country_index.rb
7
6
  lib/sportdb/formats/country/country_reader.rb
8
7
  lib/sportdb/formats/datafile.rb
9
8
  lib/sportdb/formats/datafile_package.rb
10
- lib/sportdb/formats/event/event_index.rb
11
9
  lib/sportdb/formats/event/event_reader.rb
12
10
  lib/sportdb/formats/goals.rb
13
- lib/sportdb/formats/league/league_index.rb
11
+ lib/sportdb/formats/ground/ground_reader.rb
14
12
  lib/sportdb/formats/league/league_outline_reader.rb
15
13
  lib/sportdb/formats/league/league_reader.rb
14
+ lib/sportdb/formats/lines_reader.rb
16
15
  lib/sportdb/formats/match/conf_parser.rb
17
16
  lib/sportdb/formats/match/mapper.rb
18
17
  lib/sportdb/formats/match/mapper_teams.rb
@@ -21,31 +20,11 @@ lib/sportdb/formats/match/match_parser_auto_conf.rb
21
20
  lib/sportdb/formats/outline_reader.rb
22
21
  lib/sportdb/formats/package.rb
23
22
  lib/sportdb/formats/parser_helper.rb
24
- lib/sportdb/formats/team/club_index.rb
23
+ lib/sportdb/formats/search/sport.rb
24
+ lib/sportdb/formats/search/world.rb
25
25
  lib/sportdb/formats/team/club_index_history.rb
26
26
  lib/sportdb/formats/team/club_reader.rb
27
27
  lib/sportdb/formats/team/club_reader_history.rb
28
28
  lib/sportdb/formats/team/club_reader_props.rb
29
- lib/sportdb/formats/team/national_team_index.rb
30
- lib/sportdb/formats/team/team_index.rb
31
29
  lib/sportdb/formats/team/wiki_reader.rb
32
30
  lib/sportdb/formats/version.rb
33
- test/helper.rb
34
- test/test_club_index.rb
35
- test/test_club_index_history.rb
36
- test/test_club_reader.rb
37
- test/test_club_reader_history.rb
38
- test/test_club_reader_props.rb
39
- test/test_country_index.rb
40
- test/test_country_reader.rb
41
- test/test_datafile.rb
42
- test/test_datafile_package.rb
43
- test/test_goals.rb
44
- test/test_league_index.rb
45
- test/test_league_outline_reader.rb
46
- test/test_league_reader.rb
47
- test/test_outline_reader.rb
48
- test/test_package.rb
49
- test/test_package_match.rb
50
- test/test_regex.rb
51
- test/test_wiki_reader.rb
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'sportdb-formats' do
11
11
  self.urls = { home: 'https://github.com/sportdb/sport.db' }
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'opensport@googlegroups.com'
14
+ self.email = 'gerald.bauer@gmail.com'
15
15
 
16
16
  # switch extension to .markdown for gihub formatting
17
17
  self.readme_file = 'README.md'
@@ -1,142 +1,142 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class CountryReader
9
-
10
-
11
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
- parse( txt )
14
- end
15
-
16
- def self.parse( txt )
17
- new( txt ).parse
18
- end
19
-
20
-
21
- def initialize( txt )
22
- @txt = txt
23
- end
24
-
25
- def parse
26
- countries = []
27
- last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
-
29
- OutlineReader.parse( @txt ).each do |node|
30
-
31
- node_type = node[0]
32
-
33
- if [:h1, :h2].include?( node_type )
34
- ## skip headings (and headings) for now too
35
- elsif node_type == :p ## paragraph
36
- lines = node[1]
37
- lines.each do |line|
38
- if line.start_with?( '|' )
39
- ## assume continuation with line of alternative names
40
- ## note: skip leading pipe
41
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
- ## strip and squish (white)spaces
43
- # e.g. East Germany (-1989) => East Germany (-1989)
44
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
- last_country.alt_names += values
46
- elsif line =~ /^-[ ]*(\d{4})
47
- [ ]+
48
- (.+)$
49
- /x ## check for historic lines e.g. -1989
50
- year = $1.to_i
51
- parts = $2.split( /=>|⇒/ )
52
- values = parts[0].split( ',' )
53
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
-
55
- name = values[0]
56
- code = values[1]
57
-
58
- last_country = country = Country.new( name: "#{name} (-#{year})",
59
- code: code )
60
- ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
-
62
- countries << country
63
- ## todo/fix: add reference to country today (in parts[1] !!!!)
64
- else
65
- ## assume "regular" line
66
- ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
- ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
- if line =~ /^([a-z]{2,4})
69
- [ ]+
70
- (.+)$/x
71
- key = $1
72
- values = $2.split( ',' )
73
- ## strip and squish (white)spaces
74
- # e.g. East Germany (-1989) => East Germany (-1989)
75
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
-
77
- ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
- ## e.g. England › UK => England
79
- ## Puerto Rico › US => Puerto Rico
80
- geos = split_geo( values[0] )
81
- name = geos[0] ## note: ignore all other geos for now
82
-
83
- ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
- code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
- values[1]
86
- else
87
- if values[1]
88
- puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
- else
90
- puts "** !!! ERROR !!! missing code for (canonical) country name"
91
- end
92
- exit 1
93
- end
94
-
95
- tags = if values[2] ## check if tags presents
96
- split_tags( values[2] )
97
- else
98
- []
99
- end
100
-
101
- last_country = country = Country.new( key: key,
102
- name: name,
103
- code: code,
104
- tags: tags )
105
- countries << country
106
- else
107
- puts "** !! ERROR - missing key for (canonical) country name"
108
- exit 1
109
- end
110
- end
111
- end # each line
112
- else
113
- puts "** !! ERROR - unknown node type / (input) source line:"
114
- pp node
115
- exit 1
116
- end
117
- end # each node
118
-
119
- countries
120
- end # method parse
121
-
122
-
123
-
124
- #######################################
125
- ## helpers
126
- def split_tags( str )
127
- tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
- tags = tags.map { |tag| tag.strip }
129
- tags
130
- end
131
-
132
- def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
- ## split into geo tree
134
- geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
- geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
- geos
137
- end
138
-
139
- end # class CountryReader
140
-
141
- end # module Import
142
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class CountryReader
9
+
10
+
11
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
+ parse( txt )
14
+ end
15
+
16
+ def self.parse( txt )
17
+ new( txt ).parse
18
+ end
19
+
20
+
21
+ def initialize( txt )
22
+ @txt = txt
23
+ end
24
+
25
+ def parse
26
+ countries = []
27
+ last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
+
29
+ OutlineReader.parse( @txt ).each do |node|
30
+
31
+ node_type = node[0]
32
+
33
+ if [:h1, :h2].include?( node_type )
34
+ ## skip headings (and headings) for now too
35
+ elsif node_type == :p ## paragraph
36
+ lines = node[1]
37
+ lines.each do |line|
38
+ if line.start_with?( '|' )
39
+ ## assume continuation with line of alternative names
40
+ ## note: skip leading pipe
41
+ values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
+ ## strip and squish (white)spaces
43
+ # e.g. East Germany (-1989) => East Germany (-1989)
44
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
+ last_country.alt_names += values
46
+ elsif line =~ /^-[ ]*(\d{4})
47
+ [ ]+
48
+ (.+)$
49
+ /x ## check for historic lines e.g. -1989
50
+ year = $1.to_i
51
+ parts = $2.split( /=>|⇒/ )
52
+ values = parts[0].split( ',' )
53
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
+
55
+ name = values[0]
56
+ code = values[1]
57
+
58
+ last_country = country = Country.new( name: "#{name} (-#{year})",
59
+ code: code )
60
+ ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
+
62
+ countries << country
63
+ ## todo/fix: add reference to country today (in parts[1] !!!!)
64
+ else
65
+ ## assume "regular" line
66
+ ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
+ ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
+ if line =~ /^([a-z]{2,4})
69
+ [ ]+
70
+ (.+)$/x
71
+ key = $1
72
+ values = $2.split( ',' )
73
+ ## strip and squish (white)spaces
74
+ # e.g. East Germany (-1989) => East Germany (-1989)
75
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
+
77
+ ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
+ ## e.g. England › UK => England
79
+ ## Puerto Rico › US => Puerto Rico
80
+ geos = split_geo( values[0] )
81
+ name = geos[0] ## note: ignore all other geos for now
82
+
83
+ ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
+ code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
+ values[1]
86
+ else
87
+ if values[1]
88
+ puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
+ else
90
+ puts "** !!! ERROR !!! missing code for (canonical) country name"
91
+ end
92
+ exit 1
93
+ end
94
+
95
+ tags = if values[2] ## check if tags presents
96
+ split_tags( values[2] )
97
+ else
98
+ []
99
+ end
100
+
101
+ last_country = country = Country.new( key: key,
102
+ name: name,
103
+ code: code,
104
+ tags: tags )
105
+ countries << country
106
+ else
107
+ puts "** !! ERROR - missing key for (canonical) country name"
108
+ exit 1
109
+ end
110
+ end
111
+ end # each line
112
+ else
113
+ puts "** !! ERROR - unknown node type / (input) source line:"
114
+ pp node
115
+ exit 1
116
+ end
117
+ end # each node
118
+
119
+ countries
120
+ end # method parse
121
+
122
+
123
+
124
+ #######################################
125
+ ## helpers
126
+ def split_tags( str )
127
+ tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
+ tags = tags.map { |tag| tag.strip }
129
+ tags
130
+ end
131
+
132
+ def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
+ ## split into geo tree
134
+ geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
+ geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
+ geos
137
+ end
138
+
139
+ end # class CountryReader
140
+
141
+ end # module Import
142
+ end # module SportDb
@@ -1,59 +1,59 @@
1
- # encoding: utf-8
2
-
3
-
4
- module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
-
6
- def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
- ## note: always assume utf-8 for now!!!
8
- File.open( path, 'r:utf-8') {|f| f.read }
9
- end
10
-
11
-
12
- ########################
13
- ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
- class Bundle
15
- def initialize( path )
16
- @path = path
17
- @buf = String.new('')
18
- end
19
-
20
- def <<(value)
21
- if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
- datafiles = value
23
- datafiles.each do |datafile|
24
- text = Datafile.read( datafile )
25
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
- @buf << text
28
- @buf << "\n\n"
29
- end
30
- else ## assume string (e.g. header, comments, etc.)
31
- text = value
32
- @buf << text
33
- @buf << "\n\n"
34
- end
35
- end
36
- alias_method :write, :<<
37
-
38
- ## todo/fix/check: write only on close? or write on every write and use close for close?
39
- def close
40
- File.open( @path, 'w:utf-8' ) do |f|
41
- f.write @buf
42
- end
43
- end
44
- end # class Bundle
45
-
46
-
47
- def self.write_bundle( path, datafiles:, header: nil )
48
- bundle = Bundle.new( path )
49
- bundle.write( header ) if header
50
- datafiles.each do |datafile|
51
- text = read( datafile )
52
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
- bundle.write( text )
55
- end
56
- bundle.close
57
- end
58
-
59
- end # module Datafile
1
+ # encoding: utf-8
2
+
3
+
4
+ module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
+
6
+ def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
+ ## note: always assume utf-8 for now!!!
8
+ File.open( path, 'r:utf-8') {|f| f.read }
9
+ end
10
+
11
+
12
+ ########################
13
+ ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
+ class Bundle
15
+ def initialize( path )
16
+ @path = path
17
+ @buf = String.new('')
18
+ end
19
+
20
+ def <<(value)
21
+ if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
+ datafiles = value
23
+ datafiles.each do |datafile|
24
+ text = Datafile.read( datafile )
25
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
+ @buf << text
28
+ @buf << "\n\n"
29
+ end
30
+ else ## assume string (e.g. header, comments, etc.)
31
+ text = value
32
+ @buf << text
33
+ @buf << "\n\n"
34
+ end
35
+ end
36
+ alias_method :write, :<<
37
+
38
+ ## todo/fix/check: write only on close? or write on every write and use close for close?
39
+ def close
40
+ File.open( @path, 'w:utf-8' ) do |f|
41
+ f.write @buf
42
+ end
43
+ end
44
+ end # class Bundle
45
+
46
+
47
+ def self.write_bundle( path, datafiles:, header: nil )
48
+ bundle = Bundle.new( path )
49
+ bundle.write( header ) if header
50
+ datafiles.each do |datafile|
51
+ text = read( datafile )
52
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
+ bundle.write( text )
55
+ end
56
+ bundle.close
57
+ end
58
+
59
+ end # module Datafile