sportdb-formats 1.1.6 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +37 -1
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +102 -12
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 0c8eb774dbb161f38c9cc4daffb32c8c4ea3d80c
4
- data.tar.gz: 3e5b51d861148bba7c2f04c4f8acd30a8be486e8
2
+ SHA256:
3
+ metadata.gz: 51b22ea9a05b89006e8048871b2616d5a4fadd4fcb435dc1fd33c1194a6182de
4
+ data.tar.gz: '069b114b3679b2f0bf1fda38c4b97b52a949d7e3e0ab1e9cad41e0f08b540c28'
5
5
  SHA512:
6
- metadata.gz: 3eee51ac8a01998082b1b00e6397ded3224dac3dad299a001a812f62c83e579246ced7af214b54c3f964211f5faec1cb08e708dd4237b28d7ac1782807de104f
7
- data.tar.gz: d257a4f169babec6caf2bd34583a8e01fa1cfb76f3f72d0d13c4f38aadc3ffc2082591c73ed976e9ae7e0bbe08383c0f2f278d1358dc2219d4283912e315bff5
6
+ metadata.gz: 29f00ceff229bc2e8445ee6ef6c2c0a7ecd27b00c899535130f10f480ec0526b18d5e3df30e78f1577f35f6e65fe6aafff2f99ef587eb869445b9b9ab5f20c2c
7
+ data.tar.gz: c97c1568d3b063ba2e25afa21c07479ac4f098df7c0bf2e5a3701a5ef2eb9d8c2c56415573191bae0cb7fbbc07e8ee229eca30fdeda9b1a0713615cdf4c46b5a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 1.2.0
2
+
1
3
  ### 0.0.1 / 2019-10-28
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -3,16 +3,15 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/sportdb/formats.rb
6
- lib/sportdb/formats/country/country_index.rb
7
6
  lib/sportdb/formats/country/country_reader.rb
8
7
  lib/sportdb/formats/datafile.rb
9
8
  lib/sportdb/formats/datafile_package.rb
10
- lib/sportdb/formats/event/event_index.rb
11
9
  lib/sportdb/formats/event/event_reader.rb
12
10
  lib/sportdb/formats/goals.rb
13
- lib/sportdb/formats/league/league_index.rb
11
+ lib/sportdb/formats/ground/ground_reader.rb
14
12
  lib/sportdb/formats/league/league_outline_reader.rb
15
13
  lib/sportdb/formats/league/league_reader.rb
14
+ lib/sportdb/formats/lines_reader.rb
16
15
  lib/sportdb/formats/match/conf_parser.rb
17
16
  lib/sportdb/formats/match/mapper.rb
18
17
  lib/sportdb/formats/match/mapper_teams.rb
@@ -21,31 +20,11 @@ lib/sportdb/formats/match/match_parser_auto_conf.rb
21
20
  lib/sportdb/formats/outline_reader.rb
22
21
  lib/sportdb/formats/package.rb
23
22
  lib/sportdb/formats/parser_helper.rb
24
- lib/sportdb/formats/team/club_index.rb
23
+ lib/sportdb/formats/search/sport.rb
24
+ lib/sportdb/formats/search/world.rb
25
25
  lib/sportdb/formats/team/club_index_history.rb
26
26
  lib/sportdb/formats/team/club_reader.rb
27
27
  lib/sportdb/formats/team/club_reader_history.rb
28
28
  lib/sportdb/formats/team/club_reader_props.rb
29
- lib/sportdb/formats/team/national_team_index.rb
30
- lib/sportdb/formats/team/team_index.rb
31
29
  lib/sportdb/formats/team/wiki_reader.rb
32
30
  lib/sportdb/formats/version.rb
33
- test/helper.rb
34
- test/test_club_index.rb
35
- test/test_club_index_history.rb
36
- test/test_club_reader.rb
37
- test/test_club_reader_history.rb
38
- test/test_club_reader_props.rb
39
- test/test_country_index.rb
40
- test/test_country_reader.rb
41
- test/test_datafile.rb
42
- test/test_datafile_package.rb
43
- test/test_goals.rb
44
- test/test_league_index.rb
45
- test/test_league_outline_reader.rb
46
- test/test_league_reader.rb
47
- test/test_outline_reader.rb
48
- test/test_package.rb
49
- test/test_package_match.rb
50
- test/test_regex.rb
51
- test/test_wiki_reader.rb
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'sportdb-formats' do
11
11
  self.urls = { home: 'https://github.com/sportdb/sport.db' }
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'opensport@googlegroups.com'
14
+ self.email = 'gerald.bauer@gmail.com'
15
15
 
16
16
  # switch extension to .markdown for gihub formatting
17
17
  self.readme_file = 'README.md'
data/lib/sportdb/formats/country/country_reader.rb CHANGED
@@ -1,142 +1,142 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class CountryReader
9
-
10
-
11
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
- parse( txt )
14
- end
15
-
16
- def self.parse( txt )
17
- new( txt ).parse
18
- end
19
-
20
-
21
- def initialize( txt )
22
- @txt = txt
23
- end
24
-
25
- def parse
26
- countries = []
27
- last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
-
29
- OutlineReader.parse( @txt ).each do |node|
30
-
31
- node_type = node[0]
32
-
33
- if [:h1, :h2].include?( node_type )
34
- ## skip headings (and headings) for now too
35
- elsif node_type == :p ## paragraph
36
- lines = node[1]
37
- lines.each do |line|
38
- if line.start_with?( '|' )
39
- ## assume continuation with line of alternative names
40
- ## note: skip leading pipe
41
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
- ## strip and squish (white)spaces
43
- # e.g. East Germany (-1989) => East Germany (-1989)
44
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
- last_country.alt_names += values
46
- elsif line =~ /^-[ ]*(\d{4})
47
- [ ]+
48
- (.+)$
49
- /x ## check for historic lines e.g. -1989
50
- year = $1.to_i
51
- parts = $2.split( /=>|⇒/ )
52
- values = parts[0].split( ',' )
53
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
-
55
- name = values[0]
56
- code = values[1]
57
-
58
- last_country = country = Country.new( name: "#{name} (-#{year})",
59
- code: code )
60
- ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
-
62
- countries << country
63
- ## todo/fix: add reference to country today (in parts[1] !!!!)
64
- else
65
- ## assume "regular" line
66
- ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
- ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
- if line =~ /^([a-z]{2,4})
69
- [ ]+
70
- (.+)$/x
71
- key = $1
72
- values = $2.split( ',' )
73
- ## strip and squish (white)spaces
74
- # e.g. East Germany (-1989) => East Germany (-1989)
75
- values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
-
77
- ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
- ## e.g. England › UK => England
79
- ## Puerto Rico › US => Puerto Rico
80
- geos = split_geo( values[0] )
81
- name = geos[0] ## note: ignore all other geos for now
82
-
83
- ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
- code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
- values[1]
86
- else
87
- if values[1]
88
- puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
- else
90
- puts "** !!! ERROR !!! missing code for (canonical) country name"
91
- end
92
- exit 1
93
- end
94
-
95
- tags = if values[2] ## check if tags presents
96
- split_tags( values[2] )
97
- else
98
- []
99
- end
100
-
101
- last_country = country = Country.new( key: key,
102
- name: name,
103
- code: code,
104
- tags: tags )
105
- countries << country
106
- else
107
- puts "** !! ERROR - missing key for (canonical) country name"
108
- exit 1
109
- end
110
- end
111
- end # each line
112
- else
113
- puts "** !! ERROR - unknown node type / (input) source line:"
114
- pp node
115
- exit 1
116
- end
117
- end # each node
118
-
119
- countries
120
- end # method parse
121
-
122
-
123
-
124
- #######################################
125
- ## helpers
126
- def split_tags( str )
127
- tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
- tags = tags.map { |tag| tag.strip }
129
- tags
130
- end
131
-
132
- def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
- ## split into geo tree
134
- geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
- geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
- geos
137
- end
138
-
139
- end # class CountryReader
140
-
141
- end # module Import
142
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class CountryReader
9
+
10
+
11
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
+ parse( txt )
14
+ end
15
+
16
+ def self.parse( txt )
17
+ new( txt ).parse
18
+ end
19
+
20
+
21
+ def initialize( txt )
22
+ @txt = txt
23
+ end
24
+
25
+ def parse
26
+ countries = []
27
+ last_country = nil ## note/check/fix: use countries[-1] - why? why not?
28
+
29
+ OutlineReader.parse( @txt ).each do |node|
30
+
31
+ node_type = node[0]
32
+
33
+ if [:h1, :h2].include?( node_type )
34
+ ## skip headings (and headings) for now too
35
+ elsif node_type == :p ## paragraph
36
+ lines = node[1]
37
+ lines.each do |line|
38
+ if line.start_with?( '|' )
39
+ ## assume continuation with line of alternative names
40
+ ## note: skip leading pipe
41
+ values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
42
+ ## strip and squish (white)spaces
43
+ # e.g. East Germany (-1989) => East Germany (-1989)
44
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
45
+ last_country.alt_names += values
46
+ elsif line =~ /^-[ ]*(\d{4})
47
+ [ ]+
48
+ (.+)$
49
+ /x ## check for historic lines e.g. -1989
50
+ year = $1.to_i
51
+ parts = $2.split( /=>|⇒/ )
52
+ values = parts[0].split( ',' )
53
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
54
+
55
+ name = values[0]
56
+ code = values[1]
57
+
58
+ last_country = country = Country.new( name: "#{name} (-#{year})",
59
+ code: code )
60
+ ## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
61
+
62
+ countries << country
63
+ ## todo/fix: add reference to country today (in parts[1] !!!!)
64
+ else
65
+ ## assume "regular" line
66
+ ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
67
+ ## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
68
+ if line =~ /^([a-z]{2,4})
69
+ [ ]+
70
+ (.+)$/x
71
+ key = $1
72
+ values = $2.split( ',' )
73
+ ## strip and squish (white)spaces
74
+ # e.g. East Germany (-1989) => East Germany (-1989)
75
+ values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
76
+
77
+ ## note: remove "overlords" from geo-tree marked territories e.g. UK, US, etc. from name
78
+ ## e.g. England › UK => England
79
+ ## Puerto Rico › US => Puerto Rico
80
+ geos = split_geo( values[0] )
81
+ name = geos[0] ## note: ignore all other geos for now
82
+
83
+ ## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
84
+ code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
85
+ values[1]
86
+ else
87
+ if values[1]
88
+ puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
89
+ else
90
+ puts "** !!! ERROR !!! missing code for (canonical) country name"
91
+ end
92
+ exit 1
93
+ end
94
+
95
+ tags = if values[2] ## check if tags presents
96
+ split_tags( values[2] )
97
+ else
98
+ []
99
+ end
100
+
101
+ last_country = country = Country.new( key: key,
102
+ name: name,
103
+ code: code,
104
+ tags: tags )
105
+ countries << country
106
+ else
107
+ puts "** !! ERROR - missing key for (canonical) country name"
108
+ exit 1
109
+ end
110
+ end
111
+ end # each line
112
+ else
113
+ puts "** !! ERROR - unknown node type / (input) source line:"
114
+ pp node
115
+ exit 1
116
+ end
117
+ end # each node
118
+
119
+ countries
120
+ end # method parse
121
+
122
+
123
+
124
+ #######################################
125
+ ## helpers
126
+ def split_tags( str )
127
+ tags = str.split( /[|<>‹›]/ ) ## allow pipe (|) and (<>‹›) as divider for now - add more? why? why not?
128
+ tags = tags.map { |tag| tag.strip }
129
+ tags
130
+ end
131
+
132
+ def split_geo( str ) ## todo/check: rename to parse_geo(s) - why? why not?
133
+ ## split into geo tree
134
+ geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹ for now
135
+ geos = geos.map { |geo| geo.strip } ## remove all whitespaces
136
+ geos
137
+ end
138
+
139
+ end # class CountryReader
140
+
141
+ end # module Import
142
+ end # module SportDb
data/lib/sportdb/formats/datafile.rb CHANGED
@@ -1,59 +1,59 @@
1
- # encoding: utf-8
2
-
3
-
4
- module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
-
6
- def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
- ## note: always assume utf-8 for now!!!
8
- File.open( path, 'r:utf-8') {|f| f.read }
9
- end
10
-
11
-
12
- ########################
13
- ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
- class Bundle
15
- def initialize( path )
16
- @path = path
17
- @buf = String.new('')
18
- end
19
-
20
- def <<(value)
21
- if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
- datafiles = value
23
- datafiles.each do |datafile|
24
- text = Datafile.read( datafile )
25
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
- @buf << text
28
- @buf << "\n\n"
29
- end
30
- else ## assume string (e.g. header, comments, etc.)
31
- text = value
32
- @buf << text
33
- @buf << "\n\n"
34
- end
35
- end
36
- alias_method :write, :<<
37
-
38
- ## todo/fix/check: write only on close? or write on every write and use close for close?
39
- def close
40
- File.open( @path, 'w:utf-8' ) do |f|
41
- f.write @buf
42
- end
43
- end
44
- end # class Bundle
45
-
46
-
47
- def self.write_bundle( path, datafiles:, header: nil )
48
- bundle = Bundle.new( path )
49
- bundle.write( header ) if header
50
- datafiles.each do |datafile|
51
- text = read( datafile )
52
- ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
- bundle.write( text )
55
- end
56
- bundle.close
57
- end
58
-
59
- end # module Datafile
1
+ # encoding: utf-8
2
+
3
+
4
+ module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
+
6
+ def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
+ ## note: always assume utf-8 for now!!!
8
+ File.open( path, 'r:utf-8') {|f| f.read }
9
+ end
10
+
11
+
12
+ ########################
13
+ ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
+ class Bundle
15
+ def initialize( path )
16
+ @path = path
17
+ @buf = String.new('')
18
+ end
19
+
20
+ def <<(value)
21
+ if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
+ datafiles = value
23
+ datafiles.each do |datafile|
24
+ text = Datafile.read( datafile )
25
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
+ @buf << text
28
+ @buf << "\n\n"
29
+ end
30
+ else ## assume string (e.g. header, comments, etc.)
31
+ text = value
32
+ @buf << text
33
+ @buf << "\n\n"
34
+ end
35
+ end
36
+ alias_method :write, :<<
37
+
38
+ ## todo/fix/check: write only on close? or write on every write and use close for close?
39
+ def close
40
+ File.open( @path, 'w:utf-8' ) do |f|
41
+ f.write @buf
42
+ end
43
+ end
44
+ end # class Bundle
45
+
46
+
47
+ def self.write_bundle( path, datafiles:, header: nil )
48
+ bundle = Bundle.new( path )
49
+ bundle.write( header ) if header
50
+ datafiles.each do |datafile|
51
+ text = read( datafile )
52
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
+ bundle.write( text )
55
+ end
56
+ bundle.close
57
+ end
58
+
59
+ end # module Datafile