sportdb-formats 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +24 -4
  3. data/Rakefile +3 -3
  4. data/lib/sportdb/formats.rb +25 -2
  5. data/lib/sportdb/formats/config.rb +40 -0
  6. data/lib/sportdb/formats/datafile.rb +42 -62
  7. data/lib/sportdb/formats/datafile_package.rb +160 -0
  8. data/lib/sportdb/formats/match/conf_parser.rb +120 -0
  9. data/lib/sportdb/formats/match/mapper.rb +319 -0
  10. data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
  11. data/lib/sportdb/formats/match/match_parser.rb +659 -0
  12. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
  13. data/lib/sportdb/formats/name_helper.rb +84 -0
  14. data/lib/sportdb/formats/outline_reader.rb +53 -15
  15. data/lib/sportdb/formats/package.rb +172 -160
  16. data/lib/sportdb/formats/parser_helper.rb +81 -0
  17. data/lib/sportdb/formats/score/score_formats.rb +180 -0
  18. data/lib/sportdb/formats/score/score_parser.rb +196 -0
  19. data/lib/sportdb/formats/structs/country.rb +1 -43
  20. data/lib/sportdb/formats/structs/group.rb +25 -0
  21. data/lib/sportdb/formats/structs/league.rb +7 -26
  22. data/lib/sportdb/formats/structs/match.rb +72 -51
  23. data/lib/sportdb/formats/structs/round.rb +14 -4
  24. data/lib/sportdb/formats/structs/season.rb +3 -0
  25. data/lib/sportdb/formats/structs/team.rb +144 -0
  26. data/lib/sportdb/formats/version.rb +2 -2
  27. data/test/helper.rb +83 -1
  28. data/test/test_clubs.rb +3 -3
  29. data/test/test_conf.rb +65 -0
  30. data/test/test_datafile.rb +21 -30
  31. data/test/test_match.rb +0 -6
  32. data/test/test_match_auto.rb +72 -0
  33. data/test/test_match_auto_champs.rb +45 -0
  34. data/test/test_match_auto_euro.rb +37 -0
  35. data/test/test_match_auto_worldcup.rb +61 -0
  36. data/test/test_match_champs.rb +27 -0
  37. data/test/test_match_eng.rb +26 -0
  38. data/test/test_match_euro.rb +27 -0
  39. data/test/test_match_worldcup.rb +27 -0
  40. data/test/test_name_helper.rb +67 -0
  41. data/test/test_outline_reader.rb +3 -3
  42. data/test/test_package.rb +21 -2
  43. data/test/test_package_match.rb +78 -0
  44. data/test/test_scores.rb +67 -51
  45. metadata +32 -12
  46. data/lib/sportdb/formats/scores.rb +0 -253
  47. data/lib/sportdb/formats/structs/club.rb +0 -213
  48. data/test/test_club_helpers.rb +0 -63
  49. data/test/test_datafile_match.rb +0 -65
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7bd2657cda87fd1024bec068f79453d73b0bbac5
4
- data.tar.gz: 7c35301f832b66bf691c9d1964de00c9a9b2371d
3
+ metadata.gz: 1188cc0edf55cd83ccdee10638662c64b302a44e
4
+ data.tar.gz: 06b6b09902513298357a9f47deef83cdf6962b9e
5
5
  SHA512:
6
- metadata.gz: 90ef6ad29934fc9bb692045d9f5e2a447a2dcaf80ab8ca0a3e1b50ae34e95bae7a139e88ef454314c0a26bc978784abb6abd117bf1961e71c65a95046d21efe1
7
- data.tar.gz: 73e55608b287d974d40c4529c3f104496acde866dc3908509adcee6ed8a40dc0f10f8e48f193cd6184ef29c31bdfa3ebd9eb38a38c85924dde207cc0433ea318
6
+ metadata.gz: 337fdb0f298625e847e40a29e47f2ee2e6c638e1ed8a4e72e340008b16de6d10ea5907923530594c0e4739de1a477dd4fe847f10d062372a64c5e4103ef3c06b
7
+ data.tar.gz: 72c0281e3f32a74750f8eb494aafd26b25a3c5f497842f389ea714bcf430f41451346b9aec7aac005dba7b8e8ab283945aca58a49ef7ecc8df6f8116d87b6370
data/Manifest.txt CHANGED
@@ -3,31 +3,51 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/sportdb/formats.rb
6
+ lib/sportdb/formats/config.rb
6
7
  lib/sportdb/formats/datafile.rb
8
+ lib/sportdb/formats/datafile_package.rb
7
9
  lib/sportdb/formats/goals.rb
10
+ lib/sportdb/formats/match/conf_parser.rb
11
+ lib/sportdb/formats/match/mapper.rb
12
+ lib/sportdb/formats/match/mapper_teams.rb
13
+ lib/sportdb/formats/match/match_parser.rb
14
+ lib/sportdb/formats/match/match_parser_auto_conf.rb
15
+ lib/sportdb/formats/name_helper.rb
8
16
  lib/sportdb/formats/outline_reader.rb
9
17
  lib/sportdb/formats/package.rb
10
- lib/sportdb/formats/scores.rb
18
+ lib/sportdb/formats/parser_helper.rb
19
+ lib/sportdb/formats/score/score_formats.rb
20
+ lib/sportdb/formats/score/score_parser.rb
11
21
  lib/sportdb/formats/season_utils.rb
12
- lib/sportdb/formats/structs/club.rb
13
22
  lib/sportdb/formats/structs/country.rb
23
+ lib/sportdb/formats/structs/group.rb
14
24
  lib/sportdb/formats/structs/league.rb
15
25
  lib/sportdb/formats/structs/match.rb
16
26
  lib/sportdb/formats/structs/matchlist.rb
17
27
  lib/sportdb/formats/structs/round.rb
18
28
  lib/sportdb/formats/structs/season.rb
19
29
  lib/sportdb/formats/structs/standings.rb
30
+ lib/sportdb/formats/structs/team.rb
20
31
  lib/sportdb/formats/structs/team_usage.rb
21
32
  lib/sportdb/formats/version.rb
22
33
  test/helper.rb
23
- test/test_club_helpers.rb
24
34
  test/test_clubs.rb
35
+ test/test_conf.rb
25
36
  test/test_csv_reader.rb
26
37
  test/test_datafile.rb
27
- test/test_datafile_match.rb
28
38
  test/test_goals.rb
29
39
  test/test_match.rb
40
+ test/test_match_auto.rb
41
+ test/test_match_auto_champs.rb
42
+ test/test_match_auto_euro.rb
43
+ test/test_match_auto_worldcup.rb
44
+ test/test_match_champs.rb
45
+ test/test_match_eng.rb
46
+ test/test_match_euro.rb
47
+ test/test_match_worldcup.rb
48
+ test/test_name_helper.rb
30
49
  test/test_outline_reader.rb
31
50
  test/test_package.rb
51
+ test/test_package_match.rb
32
52
  test/test_scores.rb
33
53
  test/test_season.rb
data/Rakefile CHANGED
@@ -20,10 +20,10 @@ Hoe.spec 'sportdb-formats' do
20
20
  self.licenses = ['Public Domain']
21
21
 
22
22
  self.extra_deps = [
23
- ['alphabets', '>= 0.1.3'],
24
- ['date-formats', '>= 0.2.4'],
23
+ ['alphabets', '>= 1.0.0'],
24
+ ['date-formats', '>= 1.0.0'],
25
25
  ['csvreader', '>= 1.2.4'],
26
- ['sportdb-langs', '>= 0.0.1'],
26
+ ['sportdb-langs', '>= 0.1.0'],
27
27
 
28
28
  ['rubyzip', '>= 1.2.4' ],
29
29
  ]
data/lib/sportdb/formats.rb CHANGED
@@ -23,29 +23,52 @@ end
23
23
  require 'sportdb/langs'
24
24
 
25
25
 
26
+ ## todo/fix: move shortcut to sportdb/langs!!!
27
+ module SportDb
28
+ Logging = LogUtils::Logging ## logging machinery shortcut; use LogUtils for now
29
+ end
30
+
31
+
32
+
26
33
  ###
27
34
  # our own code
28
35
  require 'sportdb/formats/version' # let version always go first
36
+
37
+ require 'sportdb/formats/config' # let "global" config "framework" go next - why? why not?
38
+
29
39
  require 'sportdb/formats/outline_reader'
30
40
  require 'sportdb/formats/datafile'
41
+ require 'sportdb/formats/datafile_package'
31
42
  require 'sportdb/formats/package'
32
43
  require 'sportdb/formats/season_utils'
33
44
 
45
+ require 'sportdb/formats/name_helper'
46
+ require 'sportdb/formats/parser_helper'
47
+
34
48
  require 'sportdb/formats/structs/country'
35
49
  require 'sportdb/formats/structs/season'
36
50
  require 'sportdb/formats/structs/league'
37
- require 'sportdb/formats/structs/club'
51
+ require 'sportdb/formats/structs/team'
38
52
  require 'sportdb/formats/structs/round'
53
+ require 'sportdb/formats/structs/group'
39
54
  require 'sportdb/formats/structs/match'
40
55
  require 'sportdb/formats/structs/matchlist'
41
56
  require 'sportdb/formats/structs/standings'
42
57
  require 'sportdb/formats/structs/team_usage'
43
58
 
44
59
 
45
- require 'sportdb/formats/scores'
60
+ require 'sportdb/formats/score/score_formats'
61
+ require 'sportdb/formats/score/score_parser'
46
62
  require 'sportdb/formats/goals'
47
63
 
48
64
 
65
+ require 'sportdb/formats/match/mapper'
66
+ require 'sportdb/formats/match/mapper_teams'
67
+ require 'sportdb/formats/match/match_parser'
68
+ require 'sportdb/formats/match/match_parser_auto_conf'
69
+ require 'sportdb/formats/match/conf_parser'
70
+
71
+
49
72
  ## let's put test configuration in its own namespace / module
50
73
  module SportDb
51
74
  class Test ## todo/check: works with module too? use a module - why? why not?
data/lib/sportdb/formats/config.rb ADDED
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+ class Configuration
7
+ ##
8
+ ## todo: allow configure of countries_dir like clubs_dir
9
+ ## "fallback" and use a default built-in world/countries.txt
10
+
11
+ attr_accessor :catalog
12
+
13
+ attr_reader :lang
14
+ def lang=(value)
15
+ ## check/todo: always use to_sym - why? needed?
16
+ DateFormats.lang = value
17
+ ScoreFormats.lang = value
18
+ SportDb.lang.lang = value
19
+
20
+ ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
21
+ ## use SportDb.lang only as a read-only shortcut a la catalog for config.lang!!!!
22
+ end
23
+
24
+ end # class Configuration
25
+
26
+
27
+ ## lets you use
28
+ ## SportDb::Import.configure do |config|
29
+ ## config.lang = 'it'
30
+ ## end
31
+
32
+ def self.configure() yield( config ); end
33
+
34
+ def self.config() @config ||= Configuration.new; end
35
+
36
+ ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
37
+ def self.catalog() config.catalog; end
38
+
39
+ end # module Import
40
+ end # module SportDb
data/lib/sportdb/formats/datafile.rb CHANGED
@@ -3,77 +3,57 @@
3
3
 
4
4
  module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
5
 
6
- def self.find( path, pattern )
7
- datafiles = []
8
-
9
- ## check all txt files
10
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
11
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
12
- pp candidates
13
- candidates.each do |candidate|
14
- datafiles << candidate if pattern.match( candidate )
15
- end
16
-
17
- pp datafiles
18
- datafiles
6
+ def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
+ ## note: always assume utf-8 for now!!!
8
+ File.open( path, 'r:utf-8') {|f| f.read }
19
9
  end
20
10
 
21
11
 
22
- CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
23
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
24
- clubs\.txt$
25
- }x
26
-
27
- CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
28
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
29
- clubs\.wiki\.txt$
30
- }x
31
-
32
- CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
33
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
34
- clubs\.props\.txt$
35
- }x
36
-
37
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
38
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
39
-
40
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
41
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
42
- def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
43
-
44
-
45
- LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
46
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.wiki.txt
47
- leagues\.txt$
48
- }x
49
-
50
- def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
51
- def self.match_leagues( path ) LEAGUES_RE.match( path ); end
52
-
53
-
54
- CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
55
- \.conf\.txt$
56
- }x
12
+ ########################
13
+ ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
+ class Bundle
15
+ def initialize( path )
16
+ @path = path
17
+ @buf = String.new('')
18
+ end
57
19
 
58
- def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
59
- def self.match_conf( path ) CONF_RE.match( path ); end
20
+ def <<(value)
21
+ if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
+ datafiles = value
23
+ datafiles.each do |datafile|
24
+ text = Datafile.read( datafile )
25
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
+ @buf << text
28
+ @buf << "\n\n"
29
+ end
30
+ else ## assume string (e.g. header, comments, etc.)
31
+ text = value
32
+ @buf << text
33
+ @buf << "\n\n"
34
+ end
35
+ end
36
+ alias_method :write, :<<
60
37
 
38
+ ## todo/fix/check: write only on close? or write on every write and use close for close?
39
+ def close
40
+ File.open( @path, 'w:utf-8' ) do |f|
41
+ f.write @buf
42
+ end
43
+ end
44
+ end # class Bundle
61
45
 
62
46
 
63
47
  def self.write_bundle( path, datafiles:, header: nil )
64
- File.open( path, 'w:utf-8') do |fout|
65
- if header
66
- fout.write( header )
67
- fout.write( "\n\n" )
68
- end
69
- datafiles.each do |datafile|
70
- File.open( datafile, 'r:utf-8') do |fin|
71
- text = fin.read
72
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
73
- fout.write( text )
74
- end
75
- end
48
+ bundle = Bundle.new( path )
49
+ bundle.write( header ) if header
50
+ datafiles.each do |datafile|
51
+ text = read( datafile )
52
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
+ bundle.write( text )
76
55
  end
56
+ bundle.close
77
57
  end
78
58
 
79
59
  end # module Datafile
data/lib/sportdb/formats/datafile_package.rb ADDED
@@ -0,0 +1,160 @@
1
+
2
+ module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
3
+
4
+
5
+ class Package ## use a shared base class for DirPackage, ZipPackage, etc.
6
+ ## exclude pattern
7
+ ## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
8
+ ## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
9
+ EXCLUDE_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
+ \.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
11
+ /
12
+ }x
13
+ def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
14
+ class << self ## check if module << self is possible? (like class << self) - check if there's a better / more idiomatic way??
15
+ alias_method :match_exclude?, :match_exclude
16
+ alias_method :exclude?, :match_exclude
17
+ end
18
+ end # class Package
19
+
20
+
21
+ class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
22
+ class Entry
23
+ def initialize( pack, path )
24
+ @pack = pack ## parent package
25
+ @path = path
26
+ ## note: calculate name (cut-off pack.path!!!), that is, make path relative (to pack)
27
+ ## e.g.
28
+ ## ../../../openfootball/austria/2011-12/1-bundesliga-i.txt
29
+ ## becomes => 2011-12/1-bundesliga-i.txt
30
+ @name = path[ pack.path.length+1..-1 ]
31
+ end
32
+ def name() @name; end
33
+ def read() File.open( @path, 'r:utf-8' ).read; end
34
+ end # class DirPackage::Entry
35
+
36
+
37
+ attr_reader :name, :path
38
+
39
+ def initialize( path )
40
+ ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
+ @path = path ## rename to root_path or base_path or something - why? why not?
42
+
43
+ basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
+ @name = basename
45
+ end
46
+
47
+ def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
+ ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
+ ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
50
+ Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
+ ## todo/fix: (auto) skip and check for directories
52
+ if EXCLUDE_RE.match( path )
53
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
+ elsif pattern.match( path )
55
+ yield( Entry.new( self, path ))
56
+ else
57
+ ## puts " skipping >#{path}<"
58
+ end
59
+ end
60
+ end
61
+
62
+ def find( name )
63
+ Entry.new( self, "#{@path}/#{name}" )
64
+ end
65
+ end # class DirPackage
66
+
67
+
68
+ ## helper wrapper for datafiles in zips
69
+ class ZipPackage < Package
70
+ class Entry
71
+ def initialize( pack, entry )
72
+ @pack = pack
73
+ @entry = entry
74
+ end
75
+
76
+ def name() @entry.name; end
77
+ def read
78
+ txt = @entry.get_input_stream.read
79
+ ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
80
+ txt = txt.force_encoding( Encoding::UTF_8 )
81
+ txt
82
+ end
83
+ end # class ZipPackage::Entry
84
+
85
+ attr_reader :name, :path
86
+
87
+ def initialize( path )
88
+ @path = path
89
+
90
+ extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
91
+ basename = File.basename( path, extname )
92
+ @name = basename
93
+ end
94
+
95
+ def each( pattern: )
96
+ Zip::File.open( @path ) do |zipfile|
97
+ zipfile.each do |entry|
98
+ if entry.directory?
99
+ next ## skip
100
+ elsif entry.file?
101
+ if EXCLUDE_RE.match( entry.name )
102
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
103
+ elsif pattern.match( entry.name )
104
+ yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
105
+ else
106
+ ## puts " skipping >#{entry.name}<"
107
+ end
108
+ else
109
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
110
+ exit 1
111
+ end
112
+ end
113
+ end
114
+ end
115
+
116
+ def find( name )
117
+ entries = match_entry( name )
118
+ if entries.empty?
119
+ puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
120
+ exit 1
121
+ elsif entries.size > 1
122
+ puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
123
+ pp entries
124
+ exit 1
125
+ else
126
+ Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
127
+ end
128
+ end
129
+
130
+ private
131
+ def match_entry( name )
132
+ ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
133
+
134
+ pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
135
+ $
136
+ }x
137
+
138
+ entries = []
139
+ Zip::File.open( @path ) do |zipfile|
140
+ zipfile.each do |entry|
141
+ if entry.directory?
142
+ next ## skip
143
+ elsif entry.file?
144
+ if EXCLUDE_RE.match( entry.name )
145
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
146
+ elsif pattern.match( entry.name )
147
+ entries << entry
148
+ else
149
+ ## no match; skip too
150
+ end
151
+ else
152
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
153
+ exit 1
154
+ end
155
+ end
156
+ end
157
+ entries
158
+ end
159
+ end # class ZipPackage
160
+ end # module Datafile