sportdb-formats 0.4.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +24 -4
  3. data/Rakefile +3 -3
  4. data/lib/sportdb/formats.rb +25 -2
  5. data/lib/sportdb/formats/config.rb +40 -0
  6. data/lib/sportdb/formats/datafile.rb +42 -62
  7. data/lib/sportdb/formats/datafile_package.rb +160 -0
  8. data/lib/sportdb/formats/match/conf_parser.rb +120 -0
  9. data/lib/sportdb/formats/match/mapper.rb +319 -0
  10. data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
  11. data/lib/sportdb/formats/match/match_parser.rb +659 -0
  12. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
  13. data/lib/sportdb/formats/name_helper.rb +84 -0
  14. data/lib/sportdb/formats/outline_reader.rb +53 -15
  15. data/lib/sportdb/formats/package.rb +172 -160
  16. data/lib/sportdb/formats/parser_helper.rb +81 -0
  17. data/lib/sportdb/formats/score/score_formats.rb +180 -0
  18. data/lib/sportdb/formats/score/score_parser.rb +196 -0
  19. data/lib/sportdb/formats/structs/country.rb +1 -43
  20. data/lib/sportdb/formats/structs/group.rb +25 -0
  21. data/lib/sportdb/formats/structs/league.rb +7 -26
  22. data/lib/sportdb/formats/structs/match.rb +72 -51
  23. data/lib/sportdb/formats/structs/round.rb +14 -4
  24. data/lib/sportdb/formats/structs/season.rb +3 -0
  25. data/lib/sportdb/formats/structs/team.rb +144 -0
  26. data/lib/sportdb/formats/version.rb +2 -2
  27. data/test/helper.rb +83 -1
  28. data/test/test_clubs.rb +3 -3
  29. data/test/test_conf.rb +65 -0
  30. data/test/test_datafile.rb +21 -30
  31. data/test/test_match.rb +0 -6
  32. data/test/test_match_auto.rb +72 -0
  33. data/test/test_match_auto_champs.rb +45 -0
  34. data/test/test_match_auto_euro.rb +37 -0
  35. data/test/test_match_auto_worldcup.rb +61 -0
  36. data/test/test_match_champs.rb +27 -0
  37. data/test/test_match_eng.rb +26 -0
  38. data/test/test_match_euro.rb +27 -0
  39. data/test/test_match_worldcup.rb +27 -0
  40. data/test/test_name_helper.rb +67 -0
  41. data/test/test_outline_reader.rb +3 -3
  42. data/test/test_package.rb +21 -2
  43. data/test/test_package_match.rb +78 -0
  44. data/test/test_scores.rb +67 -51
  45. metadata +32 -12
  46. data/lib/sportdb/formats/scores.rb +0 -253
  47. data/lib/sportdb/formats/structs/club.rb +0 -213
  48. data/test/test_club_helpers.rb +0 -63
  49. data/test/test_datafile_match.rb +0 -65
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7bd2657cda87fd1024bec068f79453d73b0bbac5
4
- data.tar.gz: 7c35301f832b66bf691c9d1964de00c9a9b2371d
3
+ metadata.gz: 1188cc0edf55cd83ccdee10638662c64b302a44e
4
+ data.tar.gz: 06b6b09902513298357a9f47deef83cdf6962b9e
5
5
  SHA512:
6
- metadata.gz: 90ef6ad29934fc9bb692045d9f5e2a447a2dcaf80ab8ca0a3e1b50ae34e95bae7a139e88ef454314c0a26bc978784abb6abd117bf1961e71c65a95046d21efe1
7
- data.tar.gz: 73e55608b287d974d40c4529c3f104496acde866dc3908509adcee6ed8a40dc0f10f8e48f193cd6184ef29c31bdfa3ebd9eb38a38c85924dde207cc0433ea318
6
+ metadata.gz: 337fdb0f298625e847e40a29e47f2ee2e6c638e1ed8a4e72e340008b16de6d10ea5907923530594c0e4739de1a477dd4fe847f10d062372a64c5e4103ef3c06b
7
+ data.tar.gz: 72c0281e3f32a74750f8eb494aafd26b25a3c5f497842f389ea714bcf430f41451346b9aec7aac005dba7b8e8ab283945aca58a49ef7ecc8df6f8116d87b6370
data/Manifest.txt CHANGED
@@ -3,31 +3,51 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/sportdb/formats.rb
6
+ lib/sportdb/formats/config.rb
6
7
  lib/sportdb/formats/datafile.rb
8
+ lib/sportdb/formats/datafile_package.rb
7
9
  lib/sportdb/formats/goals.rb
10
+ lib/sportdb/formats/match/conf_parser.rb
11
+ lib/sportdb/formats/match/mapper.rb
12
+ lib/sportdb/formats/match/mapper_teams.rb
13
+ lib/sportdb/formats/match/match_parser.rb
14
+ lib/sportdb/formats/match/match_parser_auto_conf.rb
15
+ lib/sportdb/formats/name_helper.rb
8
16
  lib/sportdb/formats/outline_reader.rb
9
17
  lib/sportdb/formats/package.rb
10
- lib/sportdb/formats/scores.rb
18
+ lib/sportdb/formats/parser_helper.rb
19
+ lib/sportdb/formats/score/score_formats.rb
20
+ lib/sportdb/formats/score/score_parser.rb
11
21
  lib/sportdb/formats/season_utils.rb
12
- lib/sportdb/formats/structs/club.rb
13
22
  lib/sportdb/formats/structs/country.rb
23
+ lib/sportdb/formats/structs/group.rb
14
24
  lib/sportdb/formats/structs/league.rb
15
25
  lib/sportdb/formats/structs/match.rb
16
26
  lib/sportdb/formats/structs/matchlist.rb
17
27
  lib/sportdb/formats/structs/round.rb
18
28
  lib/sportdb/formats/structs/season.rb
19
29
  lib/sportdb/formats/structs/standings.rb
30
+ lib/sportdb/formats/structs/team.rb
20
31
  lib/sportdb/formats/structs/team_usage.rb
21
32
  lib/sportdb/formats/version.rb
22
33
  test/helper.rb
23
- test/test_club_helpers.rb
24
34
  test/test_clubs.rb
35
+ test/test_conf.rb
25
36
  test/test_csv_reader.rb
26
37
  test/test_datafile.rb
27
- test/test_datafile_match.rb
28
38
  test/test_goals.rb
29
39
  test/test_match.rb
40
+ test/test_match_auto.rb
41
+ test/test_match_auto_champs.rb
42
+ test/test_match_auto_euro.rb
43
+ test/test_match_auto_worldcup.rb
44
+ test/test_match_champs.rb
45
+ test/test_match_eng.rb
46
+ test/test_match_euro.rb
47
+ test/test_match_worldcup.rb
48
+ test/test_name_helper.rb
30
49
  test/test_outline_reader.rb
31
50
  test/test_package.rb
51
+ test/test_package_match.rb
32
52
  test/test_scores.rb
33
53
  test/test_season.rb
data/Rakefile CHANGED
@@ -20,10 +20,10 @@ Hoe.spec 'sportdb-formats' do
20
20
  self.licenses = ['Public Domain']
21
21
 
22
22
  self.extra_deps = [
23
- ['alphabets', '>= 0.1.3'],
24
- ['date-formats', '>= 0.2.4'],
23
+ ['alphabets', '>= 1.0.0'],
24
+ ['date-formats', '>= 1.0.0'],
25
25
  ['csvreader', '>= 1.2.4'],
26
- ['sportdb-langs', '>= 0.0.1'],
26
+ ['sportdb-langs', '>= 0.1.0'],
27
27
 
28
28
  ['rubyzip', '>= 1.2.4' ],
29
29
  ]
data/lib/sportdb/formats.rb CHANGED
@@ -23,29 +23,52 @@ end
23
23
  require 'sportdb/langs'
24
24
 
25
25
 
26
+ ## todo/fix: move shortcut to sportdb/langs!!!
27
+ module SportDb
28
+ Logging = LogUtils::Logging ## logging machinery shortcut; use LogUtils for now
29
+ end
30
+
31
+
32
+
26
33
  ###
27
34
  # our own code
28
35
  require 'sportdb/formats/version' # let version always go first
36
+
37
+ require 'sportdb/formats/config' # let "global" config "framework" go next - why? why not?
38
+
29
39
  require 'sportdb/formats/outline_reader'
30
40
  require 'sportdb/formats/datafile'
41
+ require 'sportdb/formats/datafile_package'
31
42
  require 'sportdb/formats/package'
32
43
  require 'sportdb/formats/season_utils'
33
44
 
45
+ require 'sportdb/formats/name_helper'
46
+ require 'sportdb/formats/parser_helper'
47
+
34
48
  require 'sportdb/formats/structs/country'
35
49
  require 'sportdb/formats/structs/season'
36
50
  require 'sportdb/formats/structs/league'
37
- require 'sportdb/formats/structs/club'
51
+ require 'sportdb/formats/structs/team'
38
52
  require 'sportdb/formats/structs/round'
53
+ require 'sportdb/formats/structs/group'
39
54
  require 'sportdb/formats/structs/match'
40
55
  require 'sportdb/formats/structs/matchlist'
41
56
  require 'sportdb/formats/structs/standings'
42
57
  require 'sportdb/formats/structs/team_usage'
43
58
 
44
59
 
45
- require 'sportdb/formats/scores'
60
+ require 'sportdb/formats/score/score_formats'
61
+ require 'sportdb/formats/score/score_parser'
46
62
  require 'sportdb/formats/goals'
47
63
 
48
64
 
65
+ require 'sportdb/formats/match/mapper'
66
+ require 'sportdb/formats/match/mapper_teams'
67
+ require 'sportdb/formats/match/match_parser'
68
+ require 'sportdb/formats/match/match_parser_auto_conf'
69
+ require 'sportdb/formats/match/conf_parser'
70
+
71
+
49
72
  ## let's put test configuration in its own namespace / module
50
73
  module SportDb
51
74
  class Test ## todo/check: works with module too? use a module - why? why not?
data/lib/sportdb/formats/config.rb ADDED
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+ class Configuration
7
+ ##
8
+ ## todo: allow configure of countries_dir like clubs_dir
9
+ ## "fallback" and use a default built-in world/countries.txt
10
+
11
+ attr_accessor :catalog
12
+
13
+ attr_reader :lang
14
+ def lang=(value)
15
+ ## check/todo: always use to_sym - why? needed?
16
+ DateFormats.lang = value
17
+ ScoreFormats.lang = value
18
+ SportDb.lang.lang = value
19
+
20
+ ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
21
+ ## use SportDb.lang only as a read-only shortcut a la catalog for config.lang!!!!
22
+ end
23
+
24
+ end # class Configuration
25
+
26
+
27
+ ## lets you use
28
+ ## SportDb::Import.configure do |config|
29
+ ## config.lang = 'it'
30
+ ## end
31
+
32
+ def self.configure() yield( config ); end
33
+
34
+ def self.config() @config ||= Configuration.new; end
35
+
36
+ ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
37
+ def self.catalog() config.catalog; end
38
+
39
+ end # module Import
40
+ end # module SportDb
data/lib/sportdb/formats/datafile.rb CHANGED
@@ -3,77 +3,57 @@
3
3
 
4
4
  module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
5
5
 
6
- def self.find( path, pattern )
7
- datafiles = []
8
-
9
- ## check all txt files
10
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
11
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
12
- pp candidates
13
- candidates.each do |candidate|
14
- datafiles << candidate if pattern.match( candidate )
15
- end
16
-
17
- pp datafiles
18
- datafiles
6
+ def self.read( path ) ## todo/check: use as a shortcut helper - why? why not?
7
+ ## note: always assume utf-8 for now!!!
8
+ File.open( path, 'r:utf-8') {|f| f.read }
19
9
  end
20
10
 
21
11
 
22
- CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
23
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
24
- clubs\.txt$
25
- }x
26
-
27
- CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
28
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
29
- clubs\.wiki\.txt$
30
- }x
31
-
32
- CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
33
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
34
- clubs\.props\.txt$
35
- }x
36
-
37
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
38
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
39
-
40
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
41
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
42
- def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
43
-
44
-
45
- LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
46
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.wiki.txt
47
- leagues\.txt$
48
- }x
49
-
50
- def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
51
- def self.match_leagues( path ) LEAGUES_RE.match( path ); end
52
-
53
-
54
- CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
55
- \.conf\.txt$
56
- }x
12
+ ########################
13
+ ## todo/fix: turn into Datafile::Bundle.new and Bundle#write/save -why? why not?
14
+ class Bundle
15
+ def initialize( path )
16
+ @path = path
17
+ @buf = String.new('')
18
+ end
57
19
 
58
- def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
59
- def self.match_conf( path ) CONF_RE.match( path ); end
20
+ def <<(value)
21
+ if value.is_a?( Array ) ## assume array of datafiles (file paths)
22
+ datafiles = value
23
+ datafiles.each do |datafile|
24
+ text = Datafile.read( datafile )
25
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
26
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
27
+ @buf << text
28
+ @buf << "\n\n"
29
+ end
30
+ else ## assume string (e.g. header, comments, etc.)
31
+ text = value
32
+ @buf << text
33
+ @buf << "\n\n"
34
+ end
35
+ end
36
+ alias_method :write, :<<
60
37
 
38
+ ## todo/fix/check: write only on close? or write on every write and use close for close?
39
+ def close
40
+ File.open( @path, 'w:utf-8' ) do |f|
41
+ f.write @buf
42
+ end
43
+ end
44
+ end # class Bundle
61
45
 
62
46
 
63
47
  def self.write_bundle( path, datafiles:, header: nil )
64
- File.open( path, 'w:utf-8') do |fout|
65
- if header
66
- fout.write( header )
67
- fout.write( "\n\n" )
68
- end
69
- datafiles.each do |datafile|
70
- File.open( datafile, 'r:utf-8') do |fin|
71
- text = fin.read
72
- text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
73
- fout.write( text )
74
- end
75
- end
48
+ bundle = Bundle.new( path )
49
+ bundle.write( header ) if header
50
+ datafiles.each do |datafile|
51
+ text = read( datafile )
52
+ ## todo/fix/check: move sub __END__ to Datafile.read and turn it always on - why? why not?
53
+ text = text.sub( /__END__.*/m, '' ) ## note: add/allow support for __END__; use m-multiline flag
54
+ bundle.write( text )
76
55
  end
56
+ bundle.close
77
57
  end
78
58
 
79
59
  end # module Datafile
data/lib/sportdb/formats/datafile_package.rb ADDED
@@ -0,0 +1,160 @@
1
+
2
+ module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
3
+
4
+
5
+ class Package ## use a shared base class for DirPackage, ZipPackage, etc.
6
+ ## exclude pattern
7
+ ## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
8
+ ## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
9
+ EXCLUDE_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
+ \.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
11
+ /
12
+ }x
13
+ def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
14
+ class << self ## check if module << self is possible? (like class << self) - check if there's a better / more idiomatic way??
15
+ alias_method :match_exclude?, :match_exclude
16
+ alias_method :exclude?, :match_exclude
17
+ end
18
+ end # class Package
19
+
20
+
21
+ class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
22
+ class Entry
23
+ def initialize( pack, path )
24
+ @pack = pack ## parent package
25
+ @path = path
26
+ ## note: calculate name (cut-off pack.path!!!), that is, make path relative (to pack)
27
+ ## e.g.
28
+ ## ../../../openfootball/austria/2011-12/1-bundesliga-i.txt
29
+ ## becomes => 2011-12/1-bundesliga-i.txt
30
+ @name = path[ pack.path.length+1..-1 ]
31
+ end
32
+ def name() @name; end
33
+ def read() File.open( @path, 'r:utf-8' ).read; end
34
+ end # class DirPackage::Entry
35
+
36
+
37
+ attr_reader :name, :path
38
+
39
+ def initialize( path )
40
+ ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
+ @path = path ## rename to root_path or base_path or something - why? why not?
42
+
43
+ basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
+ @name = basename
45
+ end
46
+
47
+ def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
+ ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
+ ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
50
+ Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
+ ## todo/fix: (auto) skip and check for directories
52
+ if EXCLUDE_RE.match( path )
53
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
+ elsif pattern.match( path )
55
+ yield( Entry.new( self, path ))
56
+ else
57
+ ## puts " skipping >#{path}<"
58
+ end
59
+ end
60
+ end
61
+
62
+ def find( name )
63
+ Entry.new( self, "#{@path}/#{name}" )
64
+ end
65
+ end # class DirPackage
66
+
67
+
68
+ ## helper wrapper for datafiles in zips
69
+ class ZipPackage < Package
70
+ class Entry
71
+ def initialize( pack, entry )
72
+ @pack = pack
73
+ @entry = entry
74
+ end
75
+
76
+ def name() @entry.name; end
77
+ def read
78
+ txt = @entry.get_input_stream.read
79
+ ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
80
+ txt = txt.force_encoding( Encoding::UTF_8 )
81
+ txt
82
+ end
83
+ end # class ZipPackage::Entry
84
+
85
+ attr_reader :name, :path
86
+
87
+ def initialize( path )
88
+ @path = path
89
+
90
+ extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
91
+ basename = File.basename( path, extname )
92
+ @name = basename
93
+ end
94
+
95
+ def each( pattern: )
96
+ Zip::File.open( @path ) do |zipfile|
97
+ zipfile.each do |entry|
98
+ if entry.directory?
99
+ next ## skip
100
+ elsif entry.file?
101
+ if EXCLUDE_RE.match( entry.name )
102
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
103
+ elsif pattern.match( entry.name )
104
+ yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
105
+ else
106
+ ## puts " skipping >#{entry.name}<"
107
+ end
108
+ else
109
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
110
+ exit 1
111
+ end
112
+ end
113
+ end
114
+ end
115
+
116
+ def find( name )
117
+ entries = match_entry( name )
118
+ if entries.empty?
119
+ puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
120
+ exit 1
121
+ elsif entries.size > 1
122
+ puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
123
+ pp entries
124
+ exit 1
125
+ else
126
+ Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
127
+ end
128
+ end
129
+
130
+ private
131
+ def match_entry( name )
132
+ ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
133
+
134
+ pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
135
+ $
136
+ }x
137
+
138
+ entries = []
139
+ Zip::File.open( @path ) do |zipfile|
140
+ zipfile.each do |entry|
141
+ if entry.directory?
142
+ next ## skip
143
+ elsif entry.file?
144
+ if EXCLUDE_RE.match( entry.name )
145
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
146
+ elsif pattern.match( entry.name )
147
+ entries << entry
148
+ else
149
+ ## no match; skip too
150
+ end
151
+ else
152
+ puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
153
+ exit 1
154
+ end
155
+ end
156
+ end
157
+ entries
158
+ end
159
+ end # class ZipPackage
160
+ end # module Datafile