datafile 0.2.5 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,7 +6,7 @@
6
6
 
7
7
  def read_known_datasets( path )
8
8
  ary = []
9
- lines = File.read( path ) ### fix: use File.read_utf8 ??
9
+ lines = File.open( path, 'r:utf-8' ).read
10
10
  lines.each_line do |line|
11
11
  ## skip blank and comment lines
12
12
  next if /^\s*#/ =~ line || /^\s*$/ =~ line
@@ -27,13 +27,12 @@ class Dataset
27
27
  @opts = opts
28
28
  end
29
29
 
30
- attr_reader :name
31
- attr_reader :opts
30
+ attr_reader :name, :opts
31
+
32
+ ## convenience helpers for known opts
33
+ def setup() @opts[:setup]; end ## note: return nil if not found/set
34
+ def format() @opts[:format] || 'txt'; end ## note: assume default is txt (other formats incl. csv) for now - why? why not?
32
35
 
33
- def setup
34
- value = @opts[:setup] || 'all'
35
- "setups/#{value}"
36
- end
37
36
 
38
37
  def file? # note: use file? (not exist? might use zip? later to check if zip exists? -why? why not?)
39
38
  ## hack/convenience shortcut:
@@ -49,7 +48,7 @@ class Dataset
49
48
  basename = parts[1]
50
49
  ## e.g.
51
50
  ## ./ (working folder) => at-austria
52
- ## openfootball/at-austria
51
+ ## openfootball/at-austria
53
52
  if File.basename( Dir.getwd ) == basename
54
53
  puts " bingo!! working folder >#{basename}< matches dataset"
55
54
  true ## return true
@@ -75,23 +74,18 @@ class WorldDataset < Dataset
75
74
 
76
75
  super( name, opts ) ## todo/check: just use super (e.g. pass along all params - why? why not?)
77
76
  end
78
-
79
- def zip_worker() WorldZipDataset.new( self ); end ## check: change (rename) just use zip or use worker_zip?? - why, why not?
80
- def file_worker() WorldFileDataset.new( self ); end
81
77
  end # class WorldDataset
82
78
 
83
79
 
84
80
 
85
81
  class FootballDataset < Dataset
86
82
 
87
- @@known_football_datasets = nil
83
+ def self.build_known_datasets
84
+ read_known_datasets( "#{::Datafile.data_path}/football.txt" )
85
+ end
88
86
 
89
87
  def self.known_datasets
90
- ## return array of known datasets
91
- ### todo/fix - use \\= idiom - why, why not??
92
- if @@known_football_datasets.nil?
93
- @@known_football_datasets = read_known_datasets( "#{::Datafile.data_path}/football.txt" )
94
- end
88
+ @@known_football_datasets ||= build_known_datasets
95
89
  @@known_football_datasets
96
90
  end
97
91
 
@@ -101,7 +95,11 @@ class FootballDataset < Dataset
101
95
  ## check if name include slash (e.g. /)
102
96
  ## - if not auto-add openfootball/ (default)
103
97
  if name_easy.index( '/' ).nil?
104
- name = "openfootball/#{name_easy}"
98
+ if opts[:format] == 'csv'
99
+ name = "footballcsv/#{name_easy}"
100
+ else
101
+ name = "openfootball/#{name_easy}"
102
+ end
105
103
  else
106
104
  name = name_easy ## just pass through for now
107
105
  end
@@ -109,14 +107,11 @@ class FootballDataset < Dataset
109
107
  super( name, opts )
110
108
 
111
109
  ### check for known datasets; warn: if not known (might be a typo)
112
- unless FootballDataset.known_datasets.include?( name )
110
+ unless self.class.known_datasets.include?( name )
113
111
  ## todo: use logger - why, why not??
114
112
  puts "*** warn: unknown football dataset '#{name}', typo ???"
115
113
  end
116
114
  end
117
-
118
- def zip_worker() FootballZipDataset.new( self ); end
119
- def file_worker() FootballFileDataset.new( self ); end
120
115
  end # class FootballDataset
121
116
 
122
117
 
@@ -133,9 +128,6 @@ class BeerDataset < Dataset
133
128
 
134
129
  super( name, opts )
135
130
  end
136
-
137
- def zip_worker() BeerZipDataset.new( self ); end
138
- def file_worker() BeerFileDataset.new( self ); end
139
131
  end # class BeerDataset
140
132
 
141
133
 
@@ -3,8 +3,8 @@
3
3
  module Datafile
4
4
 
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 2
7
- PATCH = 5
6
+ MINOR = 3
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -17,7 +17,10 @@ module Datafile
17
17
 
18
18
  def self.root
19
19
  "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20
- end
20
+ end
21
21
 
22
- end # module Datafile
22
+ def self.data_path
23
+ "#{root}/data"
24
+ end
23
25
 
26
+ end # module Datafile
@@ -2,28 +2,26 @@
2
2
 
3
3
  module Datafile
4
4
 
5
- class FileDataset < DatasetNode
5
+ class FileDataset
6
6
  ## read dataset from file(system)
7
7
 
8
- @@registry = nil
8
+ include LogUtils::Logging
9
+
9
10
 
10
11
  def self.registry
11
- ## use ||= why, why not?? - add Registry as nested class, why, why not ??
12
- if @@registry.nil?
13
- @@registry = FileDatasetRegistry.new
14
- end
12
+ @@registry ||= FileDatasetRegistry.new
15
13
  @@registry
16
14
  end
17
15
 
18
16
  def initialize( dataset )
19
- super( dataset )
17
+ @dataset = dataset
20
18
  end
21
19
 
22
- def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
20
+ def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
23
21
  ## note: for easy testing allow "in situ" datasets
24
22
  ## e.g. ./ (e.g. mu-mauritius) is openfootball/mu-mauritius
25
23
  ## split name in org/user + project (e.g. openfootball/at-austria)
26
- parts = name.split( '/' )
24
+ parts = @dataset.name.split( '/' )
27
25
 
28
26
  basename = parts[1]
29
27
  if File.basename( Dir.getwd ) == basename
@@ -31,63 +29,28 @@ class FileDataset < DatasetNode
31
29
  return Dir.getwd ## assume working directory/folder is repo dir
32
30
  end
33
31
 
34
- registry.lookup( name )
32
+ registry.lookup( @dataset.name )
35
33
  end
36
34
 
37
35
  def dump
38
36
  ## for debugging dump dataset -- todo (also check if folder exists ??)
39
- puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
37
+ puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.inspect}" ## use opts.inspect instead of to_json - why? why not?
40
38
  puts " repo-dir '#{repo_dir}'"
41
39
  end
42
40
 
43
- private
44
- def registry ## convenience method to access "static" shared class variable
45
- FileDataset.registry ## self.registry not working?? - or self.registry() -why, why not??
46
- end
47
- end # class FileDataset
48
-
49
-
50
-
51
- class FootballFileDataset < FileDataset
52
-
53
- def initialize( dataset )
54
- super( dataset )
55
- end
56
-
57
- def read()
58
- logger.info( "read football-dataset (file) '#{name}', '#{setup}'" )
59
-
60
- SportDb.read_setup( setup, repo_dir )
61
- end
62
- end # class FootballFileDataset
63
-
41
+ def read
42
+ if @dataset.is_a?( FootballDataset )
43
+ logger.info( "read football dataset (file) '#{@dataset.name}', '#{@dataset.setup}'" )
64
44
 
65
- class WorldFileDataset < FileDataset
66
-
67
- def initialize( dataset )
68
- super( dataset )
69
- end
70
-
71
- def read()
72
- logger.info( "read world-dataset (file) '#{name}', '#{setup}'" )
73
-
74
- ## WorldDb.read_setup( 'setups/countries', WORLD_DB_INCLUDE_PATH, skip_tags: true )
75
- WorldDb.read_setup( setup, repo_dir, skip_tags: true )
76
- end
77
- end # class WorldFileDataset
78
-
79
- class BeerFileDataset < FileDataset
80
-
81
- def initialize( dataset )
82
- super( dataset )
83
- end
84
-
85
- def read()
86
- logger.info( "read beer-dataset (file) '#{name}', '#{setup}'" )
87
-
88
- BeerDb.read_setup( setup, repo_dir )
45
+ pack = SportDb::DirPackage.new( repo_dir )
46
+ pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now
47
+ else
48
+ logger.info( "TODO/FIX: read dataset (file) '#{@dataset.name}', '#{@dataset.setup}'; sorry" )
49
+ end
89
50
  end
90
- end # class BeerFileDataset
91
51
 
92
52
 
53
+ private
54
+ def registry() self.class.registry; end ## convenience method to access "static" shared class variable
55
+ end # class FileDataset
93
56
  end # module Datafile
@@ -15,22 +15,30 @@ class FileDatasetRegistry
15
15
  @roots[:openmundi] = '../../openmundi' ## OPENMUNDI_ROOT = "../../openmundi"
16
16
  @roots[:openfootball] = '..' ## OPENFOOTBALL_ROOT = ".."
17
17
  @roots[:openbeer] = '..'
18
+
19
+ @roots[:footballcsv] = '..'
18
20
  end
19
21
 
20
22
  def merge( hash )
21
23
  ## todo: add support for merging project mappings too
22
24
  ## use merge_roots and merge_projects ?? why, why not??
23
-
24
25
  @roots = @roots.merge( hash )
25
26
  end
26
27
 
27
- def lookup( name ) lookup_worker( name, false ); end ## false=>return nil; do NOT fail w/ excep
28
- def lookup!(name ) lookup_worker( name, true ); end ## true=>throw except;
28
+ def lookup( name )
29
+ path, _ = lookup_path( name ) ## note: ignore error message passed along in return
30
+ path
31
+ end
32
+
33
+ def lookup!( name )
34
+ path, error = lookup_path( name )
35
+ raise error if error
36
+ path
37
+ end
29
38
 
30
- private
31
- def lookup_worker( name, fail_on_error )
32
- ### fix: use lookup! version for exption and lookup (w/ returning nil) - why, why not??
33
39
 
40
+ private
41
+ def lookup_path( name )
34
42
  ## split name in org/user + project (e.g. openfootball/at-austria)
35
43
  parts = name.split( '/' )
36
44
  ## check/todo: assert parts == 2 -- why, why not??
@@ -38,11 +46,7 @@ private
38
46
  if root.nil?
39
47
  msg = "no mapping found for '#{parts[0]}' in '#{name}'"
40
48
  logger.error( msg )
41
- if fail_on_error
42
- raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
43
- else
44
- return nil
45
- end
49
+ return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ??
46
50
  end
47
51
 
48
52
  path = "#{root}/#{parts[1]}"
@@ -50,16 +54,12 @@ private
50
54
  unless File.exist?( path )
51
55
  msg = "no file found for '#{name}'; expected '#{path}'"
52
56
  logger.error( msg )
53
- if fail_on_error
54
- raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
55
- else
56
- return nil
57
- end
57
+ return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ??
58
58
  end
59
+
59
60
  ### check for File.directory?( path ) too - why, why not???
60
- path
61
+ [path, nil] ## use go-style returns with error as second argument (as error as value)
61
62
  end
62
-
63
63
  end # class FileDatasetRegistry
64
64
 
65
65
  end # module Datafile
@@ -15,38 +15,18 @@ class FileWorker ## check: rename to FileDatafileWorker?? or FileDatafile -wh
15
15
  end
16
16
 
17
17
  def read
18
- ## note: also run inlines (setup script) before
19
- @datafile.inlines.each do |inline|
20
- inline.call
21
- end
22
-
23
18
  @datafile.datasets.each do |dataset|
24
- dataset.file_worker.read
25
- end
26
- end
27
-
28
- def calc
29
- @datafile.scripts.each do |script|
30
- script.call
19
+ f = FileDataset.new( dataset )
20
+ f.read
31
21
  end
32
22
  end
33
23
 
34
24
  def dump
35
- ## also dump inlines
36
- @datafile.inlines.each do |inline|
37
- inline.dump
38
- end
39
-
40
25
  @datafile.datasets.each do |dataset|
41
- dataset.file_worker.dump
42
- end
43
-
44
- ## also dump scripts
45
- @datafile.scripts.each do |script|
46
- script.dump
26
+ f = FileDataset.new( dataset )
27
+ f.dump
47
28
  end
48
29
  end
49
30
 
50
31
  end # class FileWorker
51
-
52
32
  end # module Datafile
@@ -3,23 +3,23 @@
3
3
  module Datafile
4
4
 
5
5
 
6
- class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why not ???
6
+ class ZipDataset ### use (rename to) ZipDatasetWorker/Helper/Wrapper/Fetcher/Downloader - why, why not ???
7
7
  ## read dataset from zip(archive)
8
8
 
9
+ include LogUtils::Logging
10
+
9
11
  def initialize( dataset )
10
- super( dataset )
12
+ @dataset = dataset
11
13
  end
12
14
 
13
15
  def remote_zip_url # remote zip url
14
- ### note: use http:// for now - lets us use (personal proxy NOT working w/ https) for now
15
- ## "https://github.com/#{@name}/archive/master.zip"
16
- "http://github.com/#{name}/archive/master.zip"
16
+ "https://github.com/#{@dataset.name}/archive/master.zip"
17
17
  end
18
18
 
19
19
  def local_zip_name
20
20
  ### note: replace / in name w/ --I--
21
21
  ## e.g. flatten the filename, that is, do NOT include any folders
22
- name.gsub('/', '--I--') # note: will NOT include/return .zip extension
22
+ @dataset.name.gsub('/', '--I--') # note: will NOT include/return .zip extension
23
23
  end
24
24
 
25
25
  def local_zip_root
@@ -32,7 +32,7 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
32
32
 
33
33
 
34
34
  def download
35
- logger.info( "download dataset '#{name}'" )
35
+ logger.info( "download dataset '#{@dataset.name}'" )
36
36
  logger.info( " from '#{remote_zip_url}'" )
37
37
  logger.info( " to '#{local_zip_path}'..." )
38
38
 
@@ -42,7 +42,7 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
42
42
 
43
43
  def dump
44
44
  ## for debugging dump dataset (also check if zip exists)
45
- puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
45
+ puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
46
46
  puts " local '#{local_zip_name}' (#{local_zip_path})"
47
47
  if File.exist?( local_zip_path )
48
48
  puts " size: #{File.size(local_zip_path)} bytes"
@@ -52,6 +52,17 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
52
52
  puts " remote '#{remote_zip_url}'"
53
53
  end
54
54
 
55
+ def read
56
+ if @dataset.is_a?( FootballDataset )
57
+ logger.info( "read football dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'" )
58
+
59
+ pack = SportDb::ZipPackage.new( local_zip_path )
60
+ pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now
61
+ else
62
+ logger.info( "TODO/FIX: read dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'; sorry" )
63
+ end
64
+ end
65
+
55
66
 
56
67
  private
57
68
  ####
@@ -12,38 +12,22 @@ class ZipWorker ## check: rename to ZipDatafileWorker?? or ZipDatafile -why,
12
12
 
13
13
  def download
14
14
  @datafile.datasets.each do |dataset|
15
- dataset.zip_worker.download
15
+ z = ZipDataset.new( dataset )
16
+ z.download
16
17
  end
17
18
  end
18
19
 
19
20
  def read
20
- ## note: also run inlines (setup script) before
21
- @datafile.inlines.each do |inline|
22
- inline.call
23
- end
24
-
25
21
  @datafile.datasets.each do |dataset|
26
- dataset.zip_worker.read
27
- end
28
- end
29
-
30
- def calc
31
- @datafile.scripts.each do |script|
32
- script.call
22
+ z = ZipDataset.new( dataset )
23
+ z.read
33
24
  end
34
25
  end
35
26
 
36
27
  def dump
37
- ## also dump inlines
38
- @datafile.inlines.each do |inline|
39
- inline.dump
40
- end
41
28
  @datafile.datasets.each do |dataset|
42
- dataset.zip_worker.dump
43
- end
44
- ## also dump scripts
45
- @datafile.scripts.each do |script|
46
- script.dump
29
+ z = ZipDataset.new( dataset )
30
+ z.dump
47
31
  end
48
32
  end
49
33