datafile 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@
6
6
 
7
7
  def read_known_datasets( path )
8
8
  ary = []
9
- lines = File.read( path ) ### fix: use File.read_utf8 ??
9
+ lines = File.open( path, 'r:utf-8' ).read
10
10
  lines.each_line do |line|
11
11
  ## skip blank and comment lines
12
12
  next if /^\s*#/ =~ line || /^\s*$/ =~ line
@@ -27,13 +27,12 @@ class Dataset
27
27
  @opts = opts
28
28
  end
29
29
 
30
- attr_reader :name
31
- attr_reader :opts
30
+ attr_reader :name, :opts
31
+
32
+ ## convenience helpers for known opts
33
+ def setup() @opts[:setup]; end ## note: return nil if not found/set
34
+ def format() @opts[:format] || 'txt'; end ## note: assume default is txt (other formats incl. csv) for now - why? wh not?
32
35
 
33
- def setup
34
- value = @opts[:setup] || 'all'
35
- "setups/#{value}"
36
- end
37
36
 
38
37
  def file? # note: use file? (not exit? might use zip? later to check if zip exists? -why? why not?)
39
38
  ## hack/convenience shortcut:
@@ -49,7 +48,7 @@ class Dataset
49
48
  basename = parts[1]
50
49
  ## e.g.
51
50
  ## ./ (working folder) => at-austria
52
- ## openfootball/at-austria
51
+ ## openfootball/at-austria
53
52
  if File.basename( Dir.getwd ) == basename
54
53
  puts " bingo!! working folder >#{basename}< matches dataset"
55
54
  true ## return true
@@ -75,23 +74,18 @@ class WorldDataset < Dataset
75
74
 
76
75
  super( name, opts ) ## todo/check: just juse super (e.g. pass along all params - why? why not?)
77
76
  end
78
-
79
- def zip_worker() WorldZipDataset.new( self ); end ## check: change (rename) just use zip or use worker_zip?? - why, why not?
80
- def file_worker() WorldFileDataset.new( self ); end
81
77
  end # class WorldDataset
82
78
 
83
79
 
84
80
 
85
81
  class FootballDataset < Dataset
86
82
 
87
- @@known_football_datasets = nil
83
+ def self.build_known_datasets
84
+ read_known_datasets( "#{::Datafile.data_path}/football.txt" )
85
+ end
88
86
 
89
87
  def self.known_datasets
90
- ## return array of known datasets
91
- ### todo/fix - use \\= idiom - why, why not??
92
- if @@known_football_datasets.nil?
93
- @@known_football_datasets = read_known_datasets( "#{::Datafile.data_path}/football.txt" )
94
- end
88
+ @@known_football_datasets ||= build_known_datasets
95
89
  @@known_football_datasets
96
90
  end
97
91
 
@@ -101,7 +95,11 @@ class FootballDataset < Dataset
101
95
  ## check if name includes a slash (e.g. /)
102
96
  ## - if not auto-add openfootball/ (default)
103
97
  if name_easy.index( '/' ).nil?
104
- name = "openfootball/#{name_easy}"
98
+ if opts[:format] == 'csv'
99
+ name = "footballcsv/#{name_easy}"
100
+ else
101
+ name = "openfootball/#{name_easy}"
102
+ end
105
103
  else
106
104
  name = name_easy ## just pass through for now
107
105
  end
@@ -109,14 +107,11 @@ class FootballDataset < Dataset
109
107
  super( name, opts )
110
108
 
111
109
  ### check for known datasets; warn: if not known (might be a typo)
112
- unless FootballDataset.known_datasets.include?( name )
110
+ unless self.class.known_datasets.include?( name )
113
111
  ## todo: use logger - why, why not??
114
112
  puts "*** warn: unknown football dataset '#{name}', typo ???"
115
113
  end
116
114
  end
117
-
118
- def zip_worker() FootballZipDataset.new( self ); end
119
- def file_worker() FootballFileDataset.new( self ); end
120
115
  end # class FootballDataset
121
116
 
122
117
 
@@ -133,9 +128,6 @@ class BeerDataset < Dataset
133
128
 
134
129
  super( name, opts )
135
130
  end
136
-
137
- def zip_worker() BeerZipDataset.new( self ); end
138
- def file_worker() BeerFileDataset.new( self ); end
139
131
  end # class BeerDataset
140
132
 
141
133
 
@@ -3,8 +3,8 @@
3
3
  module Datafile
4
4
 
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 2
7
- PATCH = 5
6
+ MINOR = 3
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -17,7 +17,10 @@ module Datafile
17
17
 
18
18
  def self.root
19
19
  "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20
- end
20
+ end
21
21
 
22
- end # module Datafile
22
+ def self.data_path
23
+ "#{root}/data"
24
+ end
23
25
 
26
+ end # module Datafile
@@ -2,28 +2,26 @@
2
2
 
3
3
  module Datafile
4
4
 
5
- class FileDataset < DatasetNode
5
+ class FileDataset
6
6
  ## read dataset from file(system)
7
7
 
8
- @@registry = nil
8
+ include LogUtils::Logging
9
+
9
10
 
10
11
  def self.registry
11
- ## use ||= why, why not?? - add Registry as nested class, why, why not ??
12
- if @@registry.nil?
13
- @@registry = FileDatasetRegistry.new
14
- end
12
+ @@registry ||= FileDatasetRegistry.new
15
13
  @@registry
16
14
  end
17
15
 
18
16
  def initialize( dataset )
19
- super( dataset )
17
+ @dataset = dataset
20
18
  end
21
19
 
22
- def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
20
+ def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
23
21
  ## note: for easy testing allow "in situ" datasets
24
22
  ## e.g. ./ (e.g. mu-mauritius) is openfootball/mu-mauritius
25
23
  ## split name in org/user + project (e.g. openfootball/at-austria)
26
- parts = name.split( '/' )
24
+ parts = @dataset.name.split( '/' )
27
25
 
28
26
  basename = parts[1]
29
27
  if File.basename( Dir.getwd ) == basename
@@ -31,63 +29,28 @@ class FileDataset < DatasetNode
31
29
  return Dir.getwd ## assume working directory/folder is repo dir
32
30
  end
33
31
 
34
- registry.lookup( name )
32
+ registry.lookup( @dataset.name )
35
33
  end
36
34
 
37
35
  def dump
38
36
  ## for debugging: dump dataset -- todo (also check if folder exists ??)
39
- puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
37
+ puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.inspect}" ## use opts.inspect instead of to_json - why? why not?
40
38
  puts " repo-dir '#{repo_dir}'"
41
39
  end
42
40
 
43
- private
44
- def registry ## convenience method to access "static" shared class variable
45
- FileDataset.registry ## self.registry not working?? - or self.registry() -why, why not??
46
- end
47
- end # class FileDataset
48
-
49
-
50
-
51
- class FootballFileDataset < FileDataset
52
-
53
- def initialize( dataset )
54
- super( dataset )
55
- end
56
-
57
- def read()
58
- logger.info( "read football-dataset (file) '#{name}', '#{setup}'" )
59
-
60
- SportDb.read_setup( setup, repo_dir )
61
- end
62
- end # class FootballFileDataset
63
-
41
+ def read
42
+ if @dataset.is_a?( FootballDataset )
43
+ logger.info( "read football dataset (file) '#{@dataset.name}', '#{@dataset.setup}'" )
64
44
 
65
- class WorldFileDataset < FileDataset
66
-
67
- def initialize( dataset )
68
- super( dataset )
69
- end
70
-
71
- def read()
72
- logger.info( "read world-dataset (file) '#{name}', '#{setup}'" )
73
-
74
- ## WorldDb.read_setup( 'setups/countries', WORLD_DB_INCLUDE_PATH, skip_tags: true )
75
- WorldDb.read_setup( setup, repo_dir, skip_tags: true )
76
- end
77
- end # class WorldFileDataset
78
-
79
- class BeerFileDataset < FileDataset
80
-
81
- def initialize( dataset )
82
- super( dataset )
83
- end
84
-
85
- def read()
86
- logger.info( "read beer-dataset (file) '#{name}', '#{setup}'" )
87
-
88
- BeerDb.read_setup( setup, repo_dir )
45
+ pack = SportDb::DirPackage.new( repo_dir )
46
+ pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now
47
+ else
48
+ logger.info( "TODO/FIX: read dataset (file) '#{@dataset.name}', '#{@dataset.setup}'; sorry" )
49
+ end
89
50
  end
90
- end # class BeerFileDataset
91
51
 
92
52
 
53
+ private
54
+ def registry() self.class.registry; end ## convenience method to access "static" shared class variable
55
+ end # class FileDataset
93
56
  end # module Datafile
@@ -15,22 +15,30 @@ class FileDatasetRegistry
15
15
  @roots[:openmundi] = '../../openmundi' ## OPENMUNDI_ROOT = "../../openmundi"
16
16
  @roots[:openfootball] = '..' ## OPENFOOTBALL_ROOT = ".."
17
17
  @roots[:openbeer] = '..'
18
+
19
+ @roots[:footballcsv] = '..'
18
20
  end
19
21
 
20
22
  def merge( hash )
21
23
  ## todo: add support for merging project mappings too
22
24
  ## use merge_roots and merge_projects ?? why, why not??
23
-
24
25
  @roots = @roots.merge( hash )
25
26
  end
26
27
 
27
- def lookup( name ) lookup_worker( name, false ); end ## false=>return nil; do NOT fail w/ excep
28
- def lookup!(name ) lookup_worker( name, true ); end ## true=>throw except;
28
+ def lookup( name )
29
+ path, _ = lookup_path( name ) ## note: ignore error message passed along in return
30
+ path
31
+ end
32
+
33
+ def lookup!( name )
34
+ path, error = lookup_path( name )
35
+ raise error if error
36
+ path
37
+ end
29
38
 
30
- private
31
- def lookup_worker( name, fail_on_error )
32
- ### fix: use lookup! version for exption and lookup (w/ returning nil) - why, why not??
33
39
 
40
+ private
41
+ def lookup_path( name )
34
42
  ## split name in org/user + project (e.g. openfootball/at-austria)
35
43
  parts = name.split( '/' )
36
44
  ## check/todo: assert parts == 2 -- why, why not??
@@ -38,11 +46,7 @@ private
38
46
  if root.nil?
39
47
  msg = "no mapping found for '#{parts[0]}' in '#{name}'"
40
48
  logger.error( msg )
41
- if fail_on_error
42
- raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
43
- else
44
- return nil
45
- end
49
+ return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ??
46
50
  end
47
51
 
48
52
  path = "#{root}/#{parts[1]}"
@@ -50,16 +54,12 @@ private
50
54
  unless File.exist?( path )
51
55
  msg = "no file found for '#{name}'; expected '#{path}'"
52
56
  logger.error( msg )
53
- if fail_on_error
54
- raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
55
- else
56
- return nil
57
- end
57
+ return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ??
58
58
  end
59
+
59
60
  ### check for File.directory?( path ) too - why, why not???
60
- path
61
+ [path, nil] ## use go-style returns with error as second argument (as error as value)
61
62
  end
62
-
63
63
  end # class FileDatasetRegistry
64
64
 
65
65
  end # module Datafile
@@ -15,38 +15,18 @@ class FileWorker ## check: rename to FileDatafileWorker?? or FileDatafile -wh
15
15
  end
16
16
 
17
17
  def read
18
- ## note: also run inlines (setup script) before
19
- @datafile.inlines.each do |inline|
20
- inline.call
21
- end
22
-
23
18
  @datafile.datasets.each do |dataset|
24
- dataset.file_worker.read
25
- end
26
- end
27
-
28
- def calc
29
- @datafile.scripts.each do |script|
30
- script.call
19
+ f = FileDataset.new( dataset )
20
+ f.read
31
21
  end
32
22
  end
33
23
 
34
24
  def dump
35
- ## also dump inlines
36
- @datafile.inlines.each do |inline|
37
- inline.dump
38
- end
39
-
40
25
  @datafile.datasets.each do |dataset|
41
- dataset.file_worker.dump
42
- end
43
-
44
- ## also dump scripts
45
- @datafile.scripts.each do |script|
46
- script.dump
26
+ f = FileDataset.new( dataset )
27
+ f.dump
47
28
  end
48
29
  end
49
30
 
50
31
  end # class FileWorker
51
-
52
32
  end # module Datafile
@@ -3,23 +3,23 @@
3
3
  module Datafile
4
4
 
5
5
 
6
- class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why not ???
6
+ class ZipDataset ### use (rename to) ZipDatasetWorker/Helper/Wrapper/Fetcher/Downloader - why, why not ???
7
7
  ## read dataset from zip(archive)
8
8
 
9
+ include LogUtils::Logging
10
+
9
11
  def initialize( dataset )
10
- super( dataset )
12
+ @dataset = dataset
11
13
  end
12
14
 
13
15
  def remote_zip_url # remote zip url
14
- ### note: use http:// for now - lets us use (personal proxy NOT working w/ https) for now
15
- ## "https://github.com/#{@name}/archive/master.zip"
16
- "http://github.com/#{name}/archive/master.zip"
16
+ "https://github.com/#{@dataset.name}/archive/master.zip"
17
17
  end
18
18
 
19
19
  def local_zip_name
20
20
  ### note: replace / in name w/ --I--
21
21
  ## e.g. flatten the filename, that is, do NOT include any folders
22
- name.gsub('/', '--I--') # note: will NOT include/return .zip extension
22
+ @dataset.name.gsub('/', '--I--') # note: will NOT include/return .zip extension
23
23
  end
24
24
 
25
25
  def local_zip_root
@@ -32,7 +32,7 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
32
32
 
33
33
 
34
34
  def download
35
- logger.info( "download dataset '#{name}'" )
35
+ logger.info( "download dataset '#{@dataset.name}'" )
36
36
  logger.info( " from '#{remote_zip_url}'" )
37
37
  logger.info( " to '#{local_zip_path}'..." )
38
38
 
@@ -42,7 +42,7 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
42
42
 
43
43
  def dump
44
44
  ## for debugging: dump dataset (also check if zip exists)
45
- puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
45
+ puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
46
46
  puts " local '#{local_zip_name}' (#{local_zip_path})"
47
47
  if File.exist?( local_zip_path )
48
48
  puts " size: #{File.size(local_zip_path)} bytes"
@@ -52,6 +52,17 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
52
52
  puts " remote '#{remote_zip_url}'"
53
53
  end
54
54
 
55
+ def read
56
+ if @dataset.is_a?( FootballDataset )
57
+ logger.info( "read football dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'" )
58
+
59
+ pack = SportDb::ZipPackage.new( local_zip_path )
60
+ pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now
61
+ else
62
+ logger.info( "TODO/FIX: read dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'; sorry" )
63
+ end
64
+ end
65
+
55
66
 
56
67
  private
57
68
  ####
@@ -12,38 +12,22 @@ class ZipWorker ## check: rename to ZipDatafileWorker?? or ZipDatafile -why,
12
12
 
13
13
  def download
14
14
  @datafile.datasets.each do |dataset|
15
- dataset.zip_worker.download
15
+ z = ZipDataset.new( dataset )
16
+ z.download
16
17
  end
17
18
  end
18
19
 
19
20
  def read
20
- ## note: also run inlines (setup script) before
21
- @datafile.inlines.each do |inline|
22
- inline.call
23
- end
24
-
25
21
  @datafile.datasets.each do |dataset|
26
- dataset.zip_worker.read
27
- end
28
- end
29
-
30
- def calc
31
- @datafile.scripts.each do |script|
32
- script.call
22
+ z = ZipDataset.new( dataset )
23
+ z.read
33
24
  end
34
25
  end
35
26
 
36
27
  def dump
37
- ## also dump inlines
38
- @datafile.inlines.each do |inline|
39
- inline.dump
40
- end
41
28
  @datafile.datasets.each do |dataset|
42
- dataset.zip_worker.dump
43
- end
44
- ## also dump scripts
45
- @datafile.scripts.each do |script|
46
- script.dump
29
+ z = ZipDataset.new( dataset )
30
+ z.dump
47
31
  end
48
32
  end
49
33