datafile 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26b6312f588cc22d664d552ca2f01e6e11482f3d
4
- data.tar.gz: 3afcbe6af0340685f5358a9f0713a4631a59e5ee
3
+ metadata.gz: 62443719632bb6ef1b75ce5b79ef146e1ddca9b1
4
+ data.tar.gz: 5d39b5537c4f280cb395d2be2e9abec702d910b4
5
5
  SHA512:
6
- metadata.gz: 78dddc2a46795eb19b25819c9cc95e3d5202b81dcd11851ddbce92ac7efd883815495a6224546baaa53fbefaeb944abbdc686645b16ac3c9464bec7ccb491f5c
7
- data.tar.gz: 48ae309c57d5a09b77d81d40e9542473427928c26ffabb28b9e4e7f18af941de1196f0f17a43440d9456a155111ddec61c9dfef7ae1854a3e8cb2c25ecbbc30c
6
+ metadata.gz: 04882e94add84a5c478025535375760c21ea006714a1f6e3d8b92660d98477d460065306082ac4ebf321621c7782d99beac8ae0ab7f75f28c3d430ef80658e84
7
+ data.tar.gz: 1ece9f87e091abf9fb494777aaf626a647188a788b4ede359bfb0a940eb53dffb50db90e6354d59cb128b383a980968928ccbfdfcc86eacd8fef766f11bf55b7
data/Manifest.txt CHANGED
@@ -4,11 +4,25 @@ README.md
4
4
  Rakefile
5
5
  lib/datafile.rb
6
6
  lib/datafile/builder.rb
7
+ lib/datafile/builder2.rb
7
8
  lib/datafile/datafile.rb
8
- lib/datafile/datasets/beer.rb
9
9
  lib/datafile/datasets/dataset.rb
10
- lib/datafile/datasets/football.rb
11
- lib/datafile/datasets/world.rb
12
10
  lib/datafile/version.rb
11
+ lib/datafile/workers/dataset.rb
12
+ lib/datafile/workers/file/dataset.rb
13
+ lib/datafile/workers/file/registry.rb
14
+ lib/datafile/workers/file/worker.rb
15
+ lib/datafile/workers/zip/beer.rb
16
+ lib/datafile/workers/zip/dataset.rb
17
+ lib/datafile/workers/zip/football.rb
18
+ lib/datafile/workers/zip/worker.rb
19
+ lib/datafile/workers/zip/world.rb
20
+ test/datafile/eurocup.rb
21
+ test/datafile/stadiums.rb
22
+ test/datafile/world.rb
23
+ test/datafile2/at.rb
13
24
  test/helper.rb
14
25
  test/test_builder.rb
26
+ test/test_builder2.rb
27
+ test/test_file_dataset_registry.rb
28
+ test/test_file_worker.rb
data/lib/datafile.rb CHANGED
@@ -8,12 +8,24 @@ require 'logutils'
8
8
  require 'datafile/version' # let it always go first
9
9
 
10
10
  require 'datafile/datasets/dataset'
11
- require 'datafile/datasets/football'
12
- require 'datafile/datasets/beer'
13
- require 'datafile/datasets/world'
11
+
12
+ require 'datafile/workers/dataset'
13
+
14
+ require 'datafile/workers/file/dataset'
15
+ require 'datafile/workers/file/registry'
16
+ require 'datafile/workers/file/worker'
17
+
18
+ require 'datafile/workers/zip/dataset'
19
+ require 'datafile/workers/zip/beer'
20
+ require 'datafile/workers/zip/football'
21
+ require 'datafile/workers/zip/world'
22
+ require 'datafile/workers/zip/worker'
23
+
14
24
  require 'datafile/datafile'
15
25
  require 'datafile/builder'
26
+ require 'datafile/builder2'
16
27
 
17
28
 
18
29
  # say hello
19
- puts Datafile.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
30
+ puts Datafile.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
31
+
@@ -2,15 +2,7 @@
2
2
 
3
3
  module Datafile
4
4
 
5
- class Builder
6
-
7
- include LogUtils::Logging
8
-
9
- attr_reader :datafile
10
-
11
- def initialize()
12
- @datafile = Datafile.new
13
- end
5
+ class Builder ## "simple" builder (one file, one datafile)
14
6
 
15
7
  def self.load_file( path )
16
8
  code = File.read_utf8( path )
@@ -24,6 +16,14 @@ class Builder
24
16
  end
25
17
 
26
18
 
19
+ include LogUtils::Logging
20
+
21
+ def initialize
22
+ @datafile = Datafile.new
23
+ end
24
+
25
+ attr_reader :datafile
26
+
27
27
  def beer( name, opts={} )
28
28
  logger.info( "[builder] add beer-dataset '#{name}'" )
29
29
  @datafile.datasets << BeerDataset.new( name, opts )
@@ -41,3 +41,4 @@ class Builder
41
41
 
42
42
  end # class Builder
43
43
  end # module Datafile
44
+
@@ -0,0 +1,90 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+
6
+ ###
7
+ ## check/todo: ename to BatchBuilder, MultiBuilder,etc - find better name - why, why not??
8
+
9
+ class BuilderEx
10
+
11
+ def self.load_file( path )
12
+ code = File.read_utf8( path )
13
+ self.load( code )
14
+ end
15
+
16
+ def self.load( code )
17
+ builder = BuilderEx.new
18
+ builder.instance_eval( code )
19
+ builder
20
+ end
21
+
22
+
23
+ include LogUtils::Logging
24
+
25
+ def initialize
26
+ @datafiles = []
27
+ @datafile = nil
28
+ end
29
+
30
+ attr_reader :datafiles
31
+
32
+
33
+ def task( arg )
34
+
35
+ logger.info( "[builder] add task '#{arg.inspect}' : #{arg.class.name}" )
36
+
37
+ if arg.kind_of?( String ) || arg.kind_of?( Symbol ) # e.g. 'at' or :at
38
+ name = arg.to_s
39
+ ## note: always default to FileWorker for now
40
+ ## -- use file: true -- find better name e.g. worker/source: file - why? why not??
41
+ @datafile = Datafile.new( name: name, deps: [], file: true )
42
+ yield ### execute block in context
43
+ ## b = Builder.new
44
+ ## block.call( b ) ## same as b.instance_eval( &block) ???
45
+ ## b.instance_eval( code )
46
+ ## b = Builder.load( &block )
47
+ elsif arg.kind_of?( Hash ) ## Hash e.g. :at_calc => :at etc.
48
+ key = arg.keys.first
49
+ value = arg[key] ## todo: check if single value? always turn into array
50
+
51
+ name = key.to_s ## get first key (assume it's name)
52
+ if value.kind_of?( Array )
53
+ deps = value.map { |v| v.to_s } ## convert to strings
54
+ else ## assume single string/symbol -- convert to array
55
+ deps = [value.to_s]
56
+ end
57
+ @datafile = Datafile.new( name: name, deps: deps, file: true ) ## note: always default to FileWorker for now
58
+ yield ### execute block in context
59
+ ## to be done
60
+ else
61
+ ## fix: report error: unknown type
62
+ end
63
+
64
+ @datafiles << @datafile
65
+ end
66
+
67
+ def calc( &block )
68
+ logger.info( "[builder] add script calc-block" )
69
+ @datafile.scripts << Script.new( block )
70
+ end
71
+
72
+ ################################
73
+ # "classic/standard" datasets
74
+ def beer( name, opts={} )
75
+ logger.info( "[builder] add beer-dataset '#{name}'" )
76
+ @datafile.datasets << BeerDataset.new( name, opts )
77
+ end
78
+
79
+ def football( name, opts={} )
80
+ logger.info( "[builder] add football-dataset '#{name}'" )
81
+ @datafile.datasets << FootballDataset.new( name, opts )
82
+ end
83
+
84
+ def world( name, opts={} )
85
+ logger.info( "[builder] add world-dataset '#{name}'" )
86
+ @datafile.datasets << WorldDataset.new( name, opts )
87
+ end
88
+
89
+ end # class Builder2
90
+ end # module Datafile
@@ -2,13 +2,26 @@
2
2
 
3
3
  module Datafile
4
4
 
5
- class Datafile
6
-
5
+ class Script
7
6
  include LogUtils::Logging
8
7
 
9
- attr_reader :datasets
8
+ def initialize( proc )
9
+ @proc = proc
10
+ end
11
+
12
+ def call
13
+ logger.info( "[script] calling calc block" )
14
+ @proc.call
15
+ end
16
+
17
+ def dump
18
+ puts " script: #{@proc.inspect}"
19
+ end
20
+ end ## class Script
10
21
 
11
22
 
23
+ class Datafile
24
+
12
25
  ## convenience method - use like Datafile.load_file()
13
26
  def self.load_file( path='./Datafile' )
14
27
  code = File.read_utf8( path )
@@ -26,27 +39,67 @@ class Datafile
26
39
  end
27
40
 
28
41
 
29
- def initialize
42
+ include LogUtils::Logging
43
+
44
+ def initialize( opts={} )
45
+ @opts = opts
30
46
  @datasets = []
47
+ @scripts = [] ## calculation scripts (calc blocks)
48
+
49
+ ## (target)name - return nil if noname (set/defined/assigned)
50
+ @name = opts[:name] || nil
51
+ ## deps (dependencies) - note: always returns an array (empty array if no deps)
52
+ @deps = opts[:deps] || []
53
+
54
+ if opts[:file]
55
+ @worker = FileWorker.new( self )
56
+ else
57
+ ## default to zip worker for now
58
+ @worker = ZipWorker.new( self )
59
+ end
31
60
  end
32
61
 
62
+ attr_reader :datasets
63
+ attr_reader :scripts ## calc(ulation) scripts (calc blocks)
64
+ attr_reader :name
65
+ attr_reader :deps ## dep(endencies)
66
+
67
+ attr_accessor :worker # lets you change worker - find a better way - how, why, why not??
68
+
33
69
 
34
70
  def run
35
71
  logger.info( "[datafile] begin - run" )
36
- download() # step 1 - download zips for datasets
37
- read() # step 2 - read in datasets from zips
72
+ download # step 1 - download zips for datasets
73
+ read # step 2 - read in datasets from zips
74
+ calc # step 3 - run calc(ulations) scripts
38
75
  logger.info( "[datafile] end - run" )
39
76
  end
40
77
 
41
78
 
42
-
43
79
  def download
44
80
  logger.info( "[datafile] dowload" )
45
- @datasets.each do |dataset|
46
- dataset.download()
47
- end
81
+ @worker.download
82
+ ## check: use @worker.download( @datasets) - why, why not?? link worker w/ datafile - why, why not??
83
+ end
84
+
85
+ def read
86
+ logger.info( "[datafile] read" )
87
+ @worker.read
88
+ end
89
+
90
+ def calc
91
+ logger.info( "[datafile] calc" )
92
+ @worker.calc
48
93
  end
49
94
 
95
+ def dump
96
+ ## for debugging dump datasets (note: will/might also check if zip exits)
97
+ logger.info( "[datafile] dump datasets (for debugging)" )
98
+ @worker.dump
99
+ end
100
+
101
+
102
+ =begin
50
103
  def download_world ## only dl world datasets (skip all others)
51
104
  logger.info( "[datafile] dowload world datasets" )
52
105
  @datasets.each do |dataset|
@@ -79,21 +132,14 @@ class Datafile
79
132
  end
80
133
  end
81
134
  end
135
+ =end
82
136
 
83
137
 
84
-
85
-
86
- def read
87
- logger.info( "[datafile] read" )
88
- @datasets.each do |dataset|
89
- dataset.read()
90
- end
91
- end
92
-
138
+ =begin
93
139
  def read_world
94
140
  logger.info( "[datafile] read world datasets" )
95
141
  @datasets.each do |dataset|
96
- if dataset.kind_of? WorldDataset
142
+ if dataset.kind_of?( WorldDataset )
97
143
  dataset.read()
98
144
  else
99
145
  # skip all others
@@ -104,7 +150,7 @@ class Datafile
104
150
  def read_beer
105
151
  logger.info( "[datafile] read beer datasets" )
106
152
  @datasets.each do |dataset|
107
- if dataset.kind_of? BeerDataset
153
+ if dataset.kind_of?( BeerDataset )
108
154
  dataset.read()
109
155
  else
110
156
  # skip all others
@@ -115,23 +161,14 @@ class Datafile
115
161
  def read_football
116
162
  logger.info( "[datafile] read football datasets" )
117
163
  @datasets.each do |dataset|
118
- if dataset.kind_of? FootballDataset
164
+ if dataset.kind_of?( FootballDataset )
119
165
  dataset.read()
120
166
  else
121
167
  # skip all others
122
168
  end
123
169
  end
124
170
  end
125
-
126
-
127
- def dump
128
- ## for debugging dump datasets (note: will/might also check if zip exits)
129
- logger.info( "[datafile] dump datasets (for debugging)" )
130
- @datasets.each do |dataset|
131
- dataset.dump()
132
- end
133
- end
134
-
171
+ =end
135
172
 
136
173
  end # class Datafile
137
174
  end # module Datafile
@@ -10,71 +10,42 @@ class Dataset
10
10
  @opts = opts
11
11
  end
12
12
 
13
+ attr_reader :name
14
+ attr_reader :opts
13
15
 
14
16
  def setup
15
17
  value = @opts[:setup] || 'all'
16
18
  "setups/#{value}"
17
19
  end
20
+ end # class Dataset
18
21
 
19
- def remote_zip_url # remote zip url
20
- ### note: use http:// for now - lets us use (personal proxy NOT working w/ https) for now
21
- ## "https://github.com/#{@name}/archive/master.zip"
22
- "http://github.com/#{@name}/archive/master.zip"
23
- end
24
-
25
- def local_zip_name
26
- ### note: replace / in name w/ --I--
27
- ## e.g. flatten the filename, that is, do NOT include any folders
28
- @name.gsub('/', '--I--') # note: will NOT include/return .zip extension
29
- end
30
22
 
31
- def local_zip_root
32
- "./tmp"
33
- end
34
-
35
- def local_zip_path # local zip path
36
- "#{local_zip_root}/#{local_zip_name}.zip"
23
+ class WorldDataset < Dataset
24
+ def initialize( name, opts={} )
25
+ super( name, opts ) ## todo/check: just juse super (e.g. pass along all params - why? why not?)
37
26
  end
27
+
28
+ def zip_worker() WorldZipDataset.new( self ); end ## check: change (rename) just use zip or use worker_zip?? - why, why not?
29
+ def file_worker() WorldFileDataset.new( self ); end
30
+ end # class WorldDataset
38
31
 
39
-
40
- def download
41
- logger.info( "download dataset '#{@name}'" )
42
- logger.info( " from '#{remote_zip_url}'" )
43
- logger.info( " to '#{local_zip_path}'..." )
44
-
45
- download_blob( remote_zip_url, local_zip_path )
32
+ class FootballDataset < Dataset
33
+ def initialize( name, opts={} )
34
+ super( name, opts )
46
35
  end
47
36
 
37
+ def zip_worker() FootballZipDataset.new( self ); end
38
+ def file_worker() FootballFileDataset.new( self ); end
39
+ end # class FootballDataset
48
40
 
49
- def dump
50
- ## for debuggin dump dataset (also check if zip exits)
51
- puts "dataset '#{@name}' opts=#{@opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
52
- puts " local '#{local_zip_name}' (#{local_zip_path})"
53
- if File.exist?( local_zip_path )
54
- puts " size: #{File.size(local_zip_path)} bytes"
55
- else
56
- puts " (file not found)"
57
- end
58
- puts " remote '#{remote_zip_url}'"
41
+ class BeerDataset < Dataset
42
+ def initialize( name, opts={} )
43
+ super( name, opts )
59
44
  end
60
45
 
46
+ def zip_worker() BeerZipDataset.new( self ); end
47
+ def file_worker() BeerFileDataset.new( self ); end
48
+ end # class BeerDataset
61
49
 
62
- private
63
- ####
64
- # download tasks for zips
65
- def download_blob( url, dest )
66
- logger.info "downloading #{url} to #{dest}..."
67
-
68
- ## make sure dest path exists
69
- dest_p = File.dirname( dest )
70
- FileUtils.mkdir_p( dest_p ) unless File.exists?( dest_p ) ## use Dir.exists?? why? why not??
71
-
72
- worker = Fetcher::Worker.new
73
- worker.copy( url, dest )
74
- ## print some file stats
75
- logger.debug " size: #{File.size(dest)} bytes"
76
- end
77
-
78
- end # class Dataset
79
50
 
80
51
  end # module Datafile
@@ -3,8 +3,8 @@
3
3
  module Datafile
4
4
 
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 1
7
- PATCH = 2
6
+ MINOR = 2
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class Error < StandardError
6
+ end
7
+
8
+ ####
9
+ # todo/check:
10
+ # rename to DatasetNotFound or similar??
11
+ # use "common" error class - why? why not?
12
+ class DatasetNotFoundError < Error
13
+ attr_reader :message
14
+
15
+ def initialize( message )
16
+ @message = message
17
+ end
18
+
19
+ def to_s
20
+ "datset not found => #{@message}"
21
+ end
22
+ end
23
+
24
+
25
+ class DatasetNode ### find a better name (e.g. DatasetWorker, DatasetBase, DatasetRef, DatasetWrapper ???) ???
26
+
27
+ include LogUtils::Logging
28
+
29
+ def initialize( dataset )
30
+ @dataset = dataset
31
+ end
32
+
33
+ def name() @dataset.name; end
34
+ def opts() @dataset.opts; end
35
+ def setup() @dataset.setup; end
36
+
37
+ end # class DatasetNode
38
+
39
+
40
+ end # module Datafile
@@ -0,0 +1,82 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FileDataset < DatasetNode
6
+ ## read dataset from file(system)
7
+
8
+ @@registry = nil
9
+
10
+ def self.registry
11
+ ## use ||= why, why not?? - add Registry as nested class, why, why not ??
12
+ if @@registry.nil?
13
+ @@registry = FileDatasetRegistry.new
14
+ end
15
+ @@registry
16
+ end
17
+
18
+ def initialize( dataset )
19
+ super( dataset )
20
+ end
21
+
22
+ def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
23
+ registry.lookup( name )
24
+ end
25
+
26
+ def dump
27
+ ## for debuggin dump dataset -- todo (also check if folder exits ??)
28
+ puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
29
+ puts " repo-dir '#{repo_dir}'"
30
+ end
31
+
32
+ private
33
+ def registry ## convenience method to access "static" shared class variable
34
+ FileDataset.registry ## self.registry not working?? - or self.registry() -why, why not??
35
+ end
36
+ end # class FileDataset
37
+
38
+
39
+
40
+ class FootballFileDataset < FileDataset
41
+
42
+ def initialize( dataset )
43
+ super( dataset )
44
+ end
45
+
46
+ def read()
47
+ logger.info( "read football-dataset (file) '#{name}', '#{setup}'" )
48
+
49
+ SportDb.read_setup( setup, repo_dir )
50
+ end
51
+ end # class FootballFileDataset
52
+
53
+
54
+ class WorldFileDataset < FileDataset
55
+
56
+ def initialize( dataset )
57
+ super( dataset )
58
+ end
59
+
60
+ def read()
61
+ logger.info( "read world-dataset (file) '#{name}', '#{setup}'" )
62
+
63
+ ## WorldDb.read_setup( 'setups/countries', WORLD_DB_INCLUDE_PATH, skip_tags: true )
64
+ WorldDb.read_setup( setup, repo_dir, skip_tags: true )
65
+ end
66
+ end # class WorldFileDataset
67
+
68
+ class BeerFileDataset < FileDataset
69
+
70
+ def initialize( dataset )
71
+ super( dataset )
72
+ end
73
+
74
+ def read()
75
+ logger.info( "read beer-dataset (file) '#{name}', '#{setup}'" )
76
+
77
+ BeerDb.read_setup( setup, repo_dir )
78
+ end
79
+ end # class BeerFileDataset
80
+
81
+
82
+ end # module Datafile
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FileDatasetRegistry
6
+ ## store mapping for (local) datasets e.g.
7
+ ## map github handle e.g. openfootball/at-austria to local path
8
+
9
+ include LogUtils::Logging
10
+
11
+ def initialize
12
+ ## org rootpaths
13
+ @roots = {}
14
+ ## org defaults (use merge to overwrite for now)
15
+ @roots[:openmundi] = '../../openmundi' ## OPENMUNDI_ROOT = "../../openmundi"
16
+ @roots[:openfootball] = '..' ## OPENFOOTBALL_ROOT = ".."
17
+ @roots[:openbeer] = '..'
18
+ end
19
+
20
+ def merge( hash )
21
+ ## todo: add support for merging project mappings too
22
+ ## use merge_roots and merge_projects ?? why, why not??
23
+
24
+ @roots = @roots.merge( hash )
25
+ end
26
+
27
+ def lookup( name ) lookup_worker( name, false ); end ## false=>return nil; do NOT fail w/ excep
28
+ def lookup!(name ) lookup_worker( name, true ); end ## true=>throw except;
29
+
30
+ private
31
+ def lookup_worker( name, fail_on_error )
32
+ ### fix: use lookup! version for exption and lookup (w/ returning nil) - why, why not??
33
+
34
+ ## split name in org/user + project (e.g. openfootball/at-austria)
35
+ parts = name.split( '/' )
36
+ ## check/todo: assert parts == 2 -- why, why not??
37
+ root = @roots[ parts[0].to_sym ]
38
+ if root.nil?
39
+ msg = "no mapping found for '#{parts[0]}' in '#{name}'"
40
+ logger.error( msg )
41
+ if fail_on_error
42
+ raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
43
+ else
44
+ return nil
45
+ end
46
+ end
47
+
48
+ path = "#{root}/#{parts[1]}"
49
+ ## check if folder/directory exists
50
+ unless File.exist?( path )
51
+ msg = "no file found for '#{name}'; expected '#{path}'"
52
+ logger.error( msg )
53
+ if fail_on_error
54
+ raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
55
+ else
56
+ return nil
57
+ end
58
+ end
59
+ ### check for File.directory?( path ) too - why, why not???
60
+ path
61
+ end
62
+
63
+ end # class FileDatasetRegistry
64
+
65
+ end # module Datafile
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FileWorker ## check: rename to FileDatafileWorker?? or FileDatafile -why, why not ??
6
+
7
+ include LogUtils::Logging
8
+
9
+ def initialize( datafile )
10
+ @datafile = datafile
11
+ end
12
+
13
+ def download
14
+ ## note: do NOTHING for now; assume repo already present (unpacked) on local filesystem
15
+ end
16
+
17
+ def read
18
+ @datafile.datasets.each do |dataset|
19
+ dataset.file_worker.read
20
+ end
21
+ end
22
+
23
+ def calc
24
+ @datafile.scripts.each do |script|
25
+ script.call
26
+ end
27
+ end
28
+
29
+ def dump
30
+ @datafile.datasets.each do |dataset|
31
+ dataset.file_worker.dump
32
+ end
33
+
34
+ ## also dump scripts
35
+ @datafile.scripts.each do |script|
36
+ script.dump
37
+ end
38
+ end
39
+
40
+ end # class FileWorker
41
+
42
+ end # module Datafile
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class BeerZipDataset < ZipDataset
6
+
7
+ def initialize( dataset )
8
+ super( dataset )
9
+ end
10
+
11
+ def read
12
+ logger.info( "read beer-dataset (zip) '#{name}', '#{setup}'" )
13
+
14
+ BeerDb.read_setup_from_zip( local_zip_name, setup, local_zip_root )
15
+ end
16
+ end # class BeerZipDataset
17
+
18
+ end # module Datafile
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+
6
+ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why not ???
7
+ ## read dataset from zip(archive)
8
+
9
+ def initialize( dataset )
10
+ super( dataset )
11
+ end
12
+
13
+ def remote_zip_url # remote zip url
14
+ ### note: use http:// for now - lets us use (personal proxy NOT working w/ https) for now
15
+ ## "https://github.com/#{@name}/archive/master.zip"
16
+ "http://github.com/#{name}/archive/master.zip"
17
+ end
18
+
19
+ def local_zip_name
20
+ ### note: replace / in name w/ --I--
21
+ ## e.g. flatten the filename, that is, do NOT include any folders
22
+ name.gsub('/', '--I--') # note: will NOT include/return .zip extension
23
+ end
24
+
25
+ def local_zip_root
26
+ "./tmp"
27
+ end
28
+
29
+ def local_zip_path # local zip path
30
+ "#{local_zip_root}/#{local_zip_name}.zip"
31
+ end
32
+
33
+
34
+ def download
35
+ logger.info( "download dataset '#{name}'" )
36
+ logger.info( " from '#{remote_zip_url}'" )
37
+ logger.info( " to '#{local_zip_path}'..." )
38
+
39
+ download_blob( remote_zip_url, local_zip_path )
40
+ end
41
+
42
+
43
+ def dump
44
+ ## for debuggin dump dataset (also check if zip exits)
45
+ puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
46
+ puts " local '#{local_zip_name}' (#{local_zip_path})"
47
+ if File.exist?( local_zip_path )
48
+ puts " size: #{File.size(local_zip_path)} bytes"
49
+ else
50
+ puts " (file not found)"
51
+ end
52
+ puts " remote '#{remote_zip_url}'"
53
+ end
54
+
55
+
56
+ private
57
+ ####
58
+ # download tasks for zips
59
+ def download_blob( url, dest )
60
+ logger.info "downloading #{url} to #{dest}..."
61
+
62
+ ## make sure dest path exists
63
+ dest_p = File.dirname( dest )
64
+ FileUtils.mkdir_p( dest_p ) unless File.exists?( dest_p ) ## use Dir.exists?? why? why not??
65
+
66
+ worker = Fetcher::Worker.new
67
+ worker.copy( url, dest )
68
+ ## print some file stats
69
+ logger.debug " size: #{File.size(dest)} bytes"
70
+ end
71
+
72
+ end # class DatasetZip
73
+
74
+ end # module Datafile
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FootballZipDataset < ZipDataset
6
+
7
+ def initialize( dataset )
8
+ super( dataset )
9
+ end
10
+
11
+ def read
12
+ logger.info( "read football-dataset (zip) '#{name}', '#{setup}'" )
13
+
14
+ SportDb.read_setup_from_zip( local_zip_name, setup, local_zip_root )
15
+ end
16
+ end # class FootballZipDataset
17
+
18
+ end # module Datafile
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class ZipWorker ## check: rename to ZipDatafileWorker?? or ZipDatafile -why, why not ??
6
+
7
+ include LogUtils::Logging
8
+
9
+ def initialize( datafile )
10
+ @datafile = datafile
11
+ end
12
+
13
+ def download
14
+ @datafile.datasets.each do |dataset|
15
+ dataset.zip_worker.download
16
+ end
17
+ end
18
+
19
+ def read
20
+ @datafile.datasets.each do |dataset|
21
+ dataset.zip_worker.read
22
+ end
23
+ end
24
+
25
+ def calc
26
+ @datafile.scripts.each do |script|
27
+ script.call
28
+ end
29
+ end
30
+
31
+ def dump
32
+ @datafile.datasets.each do |dataset|
33
+ dataset.zip_worker.dump
34
+ end
35
+ ## also dump scripts
36
+ @datafile.scripts.each do |script|
37
+ script.dump
38
+ end
39
+ end
40
+
41
+ end # class ZipWorker
42
+
43
+ end # module Datafile
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class WorldZipDataset < ZipDataset
6
+
7
+ def initialize( dataset )
8
+ super( dataset )
9
+ end
10
+
11
+ def read
12
+ logger.info( "read world-dataset (zip) '#{name}', '#{setup}'" )
13
+
14
+ WorldDb.read_setup_from_zip( local_zip_name, setup, local_zip_root, { skip_tags: true } )
15
+ end
16
+ end # class WorldZipDataset
17
+
18
+ end # module Datafile
@@ -0,0 +1,6 @@
1
+ #####################
2
+ # national teams
3
+
4
+ football 'openfootball/national-teams'
5
+ football 'openfootball/euro-cup'
6
+
@@ -0,0 +1,2 @@
1
+
2
+ football 'openfootball/stadiums' ## NOTE: default is setup: 'all'
@@ -0,0 +1,3 @@
1
+
2
+ world 'openmundi/world.db', setup: 'countries'
3
+
@@ -0,0 +1,51 @@
1
+
2
+ puts "[eval] self in top = #{self.class.name}"
3
+
4
+ task :at => :importbuiltin do
5
+ puts "[eval] self in data (enter) = #{self.class.name}"
6
+ football 'openfootball/at-austria'
7
+ puts "[eval] self in data (leave) = #{self.class.name}"
8
+ end
9
+
10
+ task :at_2014_15 => :importbuiltin do
11
+ football 'openfootball/at-austria', setup: '2014-15'
12
+ end
13
+
14
+
15
+ task :at_recalc => :at do
16
+ calc do
17
+ [['at.2012/13'],
18
+ ['at.2013/14'],
19
+ ['at.2014/15', 'at.2.2014/15']].each do |event_key|
20
+ recalc_standings( event_key, out_root: './build/at-autria' )
21
+ end
22
+ end
23
+ end
24
+
25
+ task :at_2014_15_recalc => :at_2014_15 do
26
+ calc do
27
+ recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-autria' )
28
+ end
29
+ end
30
+
31
+
32
+ task :test_at_recalc => :env do
33
+ calc do
34
+ recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-autria' )
35
+ ## debug verison - write to ./build/at-austria
36
+ ## recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-austria' )
37
+ end
38
+ end
39
+
40
+
41
+ ## check if method def works too
42
+
43
+ puts "[eval] another self in top = #{self.class.name}"
44
+
45
+
46
+ def test_hello()
47
+ puts "[eval] self in method test_hello = #{self.class.name}"
48
+ puts "hello from test_hello"
49
+ end
50
+
51
+ test_hello()
data/test/helper.rb CHANGED
@@ -3,8 +3,9 @@
3
3
  require 'minitest/autorun'
4
4
 
5
5
  ## deps
6
- require 'worlddb'
7
- require 'sportdb' # note: will include worlddb
6
+
7
+ ### require 'worlddb'
8
+ require 'sportdb/models' # note: will include worlddb
8
9
 
9
10
 
10
11
  ## our own code
data/test/test_builder.rb CHANGED
@@ -34,6 +34,10 @@ EOS
34
34
  ## datafile.read
35
35
 
36
36
  datafile.dump
37
+
38
+ ## change worker (defaults to ZipWorker)
39
+ datafile.worker = Datafile::FileWorker.new( datafile )
40
+ datafile.dump
37
41
 
38
42
  assert true # if we get here - test success
39
43
  end
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_builder2.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ def recalc_standings( *args )
11
+ puts "[eval] self in method recal_standings = #{self.class.name}"
12
+ puts "hello from (global) recalc_standings"
13
+ end
14
+
15
+
16
+ class TestBuilder2 < MiniTest::Test
17
+
18
+ def test_builder
19
+
20
+ builder = Datafile::BuilderEx.load_file( "#{Datafile.root}/test/datafile2/at.rb" )
21
+
22
+ registry = Datafile::FileDataset.registry
23
+ registry.merge( openfootball: '../../openfootball' )
24
+
25
+ datafiles = builder.datafiles
26
+ datafiles.each do |datafile|
27
+ puts "=== datafile '#{datafile.name}' => #{datafile.deps.inspect}:"
28
+ puts " #{datafile.datasets.size} datasets, #{datafile.scripts.size} scripts"
29
+ datafile.dump
30
+ datafile.calc ## try calc (call dummy calculations for testing)
31
+ end
32
+
33
+ assert true # if we get here - test success
34
+ end
35
+
36
+ end # class TestBuilder2
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_file_dataset_registry.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestFileDatasetRegistry < MiniTest::Test
11
+
12
+ def test_lookup
13
+ registry = Datafile::FileDatasetRegistry.new
14
+ registry.merge( openfootball: '../../openfootball' )
15
+
16
+ assert_equal '../../openfootball/at-austria', registry.lookup( 'openfootball/at-austria')
17
+ assert_equal '../../openmundi/world.db', registry.lookup( 'openmundi/world.db' )
18
+
19
+ assert_equal '../../openfootball/at-austria', registry.lookup!( 'openfootball/at-austria')
20
+ assert_equal '../../openmundi/world.db', registry.lookup!( 'openmundi/world.db' )
21
+
22
+ assert_equal '../../openfootball/national-teams', registry.lookup( 'openfootball/national-teams')
23
+ assert_equal '../../openfootball/euro-cup', registry.lookup( 'openfootball/euro-cup' )
24
+ end
25
+
26
+ end # class TestFileDatasetRegistry
27
+
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_file_worker.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestFileWorker < MiniTest::Test
11
+
12
+ def test_eurocup
13
+
14
+ world_datafile = Datafile::Datafile.load_file( "#{Datafile.root}/test/datafile/world.rb" )
15
+ world_datafile.dump
16
+
17
+ eurocup_datafile = Datafile::Datafile.load_file( "#{Datafile.root}/test/datafile/eurocup.rb" )
18
+ eurocup_datafile.dump
19
+
20
+ # database setup 'n' config
21
+ ActiveRecord::Base.establish_connection( adapter: 'sqlite3',
22
+ database: ':memory:' )
23
+ SportDb.create_all
24
+ SportDb.read_builtin
25
+
26
+ ## change worker (defaults to ZipWorker)
27
+ world_datafile.worker = Datafile::FileWorker.new( world_datafile )
28
+ world_datafile.dump
29
+ world_datafile.read
30
+
31
+ registry = Datafile::FileDataset.registry
32
+ registry.merge( openfootball: '../../openfootball' )
33
+
34
+ eurocup_datafile.worker = Datafile::FileWorker.new( eurocup_datafile )
35
+ eurocup_datafile.dump
36
+ eurocup_datafile.read
37
+
38
+ assert true # if we get here - test success
39
+ end
40
+
41
+ end # class TestFileWorker
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datafile
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-29 00:00:00.000000000 Z
11
+ date: 2015-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logutils
@@ -96,14 +96,28 @@ files:
96
96
  - Rakefile
97
97
  - lib/datafile.rb
98
98
  - lib/datafile/builder.rb
99
+ - lib/datafile/builder2.rb
99
100
  - lib/datafile/datafile.rb
100
- - lib/datafile/datasets/beer.rb
101
101
  - lib/datafile/datasets/dataset.rb
102
- - lib/datafile/datasets/football.rb
103
- - lib/datafile/datasets/world.rb
104
102
  - lib/datafile/version.rb
103
+ - lib/datafile/workers/dataset.rb
104
+ - lib/datafile/workers/file/dataset.rb
105
+ - lib/datafile/workers/file/registry.rb
106
+ - lib/datafile/workers/file/worker.rb
107
+ - lib/datafile/workers/zip/beer.rb
108
+ - lib/datafile/workers/zip/dataset.rb
109
+ - lib/datafile/workers/zip/football.rb
110
+ - lib/datafile/workers/zip/worker.rb
111
+ - lib/datafile/workers/zip/world.rb
112
+ - test/datafile/eurocup.rb
113
+ - test/datafile/stadiums.rb
114
+ - test/datafile/world.rb
115
+ - test/datafile2/at.rb
105
116
  - test/helper.rb
106
117
  - test/test_builder.rb
118
+ - test/test_builder2.rb
119
+ - test/test_file_dataset_registry.rb
120
+ - test/test_file_worker.rb
107
121
  homepage: https://github.com/rubylibs/datafile
108
122
  licenses:
109
123
  - Public Domain
@@ -131,4 +145,7 @@ signing_key:
131
145
  specification_version: 4
132
146
  summary: datafile - builder for downloading n reading datasets
133
147
  test_files:
148
+ - test/test_builder2.rb
134
149
  - test/test_builder.rb
150
+ - test/test_file_dataset_registry.rb
151
+ - test/test_file_worker.rb
@@ -1,18 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Datafile
4
-
5
- class BeerDataset < Dataset
6
-
7
- def initialize( name, opts={} )
8
- super( name, opts )
9
- end
10
-
11
- def read()
12
- logger.info( "read beer-dataset '#{@name}', '#{setup}'" )
13
-
14
- BeerDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root() )
15
- end
16
- end # class BeerDataset
17
-
18
- end # module Datafile
@@ -1,18 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Datafile
4
-
5
- class FootballDataset < Dataset
6
-
7
- def initialize( name, opts={} )
8
- super( name, opts )
9
- end
10
-
11
- def read()
12
- logger.info( "read football-dataset '#{@name}', '#{setup}'" )
13
-
14
- SportDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root() )
15
- end
16
- end # class FootballDataset
17
-
18
- end # module Datafile
@@ -1,19 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Datafile
4
-
5
- class WorldDataset < Dataset
6
-
7
- def initialize( name, opts={} )
8
- super( name, opts )
9
- end
10
-
11
- def read()
12
- logger.info( "read world-dataset '#{@name}', '#{setup}'" )
13
-
14
- WorldDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root(), { skip_tags: true } )
15
- end
16
- end # class WorldDataset
17
-
18
- end # module Datafile
19
-