datafile 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26b6312f588cc22d664d552ca2f01e6e11482f3d
4
- data.tar.gz: 3afcbe6af0340685f5358a9f0713a4631a59e5ee
3
+ metadata.gz: 62443719632bb6ef1b75ce5b79ef146e1ddca9b1
4
+ data.tar.gz: 5d39b5537c4f280cb395d2be2e9abec702d910b4
5
5
  SHA512:
6
- metadata.gz: 78dddc2a46795eb19b25819c9cc95e3d5202b81dcd11851ddbce92ac7efd883815495a6224546baaa53fbefaeb944abbdc686645b16ac3c9464bec7ccb491f5c
7
- data.tar.gz: 48ae309c57d5a09b77d81d40e9542473427928c26ffabb28b9e4e7f18af941de1196f0f17a43440d9456a155111ddec61c9dfef7ae1854a3e8cb2c25ecbbc30c
6
+ metadata.gz: 04882e94add84a5c478025535375760c21ea006714a1f6e3d8b92660d98477d460065306082ac4ebf321621c7782d99beac8ae0ab7f75f28c3d430ef80658e84
7
+ data.tar.gz: 1ece9f87e091abf9fb494777aaf626a647188a788b4ede359bfb0a940eb53dffb50db90e6354d59cb128b383a980968928ccbfdfcc86eacd8fef766f11bf55b7
data/Manifest.txt CHANGED
@@ -4,11 +4,25 @@ README.md
4
4
  Rakefile
5
5
  lib/datafile.rb
6
6
  lib/datafile/builder.rb
7
+ lib/datafile/builder2.rb
7
8
  lib/datafile/datafile.rb
8
- lib/datafile/datasets/beer.rb
9
9
  lib/datafile/datasets/dataset.rb
10
- lib/datafile/datasets/football.rb
11
- lib/datafile/datasets/world.rb
12
10
  lib/datafile/version.rb
11
+ lib/datafile/workers/dataset.rb
12
+ lib/datafile/workers/file/dataset.rb
13
+ lib/datafile/workers/file/registry.rb
14
+ lib/datafile/workers/file/worker.rb
15
+ lib/datafile/workers/zip/beer.rb
16
+ lib/datafile/workers/zip/dataset.rb
17
+ lib/datafile/workers/zip/football.rb
18
+ lib/datafile/workers/zip/worker.rb
19
+ lib/datafile/workers/zip/world.rb
20
+ test/datafile/eurocup.rb
21
+ test/datafile/stadiums.rb
22
+ test/datafile/world.rb
23
+ test/datafile2/at.rb
13
24
  test/helper.rb
14
25
  test/test_builder.rb
26
+ test/test_builder2.rb
27
+ test/test_file_dataset_registry.rb
28
+ test/test_file_worker.rb
data/lib/datafile.rb CHANGED
@@ -8,12 +8,24 @@ require 'logutils'
8
8
  require 'datafile/version' # let it always go first
9
9
 
10
10
  require 'datafile/datasets/dataset'
11
- require 'datafile/datasets/football'
12
- require 'datafile/datasets/beer'
13
- require 'datafile/datasets/world'
11
+
12
+ require 'datafile/workers/dataset'
13
+
14
+ require 'datafile/workers/file/dataset'
15
+ require 'datafile/workers/file/registry'
16
+ require 'datafile/workers/file/worker'
17
+
18
+ require 'datafile/workers/zip/dataset'
19
+ require 'datafile/workers/zip/beer'
20
+ require 'datafile/workers/zip/football'
21
+ require 'datafile/workers/zip/world'
22
+ require 'datafile/workers/zip/worker'
23
+
14
24
  require 'datafile/datafile'
15
25
  require 'datafile/builder'
26
+ require 'datafile/builder2'
16
27
 
17
28
 
18
29
  # say hello
19
- puts Datafile.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
30
+ puts Datafile.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
31
+
@@ -2,15 +2,7 @@
2
2
 
3
3
  module Datafile
4
4
 
5
- class Builder
6
-
7
- include LogUtils::Logging
8
-
9
- attr_reader :datafile
10
-
11
- def initialize()
12
- @datafile = Datafile.new
13
- end
5
+ class Builder ## "simple" builder (one file, one datafile)
14
6
 
15
7
  def self.load_file( path )
16
8
  code = File.read_utf8( path )
@@ -24,6 +16,14 @@ class Builder
24
16
  end
25
17
 
26
18
 
19
+ include LogUtils::Logging
20
+
21
+ def initialize
22
+ @datafile = Datafile.new
23
+ end
24
+
25
+ attr_reader :datafile
26
+
27
27
  def beer( name, opts={} )
28
28
  logger.info( "[builder] add beer-dataset '#{name}'" )
29
29
  @datafile.datasets << BeerDataset.new( name, opts )
@@ -41,3 +41,4 @@ class Builder
41
41
 
42
42
  end # class Builder
43
43
  end # module Datafile
44
+
@@ -0,0 +1,90 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+
6
+ ###
7
+ ## check/todo: ename to BatchBuilder, MultiBuilder,etc - find better name - why, why not??
8
+
9
+ class BuilderEx
10
+
11
+ def self.load_file( path )
12
+ code = File.read_utf8( path )
13
+ self.load( code )
14
+ end
15
+
16
+ def self.load( code )
17
+ builder = BuilderEx.new
18
+ builder.instance_eval( code )
19
+ builder
20
+ end
21
+
22
+
23
+ include LogUtils::Logging
24
+
25
+ def initialize
26
+ @datafiles = []
27
+ @datafile = nil
28
+ end
29
+
30
+ attr_reader :datafiles
31
+
32
+
33
+ def task( arg )
34
+
35
+ logger.info( "[builder] add task '#{arg.inspect}' : #{arg.class.name}" )
36
+
37
+ if arg.kind_of?( String ) || arg.kind_of?( Symbol ) # e.g. 'at' or :at
38
+ name = arg.to_s
39
+ ## note: always default to FileWorker for now
40
+ ## -- use file: true -- find better name e.g. worker/source: file - why? why not??
41
+ @datafile = Datafile.new( name: name, deps: [], file: true )
42
+ yield ### execute block in context
43
+ ## b = Builder.new
44
+ ## block.call( b ) ## same as b.instance_eval( &block) ???
45
+ ## b.instance_eval( code )
46
+ ## b = Builder.load( &block )
47
+ elsif arg.kind_of?( Hash ) ## Hash e.g. :at_calc => :at etc.
48
+ key = arg.keys.first
49
+ value = arg[key] ## todo: check if single value? always turn into array
50
+
51
+ name = key.to_s ## get first key (assume it's name)
52
+ if value.kind_of?( Array )
53
+ deps = value.map { |v| v.to_s } ## convert to strings
54
+ else ## assume single string/symbol -- convert to array
55
+ deps = [value.to_s]
56
+ end
57
+ @datafile = Datafile.new( name: name, deps: deps, file: true ) ## note: always default to FileWorker for now
58
+ yield ### execute block in context
59
+ ## to be done
60
+ else
61
+ ## fix: report error: unknown type
62
+ end
63
+
64
+ @datafiles << @datafile
65
+ end
66
+
67
+ def calc( &block )
68
+ logger.info( "[builder] add script calc-block" )
69
+ @datafile.scripts << Script.new( block )
70
+ end
71
+
72
+ ################################
73
+ # "classic/standard" datasets
74
+ def beer( name, opts={} )
75
+ logger.info( "[builder] add beer-dataset '#{name}'" )
76
+ @datafile.datasets << BeerDataset.new( name, opts )
77
+ end
78
+
79
+ def football( name, opts={} )
80
+ logger.info( "[builder] add football-dataset '#{name}'" )
81
+ @datafile.datasets << FootballDataset.new( name, opts )
82
+ end
83
+
84
+ def world( name, opts={} )
85
+ logger.info( "[builder] add world-dataset '#{name}'" )
86
+ @datafile.datasets << WorldDataset.new( name, opts )
87
+ end
88
+
89
+ end # class Builder2
90
+ end # module Datafile
@@ -2,13 +2,26 @@
2
2
 
3
3
  module Datafile
4
4
 
5
- class Datafile
6
-
5
+ class Script
7
6
  include LogUtils::Logging
8
7
 
9
- attr_reader :datasets
8
+ def initialize( proc )
9
+ @proc = proc
10
+ end
11
+
12
+ def call
13
+ logger.info( "[script] calling calc block" )
14
+ @proc.call
15
+ end
16
+
17
+ def dump
18
+ puts " script: #{@proc.inspect}"
19
+ end
20
+ end ## class Script
10
21
 
11
22
 
23
+ class Datafile
24
+
12
25
  ## convenience method - use like Datafile.load_file()
13
26
  def self.load_file( path='./Datafile' )
14
27
  code = File.read_utf8( path )
@@ -26,27 +39,67 @@ class Datafile
26
39
  end
27
40
 
28
41
 
29
- def initialize
42
+ include LogUtils::Logging
43
+
44
+ def initialize( opts={} )
45
+ @opts = opts
30
46
  @datasets = []
47
+ @scripts = [] ## calculation scripts (calc blocks)
48
+
49
+ ## (target)name - return nil if noname (set/defined/assigned)
50
+ @name = opts[:name] || nil
51
+ ## deps (dependencies) - note: always returns an array (empty array if no deps)
52
+ @deps = opts[:deps] || []
53
+
54
+ if opts[:file]
55
+ @worker = FileWorker.new( self )
56
+ else
57
+ ## default to zip worker for now
58
+ @worker = ZipWorker.new( self )
59
+ end
31
60
  end
32
61
 
62
+ attr_reader :datasets
63
+ attr_reader :scripts ## calc(ulation) scripts (calc blocks)
64
+ attr_reader :name
65
+ attr_reader :deps ## dep(endencies)
66
+
67
+ attr_accessor :worker # lets you change worker - find a better way - how, why, why not??
68
+
33
69
 
34
70
  def run
35
71
  logger.info( "[datafile] begin - run" )
36
- download() # step 1 - download zips for datasets
37
- read() # step 2 - read in datasets from zips
72
+ download # step 1 - download zips for datasets
73
+ read # step 2 - read in datasets from zips
74
+ calc # step 3 - run calc(ulations) scripts
38
75
  logger.info( "[datafile] end - run" )
39
76
  end
40
77
 
41
78
 
42
-
43
79
  def download
44
80
  logger.info( "[datafile] dowload" )
45
- @datasets.each do |dataset|
46
- dataset.download()
47
- end
81
+ @worker.download
82
+ ## check: use @worker.download( @datasets) - why, why not?? link worker w/ datafile - why, why not??
83
+ end
84
+
85
+ def read
86
+ logger.info( "[datafile] read" )
87
+ @worker.read
88
+ end
89
+
90
+ def calc
91
+ logger.info( "[datafile] calc" )
92
+ @worker.calc
48
93
  end
49
94
 
95
+ def dump
96
+ ## for debugging dump datasets (note: will/might also check if zip exits)
97
+ logger.info( "[datafile] dump datasets (for debugging)" )
98
+ @worker.dump
99
+ end
100
+
101
+
102
+ =begin
50
103
  def download_world ## only dl world datasets (skip all others)
51
104
  logger.info( "[datafile] dowload world datasets" )
52
105
  @datasets.each do |dataset|
@@ -79,21 +132,14 @@ class Datafile
79
132
  end
80
133
  end
81
134
  end
135
+ =end
82
136
 
83
137
 
84
-
85
-
86
- def read
87
- logger.info( "[datafile] read" )
88
- @datasets.each do |dataset|
89
- dataset.read()
90
- end
91
- end
92
-
138
+ =begin
93
139
  def read_world
94
140
  logger.info( "[datafile] read world datasets" )
95
141
  @datasets.each do |dataset|
96
- if dataset.kind_of? WorldDataset
142
+ if dataset.kind_of?( WorldDataset )
97
143
  dataset.read()
98
144
  else
99
145
  # skip all others
@@ -104,7 +150,7 @@ class Datafile
104
150
  def read_beer
105
151
  logger.info( "[datafile] read beer datasets" )
106
152
  @datasets.each do |dataset|
107
- if dataset.kind_of? BeerDataset
153
+ if dataset.kind_of?( BeerDataset )
108
154
  dataset.read()
109
155
  else
110
156
  # skip all others
@@ -115,23 +161,14 @@ class Datafile
115
161
  def read_football
116
162
  logger.info( "[datafile] read football datasets" )
117
163
  @datasets.each do |dataset|
118
- if dataset.kind_of? FootballDataset
164
+ if dataset.kind_of?( FootballDataset )
119
165
  dataset.read()
120
166
  else
121
167
  # skip all others
122
168
  end
123
169
  end
124
170
  end
125
-
126
-
127
- def dump
128
- ## for debugging dump datasets (note: will/might also check if zip exits)
129
- logger.info( "[datafile] dump datasets (for debugging)" )
130
- @datasets.each do |dataset|
131
- dataset.dump()
132
- end
133
- end
134
-
171
+ =end
135
172
 
136
173
  end # class Datafile
137
174
  end # module Datafile
@@ -10,71 +10,42 @@ class Dataset
10
10
  @opts = opts
11
11
  end
12
12
 
13
+ attr_reader :name
14
+ attr_reader :opts
13
15
 
14
16
  def setup
15
17
  value = @opts[:setup] || 'all'
16
18
  "setups/#{value}"
17
19
  end
20
+ end # class Dataset
18
21
 
19
- def remote_zip_url # remote zip url
20
- ### note: use http:// for now - lets us use (personal proxy NOT working w/ https) for now
21
- ## "https://github.com/#{@name}/archive/master.zip"
22
- "http://github.com/#{@name}/archive/master.zip"
23
- end
24
-
25
- def local_zip_name
26
- ### note: replace / in name w/ --I--
27
- ## e.g. flatten the filename, that is, do NOT include any folders
28
- @name.gsub('/', '--I--') # note: will NOT include/return .zip extension
29
- end
30
22
 
31
- def local_zip_root
32
- "./tmp"
33
- end
34
-
35
- def local_zip_path # local zip path
36
- "#{local_zip_root}/#{local_zip_name}.zip"
23
+ class WorldDataset < Dataset
24
+ def initialize( name, opts={} )
25
+ super( name, opts ) ## todo/check: just juse super (e.g. pass along all params - why? why not?)
37
26
  end
27
+
28
+ def zip_worker() WorldZipDataset.new( self ); end ## check: change (rename) just use zip or use worker_zip?? - why, why not?
29
+ def file_worker() WorldFileDataset.new( self ); end
30
+ end # class WorldDataset
38
31
 
39
-
40
- def download
41
- logger.info( "download dataset '#{@name}'" )
42
- logger.info( " from '#{remote_zip_url}'" )
43
- logger.info( " to '#{local_zip_path}'..." )
44
-
45
- download_blob( remote_zip_url, local_zip_path )
32
+ class FootballDataset < Dataset
33
+ def initialize( name, opts={} )
34
+ super( name, opts )
46
35
  end
47
36
 
37
+ def zip_worker() FootballZipDataset.new( self ); end
38
+ def file_worker() FootballFileDataset.new( self ); end
39
+ end # class FootballDataset
48
40
 
49
- def dump
50
- ## for debuggin dump dataset (also check if zip exits)
51
- puts "dataset '#{@name}' opts=#{@opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
52
- puts " local '#{local_zip_name}' (#{local_zip_path})"
53
- if File.exist?( local_zip_path )
54
- puts " size: #{File.size(local_zip_path)} bytes"
55
- else
56
- puts " (file not found)"
57
- end
58
- puts " remote '#{remote_zip_url}'"
41
+ class BeerDataset < Dataset
42
+ def initialize( name, opts={} )
43
+ super( name, opts )
59
44
  end
60
45
 
46
+ def zip_worker() BeerZipDataset.new( self ); end
47
+ def file_worker() BeerFileDataset.new( self ); end
48
+ end # class BeerDataset
61
49
 
62
- private
63
- ####
64
- # download tasks for zips
65
- def download_blob( url, dest )
66
- logger.info "downloading #{url} to #{dest}..."
67
-
68
- ## make sure dest path exists
69
- dest_p = File.dirname( dest )
70
- FileUtils.mkdir_p( dest_p ) unless File.exists?( dest_p ) ## use Dir.exists?? why? why not??
71
-
72
- worker = Fetcher::Worker.new
73
- worker.copy( url, dest )
74
- ## print some file stats
75
- logger.debug " size: #{File.size(dest)} bytes"
76
- end
77
-
78
- end # class Dataset
79
50
 
80
51
  end # module Datafile
@@ -3,8 +3,8 @@
3
3
  module Datafile
4
4
 
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 1
7
- PATCH = 2
6
+ MINOR = 2
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class Error < StandardError
6
+ end
7
+
8
+ ####
9
+ # todo/check:
10
+ # rename to DatasetNotFound or similar??
11
+ # use "common" error class - why? why not?
12
+ class DatasetNotFoundError < Error
13
+ attr_reader :message
14
+
15
+ def initialize( message )
16
+ @message = message
17
+ end
18
+
19
+ def to_s
20
+ "datset not found => #{@message}"
21
+ end
22
+ end
23
+
24
+
25
+ class DatasetNode ### find a better name (e.g. DatasetWorker, DatasetBase, DatasetRef, DatasetWrapper ???) ???
26
+
27
+ include LogUtils::Logging
28
+
29
+ def initialize( dataset )
30
+ @dataset = dataset
31
+ end
32
+
33
+ def name() @dataset.name; end
34
+ def opts() @dataset.opts; end
35
+ def setup() @dataset.setup; end
36
+
37
+ end # class DatasetNode
38
+
39
+
40
+ end # module Datafile
@@ -0,0 +1,82 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FileDataset < DatasetNode
6
+ ## read dataset from file(system)
7
+
8
+ @@registry = nil
9
+
10
+ def self.registry
11
+ ## use ||= why, why not?? - add Registry as nested class, why, why not ??
12
+ if @@registry.nil?
13
+ @@registry = FileDatasetRegistry.new
14
+ end
15
+ @@registry
16
+ end
17
+
18
+ def initialize( dataset )
19
+ super( dataset )
20
+ end
21
+
22
+ def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
23
+ registry.lookup( name )
24
+ end
25
+
26
+ def dump
27
+ ## for debuggin dump dataset -- todo (also check if folder exits ??)
28
+ puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
29
+ puts " repo-dir '#{repo_dir}'"
30
+ end
31
+
32
+ private
33
+ def registry ## convenience method to access "static" shared class variable
34
+ FileDataset.registry ## self.registry not working?? - or self.registry() -why, why not??
35
+ end
36
+ end # class FileDataset
37
+
38
+
39
+
40
+ class FootballFileDataset < FileDataset
41
+
42
+ def initialize( dataset )
43
+ super( dataset )
44
+ end
45
+
46
+ def read()
47
+ logger.info( "read football-dataset (file) '#{name}', '#{setup}'" )
48
+
49
+ SportDb.read_setup( setup, repo_dir )
50
+ end
51
+ end # class FootballFileDataset
52
+
53
+
54
+ class WorldFileDataset < FileDataset
55
+
56
+ def initialize( dataset )
57
+ super( dataset )
58
+ end
59
+
60
+ def read()
61
+ logger.info( "read world-dataset (file) '#{name}', '#{setup}'" )
62
+
63
+ ## WorldDb.read_setup( 'setups/countries', WORLD_DB_INCLUDE_PATH, skip_tags: true )
64
+ WorldDb.read_setup( setup, repo_dir, skip_tags: true )
65
+ end
66
+ end # class WorldFileDataset
67
+
68
+ class BeerFileDataset < FileDataset
69
+
70
+ def initialize( dataset )
71
+ super( dataset )
72
+ end
73
+
74
+ def read()
75
+ logger.info( "read beer-dataset (file) '#{name}', '#{setup}'" )
76
+
77
+ BeerDb.read_setup( setup, repo_dir )
78
+ end
79
+ end # class BeerFileDataset
80
+
81
+
82
+ end # module Datafile
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FileDatasetRegistry
6
+ ## store mapping for (local) datasets e.g.
7
+ ## map github handle e.g. openfootball/at-austria to local path
8
+
9
+ include LogUtils::Logging
10
+
11
+ def initialize
12
+ ## org rootpaths
13
+ @roots = {}
14
+ ## org defaults (use merge to overwrite for now)
15
+ @roots[:openmundi] = '../../openmundi' ## OPENMUNDI_ROOT = "../../openmundi"
16
+ @roots[:openfootball] = '..' ## OPENFOOTBALL_ROOT = ".."
17
+ @roots[:openbeer] = '..'
18
+ end
19
+
20
+ def merge( hash )
21
+ ## todo: add support for merging project mappings too
22
+ ## use merge_roots and merge_projects ?? why, why not??
23
+
24
+ @roots = @roots.merge( hash )
25
+ end
26
+
27
+ def lookup( name ) lookup_worker( name, false ); end ## false=>return nil; do NOT fail w/ excep
28
+ def lookup!(name ) lookup_worker( name, true ); end ## true=>throw except;
29
+
30
+ private
31
+ def lookup_worker( name, fail_on_error )
32
+ ### fix: use lookup! version for exption and lookup (w/ returning nil) - why, why not??
33
+
34
+ ## split name in org/user + project (e.g. openfootball/at-austria)
35
+ parts = name.split( '/' )
36
+ ## check/todo: assert parts == 2 -- why, why not??
37
+ root = @roots[ parts[0].to_sym ]
38
+ if root.nil?
39
+ msg = "no mapping found for '#{parts[0]}' in '#{name}'"
40
+ logger.error( msg )
41
+ if fail_on_error
42
+ raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
43
+ else
44
+ return nil
45
+ end
46
+ end
47
+
48
+ path = "#{root}/#{parts[1]}"
49
+ ## check if folder/directory exists
50
+ unless File.exist?( path )
51
+ msg = "no file found for '#{name}'; expected '#{path}'"
52
+ logger.error( msg )
53
+ if fail_on_error
54
+ raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
55
+ else
56
+ return nil
57
+ end
58
+ end
59
+ ### check for File.directory?( path ) too - why, why not???
60
+ path
61
+ end
62
+
63
+ end # class FileDatasetRegistry
64
+
65
+ end # module Datafile
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FileWorker ## check: rename to FileDatafileWorker?? or FileDatafile -why, why not ??
6
+
7
+ include LogUtils::Logging
8
+
9
+ def initialize( datafile )
10
+ @datafile = datafile
11
+ end
12
+
13
+ def download
14
+ ## note: do NOTHING for now; assume repo already present (unpacked) on local filesystem
15
+ end
16
+
17
+ def read
18
+ @datafile.datasets.each do |dataset|
19
+ dataset.file_worker.read
20
+ end
21
+ end
22
+
23
+ def calc
24
+ @datafile.scripts.each do |script|
25
+ script.call
26
+ end
27
+ end
28
+
29
+ def dump
30
+ @datafile.datasets.each do |dataset|
31
+ dataset.file_worker.dump
32
+ end
33
+
34
+ ## also dump scripts
35
+ @datafile.scripts.each do |script|
36
+ script.dump
37
+ end
38
+ end
39
+
40
+ end # class FileWorker
41
+
42
+ end # module Datafile
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class BeerZipDataset < ZipDataset
6
+
7
+ def initialize( dataset )
8
+ super( dataset )
9
+ end
10
+
11
+ def read
12
+ logger.info( "read beer-dataset (zip) '#{name}', '#{setup}'" )
13
+
14
+ BeerDb.read_setup_from_zip( local_zip_name, setup, local_zip_root )
15
+ end
16
+ end # class BeerZipDataset
17
+
18
+ end # module Datafile
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+
6
+ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why not ???
7
+ ## read dataset from zip(archive)
8
+
9
+ def initialize( dataset )
10
+ super( dataset )
11
+ end
12
+
13
+ def remote_zip_url # remote zip url
14
+ ### note: use http:// for now - lets us use (personal proxy NOT working w/ https) for now
15
+ ## "https://github.com/#{@name}/archive/master.zip"
16
+ "http://github.com/#{name}/archive/master.zip"
17
+ end
18
+
19
+ def local_zip_name
20
+ ### note: replace / in name w/ --I--
21
+ ## e.g. flatten the filename, that is, do NOT include any folders
22
+ name.gsub('/', '--I--') # note: will NOT include/return .zip extension
23
+ end
24
+
25
+ def local_zip_root
26
+ "./tmp"
27
+ end
28
+
29
+ def local_zip_path # local zip path
30
+ "#{local_zip_root}/#{local_zip_name}.zip"
31
+ end
32
+
33
+
34
+ def download
35
+ logger.info( "download dataset '#{name}'" )
36
+ logger.info( " from '#{remote_zip_url}'" )
37
+ logger.info( " to '#{local_zip_path}'..." )
38
+
39
+ download_blob( remote_zip_url, local_zip_path )
40
+ end
41
+
42
+
43
+ def dump
44
+ ## for debuggin dump dataset (also check if zip exits)
45
+ puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
46
+ puts " local '#{local_zip_name}' (#{local_zip_path})"
47
+ if File.exist?( local_zip_path )
48
+ puts " size: #{File.size(local_zip_path)} bytes"
49
+ else
50
+ puts " (file not found)"
51
+ end
52
+ puts " remote '#{remote_zip_url}'"
53
+ end
54
+
55
+
56
+ private
57
+ ####
58
+ # download tasks for zips
59
+ def download_blob( url, dest )
60
+ logger.info "downloading #{url} to #{dest}..."
61
+
62
+ ## make sure dest path exists
63
+ dest_p = File.dirname( dest )
64
+ FileUtils.mkdir_p( dest_p ) unless File.exists?( dest_p ) ## use Dir.exists?? why? why not??
65
+
66
+ worker = Fetcher::Worker.new
67
+ worker.copy( url, dest )
68
+ ## print some file stats
69
+ logger.debug " size: #{File.size(dest)} bytes"
70
+ end
71
+
72
+ end # class DatasetZip
73
+
74
+ end # module Datafile
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class FootballZipDataset < ZipDataset
6
+
7
+ def initialize( dataset )
8
+ super( dataset )
9
+ end
10
+
11
+ def read
12
+ logger.info( "read football-dataset (zip) '#{name}', '#{setup}'" )
13
+
14
+ SportDb.read_setup_from_zip( local_zip_name, setup, local_zip_root )
15
+ end
16
+ end # class FootballZipDataset
17
+
18
+ end # module Datafile
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class ZipWorker ## check: rename to ZipDatafileWorker?? or ZipDatafile -why, why not ??
6
+
7
+ include LogUtils::Logging
8
+
9
+ def initialize( datafile )
10
+ @datafile = datafile
11
+ end
12
+
13
+ def download
14
+ @datafile.datasets.each do |dataset|
15
+ dataset.zip_worker.download
16
+ end
17
+ end
18
+
19
+ def read
20
+ @datafile.datasets.each do |dataset|
21
+ dataset.zip_worker.read
22
+ end
23
+ end
24
+
25
+ def calc
26
+ @datafile.scripts.each do |script|
27
+ script.call
28
+ end
29
+ end
30
+
31
+ def dump
32
+ @datafile.datasets.each do |dataset|
33
+ dataset.zip_worker.dump
34
+ end
35
+ ## also dump scripts
36
+ @datafile.scripts.each do |script|
37
+ script.dump
38
+ end
39
+ end
40
+
41
+ end # class ZipWorker
42
+
43
+ end # module Datafile
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ module Datafile
4
+
5
+ class WorldZipDataset < ZipDataset
6
+
7
+ def initialize( dataset )
8
+ super( dataset )
9
+ end
10
+
11
+ def read
12
+ logger.info( "read world-dataset (zip) '#{name}', '#{setup}'" )
13
+
14
+ WorldDb.read_setup_from_zip( local_zip_name, setup, local_zip_root, { skip_tags: true } )
15
+ end
16
+ end # class WorldZipDataset
17
+
18
+ end # module Datafile
@@ -0,0 +1,6 @@
1
+ #####################
2
+ # national teams
3
+
4
+ football 'openfootball/national-teams'
5
+ football 'openfootball/euro-cup'
6
+
@@ -0,0 +1,2 @@
1
+
2
+ football 'openfootball/stadiums' ## NOTE: default is setup: 'all'
@@ -0,0 +1,3 @@
1
+
2
+ world 'openmundi/world.db', setup: 'countries'
3
+
@@ -0,0 +1,51 @@
1
+
2
+ puts "[eval] self in top = #{self.class.name}"
3
+
4
+ task :at => :importbuiltin do
5
+ puts "[eval] self in data (enter) = #{self.class.name}"
6
+ football 'openfootball/at-austria'
7
+ puts "[eval] self in data (leave) = #{self.class.name}"
8
+ end
9
+
10
+ task :at_2014_15 => :importbuiltin do
11
+ football 'openfootball/at-austria', setup: '2014-15'
12
+ end
13
+
14
+
15
+ task :at_recalc => :at do
16
+ calc do
17
+ [['at.2012/13'],
18
+ ['at.2013/14'],
19
+ ['at.2014/15', 'at.2.2014/15']].each do |event_key|
20
+ recalc_standings( event_key, out_root: './build/at-autria' )
21
+ end
22
+ end
23
+ end
24
+
25
+ task :at_2014_15_recalc => :at_2014_15 do
26
+ calc do
27
+ recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-autria' )
28
+ end
29
+ end
30
+
31
+
32
+ task :test_at_recalc => :env do
33
+ calc do
34
+ recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-autria' )
35
+ ## debug verison - write to ./build/at-austria
36
+ ## recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-austria' )
37
+ end
38
+ end
39
+
40
+
41
+ ## check if method def works too
42
+
43
+ puts "[eval] another self in top = #{self.class.name}"
44
+
45
+
46
+ def test_hello()
47
+ puts "[eval] self in method test_hello = #{self.class.name}"
48
+ puts "hello from test_hello"
49
+ end
50
+
51
+ test_hello()
data/test/helper.rb CHANGED
@@ -3,8 +3,9 @@
3
3
  require 'minitest/autorun'
4
4
 
5
5
  ## deps
6
- require 'worlddb'
7
- require 'sportdb' # note: will include worlddb
6
+
7
+ ### require 'worlddb'
8
+ require 'sportdb/models' # note: will include worlddb
8
9
 
9
10
 
10
11
  ## our own code
data/test/test_builder.rb CHANGED
@@ -34,6 +34,10 @@ EOS
34
34
  ## datafile.read
35
35
 
36
36
  datafile.dump
37
+
38
+ ## change worker (defaults to ZipWorker)
39
+ datafile.worker = Datafile::FileWorker.new( datafile )
40
+ datafile.dump
37
41
 
38
42
  assert true # if we get here - test success
39
43
  end
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_builder2.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ def recalc_standings( *args )
11
+ puts "[eval] self in method recal_standings = #{self.class.name}"
12
+ puts "hello from (global) recalc_standings"
13
+ end
14
+
15
+
16
+ class TestBuilder2 < MiniTest::Test
17
+
18
+ def test_builder
19
+
20
+ builder = Datafile::BuilderEx.load_file( "#{Datafile.root}/test/datafile2/at.rb" )
21
+
22
+ registry = Datafile::FileDataset.registry
23
+ registry.merge( openfootball: '../../openfootball' )
24
+
25
+ datafiles = builder.datafiles
26
+ datafiles.each do |datafile|
27
+ puts "=== datafile '#{datafile.name}' => #{datafile.deps.inspect}:"
28
+ puts " #{datafile.datasets.size} datasets, #{datafile.scripts.size} scripts"
29
+ datafile.dump
30
+ datafile.calc ## try calc (call dummy calculations for testing)
31
+ end
32
+
33
+ assert true # if we get here - test success
34
+ end
35
+
36
+ end # class TestBuilder2
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_file_dataset_registry.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestFileDatasetRegistry < MiniTest::Test
11
+
12
+ def test_lookup
13
+ registry = Datafile::FileDatasetRegistry.new
14
+ registry.merge( openfootball: '../../openfootball' )
15
+
16
+ assert_equal '../../openfootball/at-austria', registry.lookup( 'openfootball/at-austria')
17
+ assert_equal '../../openmundi/world.db', registry.lookup( 'openmundi/world.db' )
18
+
19
+ assert_equal '../../openfootball/at-austria', registry.lookup!( 'openfootball/at-austria')
20
+ assert_equal '../../openmundi/world.db', registry.lookup!( 'openmundi/world.db' )
21
+
22
+ assert_equal '../../openfootball/national-teams', registry.lookup( 'openfootball/national-teams')
23
+ assert_equal '../../openfootball/euro-cup', registry.lookup( 'openfootball/euro-cup' )
24
+ end
25
+
26
+ end # class TestFileDatasetRegistry
27
+
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_file_worker.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestFileWorker < MiniTest::Test
11
+
12
+ def test_eurocup
13
+
14
+ world_datafile = Datafile::Datafile.load_file( "#{Datafile.root}/test/datafile/world.rb" )
15
+ world_datafile.dump
16
+
17
+ eurocup_datafile = Datafile::Datafile.load_file( "#{Datafile.root}/test/datafile/eurocup.rb" )
18
+ eurocup_datafile.dump
19
+
20
+ # database setup 'n' config
21
+ ActiveRecord::Base.establish_connection( adapter: 'sqlite3',
22
+ database: ':memory:' )
23
+ SportDb.create_all
24
+ SportDb.read_builtin
25
+
26
+ ## change worker (defaults to ZipWorker)
27
+ world_datafile.worker = Datafile::FileWorker.new( world_datafile )
28
+ world_datafile.dump
29
+ world_datafile.read
30
+
31
+ registry = Datafile::FileDataset.registry
32
+ registry.merge( openfootball: '../../openfootball' )
33
+
34
+ eurocup_datafile.worker = Datafile::FileWorker.new( eurocup_datafile )
35
+ eurocup_datafile.dump
36
+ eurocup_datafile.read
37
+
38
+ assert true # if we get here - test success
39
+ end
40
+
41
+ end # class TestFileWorker
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datafile
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-29 00:00:00.000000000 Z
11
+ date: 2015-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logutils
@@ -96,14 +96,28 @@ files:
96
96
  - Rakefile
97
97
  - lib/datafile.rb
98
98
  - lib/datafile/builder.rb
99
+ - lib/datafile/builder2.rb
99
100
  - lib/datafile/datafile.rb
100
- - lib/datafile/datasets/beer.rb
101
101
  - lib/datafile/datasets/dataset.rb
102
- - lib/datafile/datasets/football.rb
103
- - lib/datafile/datasets/world.rb
104
102
  - lib/datafile/version.rb
103
+ - lib/datafile/workers/dataset.rb
104
+ - lib/datafile/workers/file/dataset.rb
105
+ - lib/datafile/workers/file/registry.rb
106
+ - lib/datafile/workers/file/worker.rb
107
+ - lib/datafile/workers/zip/beer.rb
108
+ - lib/datafile/workers/zip/dataset.rb
109
+ - lib/datafile/workers/zip/football.rb
110
+ - lib/datafile/workers/zip/worker.rb
111
+ - lib/datafile/workers/zip/world.rb
112
+ - test/datafile/eurocup.rb
113
+ - test/datafile/stadiums.rb
114
+ - test/datafile/world.rb
115
+ - test/datafile2/at.rb
105
116
  - test/helper.rb
106
117
  - test/test_builder.rb
118
+ - test/test_builder2.rb
119
+ - test/test_file_dataset_registry.rb
120
+ - test/test_file_worker.rb
107
121
  homepage: https://github.com/rubylibs/datafile
108
122
  licenses:
109
123
  - Public Domain
@@ -131,4 +145,7 @@ signing_key:
131
145
  specification_version: 4
132
146
  summary: datafile - builder for downloading n reading datasets
133
147
  test_files:
148
+ - test/test_builder2.rb
134
149
  - test/test_builder.rb
150
+ - test/test_file_dataset_registry.rb
151
+ - test/test_file_worker.rb
@@ -1,18 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Datafile
4
-
5
- class BeerDataset < Dataset
6
-
7
- def initialize( name, opts={} )
8
- super( name, opts )
9
- end
10
-
11
- def read()
12
- logger.info( "read beer-dataset '#{@name}', '#{setup}'" )
13
-
14
- BeerDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root() )
15
- end
16
- end # class BeerDataset
17
-
18
- end # module Datafile
@@ -1,18 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Datafile
4
-
5
- class FootballDataset < Dataset
6
-
7
- def initialize( name, opts={} )
8
- super( name, opts )
9
- end
10
-
11
- def read()
12
- logger.info( "read football-dataset '#{@name}', '#{setup}'" )
13
-
14
- SportDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root() )
15
- end
16
- end # class FootballDataset
17
-
18
- end # module Datafile
@@ -1,19 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Datafile
4
-
5
- class WorldDataset < Dataset
6
-
7
- def initialize( name, opts={} )
8
- super( name, opts )
9
- end
10
-
11
- def read()
12
- logger.info( "read world-dataset '#{@name}', '#{setup}'" )
13
-
14
- WorldDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root(), { skip_tags: true } )
15
- end
16
- end # class WorldDataset
17
-
18
- end # module Datafile
19
-