datafile 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gemtest +0 -0
- data/Manifest.txt +7 -0
- data/Rakefile +3 -3
- data/lib/datafile.rb +6 -14
- data/lib/datafile/builder.rb +38 -0
- data/lib/datafile/datafile.rb +39 -0
- data/lib/datafile/datasets/dataset.rb +63 -0
- data/lib/datafile/datasets/football.rb +18 -0
- data/lib/datafile/datasets/world.rb +19 -0
- data/lib/datafile/version.rb +19 -3
- data/test/helper.rb +12 -0
- data/test/test_builder.rb +39 -0
- metadata +39 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 650b215a771b6d6b984f2a65e187a08803c383ad
|
4
|
+
data.tar.gz: 15512a5c171fea567372ed5983599330ad6312cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee1d067e74de8bc1d6ba85cb1cc235b30954d7b0e88254d487ab40a4050143439aa0d994eb34e5801cbb61a1a8c2a28da974878d43a56615118d2bae1a31fd80
|
7
|
+
data.tar.gz: 1c019e305e059a04d3cd7a25909d5616e9d13c5bdbb8a4c360eb042f04484476655e701411c2ecc24e255647c8567171607c58532bb4dd684d90a941cc38c1b1
|
data/.gemtest
ADDED
File without changes
|
data/Manifest.txt
CHANGED
@@ -3,4 +3,11 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/datafile.rb
|
6
|
+
lib/datafile/builder.rb
|
7
|
+
lib/datafile/datafile.rb
|
8
|
+
lib/datafile/datasets/dataset.rb
|
9
|
+
lib/datafile/datasets/football.rb
|
10
|
+
lib/datafile/datasets/world.rb
|
6
11
|
lib/datafile/version.rb
|
12
|
+
test/helper.rb
|
13
|
+
test/test_builder.rb
|
data/Rakefile
CHANGED
@@ -18,7 +18,9 @@ Hoe.spec 'datafile' do
|
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
|
-
['logutils'
|
21
|
+
['logutils'],
|
22
|
+
['textutils'],
|
23
|
+
['fetcher'],
|
22
24
|
]
|
23
25
|
|
24
26
|
self.licenses = ['Public Domain']
|
@@ -26,6 +28,4 @@ Hoe.spec 'datafile' do
|
|
26
28
|
self.spec_extras = {
|
27
29
|
:required_ruby_version => '>= 1.9.2'
|
28
30
|
}
|
29
|
-
|
30
|
-
|
31
31
|
end
|
data/lib/datafile.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'logutils'
|
4
4
|
|
@@ -7,19 +7,11 @@ require 'logutils'
|
|
7
7
|
|
8
8
|
require 'datafile/version' # let it always go first
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
"datafile/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.root
|
19
|
-
"#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
|
20
|
-
end
|
21
|
-
|
22
|
-
end # module Datafile
|
10
|
+
require 'datafile/datasets/dataset'
|
11
|
+
require 'datafile/datasets/football'
|
12
|
+
require 'datafile/datasets/world'
|
13
|
+
require 'datafile/datafile'
|
14
|
+
require 'datafile/builder'
|
23
15
|
|
24
16
|
|
25
17
|
|
data/lib/datafile/builder.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class Builder
|
6
|
+
|
7
|
+
include LogUtils::Logging
|
8
|
+
|
9
|
+
attr_reader :datafile
|
10
|
+
|
11
|
+
def initialize()
|
12
|
+
@datafile = Datafile.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.load_file( path )
|
16
|
+
code = File.read_utf8( path )
|
17
|
+
self.load( code )
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.load( code )
|
21
|
+
builder = Builder.new
|
22
|
+
builder.instance_eval( code )
|
23
|
+
builder
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
def football( name, opts={} )
|
28
|
+
logger.info( "[builder] add football-dataset '#{name}'" )
|
29
|
+
@datafile.datasets << FootballDataset.new( name, opts )
|
30
|
+
end
|
31
|
+
|
32
|
+
def world( name, opts={} )
|
33
|
+
logger.info( "[builder] add world-dataset '#{name}'" )
|
34
|
+
@datafile.datasets << WorldDataset.new( name, opts )
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class Builder
|
38
|
+
end # module Datafile
|
data/lib/datafile/datafile.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class Datafile
|
6
|
+
|
7
|
+
include LogUtils::Logging
|
8
|
+
|
9
|
+
attr_reader :datasets
|
10
|
+
|
11
|
+
def initialize()
|
12
|
+
@datasets = []
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
def run()
|
17
|
+
logger.info( "[datafile] begin - run" )
|
18
|
+
download() # step 1 - download zips for datasets
|
19
|
+
read() # step 2 - read in datasets from zips
|
20
|
+
logger.info( "[datafile] end - run" )
|
21
|
+
end
|
22
|
+
|
23
|
+
def download()
|
24
|
+
logger.info( "[datafile] dowload" )
|
25
|
+
@datasets.each do |dataset|
|
26
|
+
dataset.download()
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def read()
|
31
|
+
logger.info( "[datafile] read" )
|
32
|
+
@datasets.each do |dataset|
|
33
|
+
dataset.read()
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class Datafile
|
38
|
+
end # module Datafile
|
39
|
+
|
data/lib/datafile/datasets/dataset.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class Dataset
|
6
|
+
include LogUtils::Logging
|
7
|
+
|
8
|
+
def initialize( name, opts={} )
|
9
|
+
@name = name
|
10
|
+
@opts = opts
|
11
|
+
end
|
12
|
+
|
13
|
+
def setup()
|
14
|
+
value = @opts[:setup] || 'all'
|
15
|
+
"setups/#{value}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def remote_zip_url() # remote zip url
|
19
|
+
"https://github.com/#{@name}/archive/master.zip"
|
20
|
+
end
|
21
|
+
|
22
|
+
def local_zip_name()
|
23
|
+
### note: replace / in name w/ --I--
|
24
|
+
## e.g. flatten the filename, that is, do NOT include any folders
|
25
|
+
@name.gsub('/', '--I--') # note: will NOT include/return .zip extension
|
26
|
+
end
|
27
|
+
|
28
|
+
def local_zip_root()
|
29
|
+
"./tmp"
|
30
|
+
end
|
31
|
+
|
32
|
+
def local_zip_path() # local zip path
|
33
|
+
"#{local_zip_root}/#{local_zip_name}.zip"
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
def download()
|
38
|
+
logger.info( "download dataset '#{@name}'" )
|
39
|
+
logger.info( " from '#{remote_zip_url}'" )
|
40
|
+
logger.info( " to '#{local_zip_path}'..." )
|
41
|
+
|
42
|
+
download_blob( remote_zip_url, local_zip_path )
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
####
|
47
|
+
# download tasks for zips
|
48
|
+
def download_blob( url, dest )
|
49
|
+
logger.info "downloading #{url} to #{dest}..."
|
50
|
+
|
51
|
+
## make sure dest path exists
|
52
|
+
dest_p = File.dirname( dest )
|
53
|
+
FileUtils.mkdir_p( dest_p ) unless File.exists?( dest_p ) ## use Dir.exists?? why? why not??
|
54
|
+
|
55
|
+
worker = Fetcher::Worker.new
|
56
|
+
worker.copy( url, dest )
|
57
|
+
## print some file stats
|
58
|
+
logger.debug " size: #{File.size(dest)} bytes"
|
59
|
+
end
|
60
|
+
|
61
|
+
end # class Dataset
|
62
|
+
|
63
|
+
end # module Datafile
|
data/lib/datafile/datasets/football.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class FootballDataset < Dataset
|
6
|
+
|
7
|
+
def initialize( name, opts={} )
|
8
|
+
super( name, opts )
|
9
|
+
end
|
10
|
+
|
11
|
+
def read()
|
12
|
+
logger.info( "read football-dataset '#{@name}', '#{setup}'" )
|
13
|
+
|
14
|
+
SportDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root() )
|
15
|
+
end
|
16
|
+
end # class FootballDataset
|
17
|
+
|
18
|
+
end # module Datafile
|
data/lib/datafile/datasets/world.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class WorldDataset < Dataset
|
6
|
+
|
7
|
+
def initialize( name, opts={} )
|
8
|
+
super( name, opts )
|
9
|
+
end
|
10
|
+
|
11
|
+
def read()
|
12
|
+
logger.info( "read world-dataset '#{@name}', '#{setup}'" )
|
13
|
+
|
14
|
+
WorldDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root(), { skip_tags: true } )
|
15
|
+
end
|
16
|
+
end # class WorldDataset
|
17
|
+
|
18
|
+
end # module Datafile
|
19
|
+
|
data/lib/datafile/version.rb
CHANGED
@@ -1,7 +1,23 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
module Datafile
|
4
|
-
VERSION = '0.0.1'
|
5
|
-
end
|
6
4
|
|
5
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
+
MINOR = 1
|
7
|
+
PATCH = 0
|
8
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
+
|
10
|
+
def self.version
|
11
|
+
VERSION
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.banner
|
15
|
+
"datafile/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.root
|
19
|
+
"#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
|
20
|
+
end
|
21
|
+
|
22
|
+
end # module Datafile
|
7
23
|
|
data/test/helper.rb
ADDED
[hunk not captured in this listing: 12 added lines]
|
data/test/test_builder.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_builder.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestBuilder < MiniTest::Test
|
11
|
+
|
12
|
+
def test_builder
|
13
|
+
code =<<EOS
|
14
|
+
## comments
|
15
|
+
|
16
|
+
world 'openmundi/world.db', setup: 'countries'
|
17
|
+
|
18
|
+
football 'openfootball/national-teams' ## NOTE: default is setup: 'all'
|
19
|
+
|
20
|
+
### todo/fix: download archive only once(!!) even if included more than once
|
21
|
+
## football 'openfootball/world-cup', setup: '2014_quali'
|
22
|
+
|
23
|
+
football 'openfootball/world-cup', setup: '2014'
|
24
|
+
|
25
|
+
## more comments
|
26
|
+
|
27
|
+
EOS
|
28
|
+
|
29
|
+
builder = Datafile::Builder.load( code )
|
30
|
+
|
31
|
+
datafile = builder.datafile
|
32
|
+
## datafile.run
|
33
|
+
## datafile.download
|
34
|
+
## datafile.read
|
35
|
+
|
36
|
+
assert true # if we get here - test success
|
37
|
+
end
|
38
|
+
|
39
|
+
end # class TestBuilder
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datafile
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
@@ -24,6 +24,34 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: textutils
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: fetcher
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: rdoc
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -61,12 +89,20 @@ extra_rdoc_files:
|
|
61
89
|
- Manifest.txt
|
62
90
|
- README.md
|
63
91
|
files:
|
92
|
+
- ".gemtest"
|
64
93
|
- HISTORY.md
|
65
94
|
- Manifest.txt
|
66
95
|
- README.md
|
67
96
|
- Rakefile
|
68
97
|
- lib/datafile.rb
|
98
|
+
- lib/datafile/builder.rb
|
99
|
+
- lib/datafile/datafile.rb
|
100
|
+
- lib/datafile/datasets/dataset.rb
|
101
|
+
- lib/datafile/datasets/football.rb
|
102
|
+
- lib/datafile/datasets/world.rb
|
69
103
|
- lib/datafile/version.rb
|
104
|
+
- test/helper.rb
|
105
|
+
- test/test_builder.rb
|
70
106
|
homepage: https://github.com/rubylibs/datafile
|
71
107
|
licenses:
|
72
108
|
- Public Domain
|
@@ -93,4 +129,5 @@ rubygems_version: 2.4.2
|
|
93
129
|
signing_key:
|
94
130
|
specification_version: 4
|
95
131
|
summary: datafile - builder for downloading n reading datasets
|
96
|
-
test_files:
|
132
|
+
test_files:
|
133
|
+
- test/test_builder.rb
|