datafile 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gemtest +0 -0
- data/Manifest.txt +7 -0
- data/Rakefile +3 -3
- data/lib/datafile.rb +6 -14
- data/lib/datafile/builder.rb +38 -0
- data/lib/datafile/datafile.rb +39 -0
- data/lib/datafile/datasets/dataset.rb +63 -0
- data/lib/datafile/datasets/football.rb +18 -0
- data/lib/datafile/datasets/world.rb +19 -0
- data/lib/datafile/version.rb +19 -3
- data/test/helper.rb +12 -0
- data/test/test_builder.rb +39 -0
- metadata +39 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 650b215a771b6d6b984f2a65e187a08803c383ad
|
4
|
+
data.tar.gz: 15512a5c171fea567372ed5983599330ad6312cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee1d067e74de8bc1d6ba85cb1cc235b30954d7b0e88254d487ab40a4050143439aa0d994eb34e5801cbb61a1a8c2a28da974878d43a56615118d2bae1a31fd80
|
7
|
+
data.tar.gz: 1c019e305e059a04d3cd7a25909d5616e9d13c5bdbb8a4c360eb042f04484476655e701411c2ecc24e255647c8567171607c58532bb4dd684d90a941cc38c1b1
|
data/.gemtest
ADDED
File without changes
|
data/Manifest.txt
CHANGED
@@ -3,4 +3,11 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/datafile.rb
|
6
|
+
lib/datafile/builder.rb
|
7
|
+
lib/datafile/datafile.rb
|
8
|
+
lib/datafile/datasets/dataset.rb
|
9
|
+
lib/datafile/datasets/football.rb
|
10
|
+
lib/datafile/datasets/world.rb
|
6
11
|
lib/datafile/version.rb
|
12
|
+
test/helper.rb
|
13
|
+
test/test_builder.rb
|
data/Rakefile
CHANGED
@@ -18,7 +18,9 @@ Hoe.spec 'datafile' do
|
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
|
-
['logutils']
|
21
|
+
['logutils'],
|
22
|
+
['textutils'],
|
23
|
+
['fetcher'],
|
22
24
|
]
|
23
25
|
|
24
26
|
self.licenses = ['Public Domain']
|
@@ -26,6 +28,4 @@ Hoe.spec 'datafile' do
|
|
26
28
|
self.spec_extras = {
|
27
29
|
:required_ruby_version => '>= 1.9.2'
|
28
30
|
}
|
29
|
-
|
30
|
-
|
31
31
|
end
|
data/lib/datafile.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'logutils'
|
4
4
|
|
@@ -7,19 +7,11 @@ require 'logutils'
|
|
7
7
|
|
8
8
|
require 'datafile/version' # let it always go first
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
"datafile/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.root
|
19
|
-
"#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
|
20
|
-
end
|
21
|
-
|
22
|
-
end # module Datafile
|
10
|
+
require 'datafile/datasets/dataset'
|
11
|
+
require 'datafile/datasets/football'
|
12
|
+
require 'datafile/datasets/world'
|
13
|
+
require 'datafile/datafile'
|
14
|
+
require 'datafile/builder'
|
23
15
|
|
24
16
|
|
25
17
|
|
data/lib/datafile/builder.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class Builder
|
6
|
+
|
7
|
+
include LogUtils::Logging
|
8
|
+
|
9
|
+
attr_reader :datafile
|
10
|
+
|
11
|
+
def initialize()
|
12
|
+
@datafile = Datafile.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.load_file( path )
|
16
|
+
code = File.read_utf8( path )
|
17
|
+
self.load( code )
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.load( code )
|
21
|
+
builder = Builder.new
|
22
|
+
builder.instance_eval( code )
|
23
|
+
builder
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
def football( name, opts={} )
|
28
|
+
logger.info( "[builder] add football-dataset '#{name}'" )
|
29
|
+
@datafile.datasets << FootballDataset.new( name, opts )
|
30
|
+
end
|
31
|
+
|
32
|
+
def world( name, opts={} )
|
33
|
+
logger.info( "[builder] add world-dataset '#{name}'" )
|
34
|
+
@datafile.datasets << WorldDataset.new( name, opts )
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class Builder
|
38
|
+
end # module Datafile
|
data/lib/datafile/datafile.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class Datafile
|
6
|
+
|
7
|
+
include LogUtils::Logging
|
8
|
+
|
9
|
+
attr_reader :datasets
|
10
|
+
|
11
|
+
def initialize()
|
12
|
+
@datasets = []
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
def run()
|
17
|
+
logger.info( "[datafile] begin - run" )
|
18
|
+
download() # step 1 - download zips for datasets
|
19
|
+
read() # step 2 - read in datasets from zips
|
20
|
+
logger.info( "[datafile] end - run" )
|
21
|
+
end
|
22
|
+
|
23
|
+
def download()
|
24
|
+
logger.info( "[datafile] dowload" )
|
25
|
+
@datasets.each do |dataset|
|
26
|
+
dataset.download()
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def read()
|
31
|
+
logger.info( "[datafile] read" )
|
32
|
+
@datasets.each do |dataset|
|
33
|
+
dataset.read()
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class Datafile
|
38
|
+
end # module Datafile
|
39
|
+
|
data/lib/datafile/datasets/dataset.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class Dataset
|
6
|
+
include LogUtils::Logging
|
7
|
+
|
8
|
+
def initialize( name, opts={} )
|
9
|
+
@name = name
|
10
|
+
@opts = opts
|
11
|
+
end
|
12
|
+
|
13
|
+
def setup()
|
14
|
+
value = @opts[:setup] || 'all'
|
15
|
+
"setups/#{value}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def remote_zip_url() # remote zip url
|
19
|
+
"https://github.com/#{@name}/archive/master.zip"
|
20
|
+
end
|
21
|
+
|
22
|
+
def local_zip_name()
|
23
|
+
### note: replace / in name w/ --I--
|
24
|
+
## e.g. flatten the filename, that is, do NOT include any folders
|
25
|
+
@name.gsub('/', '--I--') # note: will NOT include/return .zip extension
|
26
|
+
end
|
27
|
+
|
28
|
+
def local_zip_root()
|
29
|
+
"./tmp"
|
30
|
+
end
|
31
|
+
|
32
|
+
def local_zip_path() # local zip path
|
33
|
+
"#{local_zip_root}/#{local_zip_name}.zip"
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
def download()
|
38
|
+
logger.info( "download dataset '#{@name}'" )
|
39
|
+
logger.info( " from '#{remote_zip_url}'" )
|
40
|
+
logger.info( " to '#{local_zip_path}'..." )
|
41
|
+
|
42
|
+
download_blob( remote_zip_url, local_zip_path )
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
####
|
47
|
+
# download tasks for zips
|
48
|
+
def download_blob( url, dest )
|
49
|
+
logger.info "downloading #{url} to #{dest}..."
|
50
|
+
|
51
|
+
## make sure dest path exists
|
52
|
+
dest_p = File.dirname( dest )
|
53
|
+
FileUtils.mkdir_p( dest_p ) unless File.exists?( dest_p ) ## use Dir.exists?? why? why not??
|
54
|
+
|
55
|
+
worker = Fetcher::Worker.new
|
56
|
+
worker.copy( url, dest )
|
57
|
+
## print some file stats
|
58
|
+
logger.debug " size: #{File.size(dest)} bytes"
|
59
|
+
end
|
60
|
+
|
61
|
+
end # class Dataset
|
62
|
+
|
63
|
+
end # module Datafile
|
data/lib/datafile/datasets/football.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class FootballDataset < Dataset
|
6
|
+
|
7
|
+
def initialize( name, opts={} )
|
8
|
+
super( name, opts )
|
9
|
+
end
|
10
|
+
|
11
|
+
def read()
|
12
|
+
logger.info( "read football-dataset '#{@name}', '#{setup}'" )
|
13
|
+
|
14
|
+
SportDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root() )
|
15
|
+
end
|
16
|
+
end # class FootballDataset
|
17
|
+
|
18
|
+
end # module Datafile
|
data/lib/datafile/datasets/world.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Datafile
|
4
|
+
|
5
|
+
class WorldDataset < Dataset
|
6
|
+
|
7
|
+
def initialize( name, opts={} )
|
8
|
+
super( name, opts )
|
9
|
+
end
|
10
|
+
|
11
|
+
def read()
|
12
|
+
logger.info( "read world-dataset '#{@name}', '#{setup}'" )
|
13
|
+
|
14
|
+
WorldDb.read_setup_from_zip( local_zip_name(), setup(), local_zip_root(), { skip_tags: true } )
|
15
|
+
end
|
16
|
+
end # class WorldDataset
|
17
|
+
|
18
|
+
end # module Datafile
|
19
|
+
|
data/lib/datafile/version.rb
CHANGED
@@ -1,7 +1,23 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
module Datafile
|
4
|
-
VERSION = '0.0.1'
|
5
|
-
end
|
6
4
|
|
5
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
+
MINOR = 1
|
7
|
+
PATCH = 0
|
8
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
+
|
10
|
+
def self.version
|
11
|
+
VERSION
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.banner
|
15
|
+
"datafile/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.root
|
19
|
+
"#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
|
20
|
+
end
|
21
|
+
|
22
|
+
end # module Datafile
|
7
23
|
|
data/test/helper.rb
ADDED
(+12 lines; file content not captured in this extract)
|
data/test/test_builder.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_builder.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestBuilder < MiniTest::Test
|
11
|
+
|
12
|
+
def test_builder
|
13
|
+
code =<<EOS
|
14
|
+
## comments
|
15
|
+
|
16
|
+
world 'openmundi/world.db', setup: 'countries'
|
17
|
+
|
18
|
+
football 'openfootball/national-teams' ## NOTE: default is setup: 'all'
|
19
|
+
|
20
|
+
### todo/fix: download archive only once(!!) even if included more than once
|
21
|
+
## football 'openfootball/world-cup', setup: '2014_quali'
|
22
|
+
|
23
|
+
football 'openfootball/world-cup', setup: '2014'
|
24
|
+
|
25
|
+
## more comments
|
26
|
+
|
27
|
+
EOS
|
28
|
+
|
29
|
+
builder = Datafile::Builder.load( code )
|
30
|
+
|
31
|
+
datafile = builder.datafile
|
32
|
+
## datafile.run
|
33
|
+
## datafile.download
|
34
|
+
## datafile.read
|
35
|
+
|
36
|
+
assert true # if we get here - test success
|
37
|
+
end
|
38
|
+
|
39
|
+
end # class TestBuilder
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datafile
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
@@ -24,6 +24,34 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: textutils
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: fetcher
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: rdoc
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -61,12 +89,20 @@ extra_rdoc_files:
|
|
61
89
|
- Manifest.txt
|
62
90
|
- README.md
|
63
91
|
files:
|
92
|
+
- ".gemtest"
|
64
93
|
- HISTORY.md
|
65
94
|
- Manifest.txt
|
66
95
|
- README.md
|
67
96
|
- Rakefile
|
68
97
|
- lib/datafile.rb
|
98
|
+
- lib/datafile/builder.rb
|
99
|
+
- lib/datafile/datafile.rb
|
100
|
+
- lib/datafile/datasets/dataset.rb
|
101
|
+
- lib/datafile/datasets/football.rb
|
102
|
+
- lib/datafile/datasets/world.rb
|
69
103
|
- lib/datafile/version.rb
|
104
|
+
- test/helper.rb
|
105
|
+
- test/test_builder.rb
|
70
106
|
homepage: https://github.com/rubylibs/datafile
|
71
107
|
licenses:
|
72
108
|
- Public Domain
|
@@ -93,4 +129,5 @@ rubygems_version: 2.4.2
|
|
93
129
|
signing_key:
|
94
130
|
specification_version: 4
|
95
131
|
summary: datafile - builder for downloading n reading datasets
|
96
|
-
test_files:
|
132
|
+
test_files:
|
133
|
+
- test/test_builder.rb
|