datafile 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +0 -0
- data/Manifest.txt +23 -30
- data/Rakefile +2 -3
- data/data/football.txt +20 -11
- data/lib/datafile.rb +29 -19
- data/lib/datafile/builder.rb +1 -24
- data/lib/datafile/datafile.rb +25 -144
- data/lib/datafile/{datasets/dataset.rb → dataset.rb} +17 -25
- data/lib/datafile/version.rb +7 -4
- data/lib/datafile/workers/file/dataset.rb +20 -57
- data/lib/datafile/workers/file/registry.rb +18 -18
- data/lib/datafile/workers/file/worker.rb +4 -24
- data/lib/datafile/workers/zip/dataset.rb +19 -8
- data/lib/datafile/workers/zip/worker.rb +6 -22
- data/test/test_builder.rb +9 -9
- data/test/test_file_dataset_registry.rb +2 -3
- data/test/test_file_worker.rb +4 -5
- data/test/test_football_dataset.rb +9 -9
- metadata +10 -37
- data/.gemtest +0 -0
- data/lib/datafile/builder2.rb +0 -90
- data/lib/datafile/workers/dataset.rb +0 -40
- data/lib/datafile/workers/zip/beer.rb +0 -18
- data/lib/datafile/workers/zip/football.rb +0 -18
- data/lib/datafile/workers/zip/world.rb +0 -18
- data/test/datafile2/at.rb +0 -51
- data/test/test_builder2.rb +0 -36
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
def read_known_datasets( path )
|
8
8
|
ary = []
|
9
|
-
lines = File.
|
9
|
+
lines = File.open( path, 'r:utf-8' ).read
|
10
10
|
lines.each_line do |line|
|
11
11
|
## skip blank and comments lines
|
12
12
|
next if /^\s*#/ =~ line || /^\s*$/ =~ line
|
@@ -27,13 +27,12 @@ class Dataset
|
|
27
27
|
@opts = opts
|
28
28
|
end
|
29
29
|
|
30
|
-
attr_reader :name
|
31
|
-
|
30
|
+
attr_reader :name, :opts
|
31
|
+
|
32
|
+
## convenience helpers for known opts
|
33
|
+
def setup() @opts[:setup]; end ## note: return nil if not found/set
|
34
|
+
def format() @opts[:format] || 'txt'; end ## note: assume default is txt (other formats incl. csv) for now - why? why not?
|
32
35
|
|
33
|
-
def setup
|
34
|
-
value = @opts[:setup] || 'all'
|
35
|
-
"setups/#{value}"
|
36
|
-
end
|
37
36
|
|
38
37
|
def file? # note: use file? (not exist? might use zip? later to check if zip exists? -why? why not?)
|
39
38
|
## hack/convenience shortcut:
|
@@ -49,7 +48,7 @@ class Dataset
|
|
49
48
|
basename = parts[1]
|
50
49
|
## e.g.
|
51
50
|
## ./ (working folder) => at-austria
|
52
|
-
## openfootball/at-austria
|
51
|
+
## openfootball/at-austria
|
53
52
|
if File.basename( Dir.getwd ) == basename
|
54
53
|
puts " bingo!! working folder >#{basename}< matches dataset"
|
55
54
|
true ## return true
|
@@ -75,23 +74,18 @@ class WorldDataset < Dataset
|
|
75
74
|
|
76
75
|
super( name, opts ) ## todo/check: just use super (e.g. pass along all params - why? why not?)
|
77
76
|
end
|
78
|
-
|
79
|
-
def zip_worker() WorldZipDataset.new( self ); end ## check: change (rename) just use zip or use worker_zip?? - why, why not?
|
80
|
-
def file_worker() WorldFileDataset.new( self ); end
|
81
77
|
end # class WorldDataset
|
82
78
|
|
83
79
|
|
84
80
|
|
85
81
|
class FootballDataset < Dataset
|
86
82
|
|
87
|
-
|
83
|
+
def self.build_known_datasets
|
84
|
+
read_known_datasets( "#{::Datafile.data_path}/football.txt" )
|
85
|
+
end
|
88
86
|
|
89
87
|
def self.known_datasets
|
90
|
-
|
91
|
-
### todo/fix - use ||= idiom - why, why not??
|
92
|
-
if @@known_football_datasets.nil?
|
93
|
-
@@known_football_datasets = read_known_datasets( "#{::Datafile.data_path}/football.txt" )
|
94
|
-
end
|
88
|
+
@@known_football_datasets ||= build_known_datasets
|
95
89
|
@@known_football_datasets
|
96
90
|
end
|
97
91
|
|
@@ -101,7 +95,11 @@ class FootballDataset < Dataset
|
|
101
95
|
## check if name include slash (e.g. /)
|
102
96
|
## - if not auto-add openfootball/ (default)
|
103
97
|
if name_easy.index( '/' ).nil?
|
104
|
-
|
98
|
+
if opts[:format] == 'csv'
|
99
|
+
name = "footballcsv/#{name_easy}"
|
100
|
+
else
|
101
|
+
name = "openfootball/#{name_easy}"
|
102
|
+
end
|
105
103
|
else
|
106
104
|
name = name_easy ## just pass through for now
|
107
105
|
end
|
@@ -109,14 +107,11 @@ class FootballDataset < Dataset
|
|
109
107
|
super( name, opts )
|
110
108
|
|
111
109
|
### check for known datasets; warn: if not known (might be a typo)
|
112
|
-
unless
|
110
|
+
unless self.class.known_datasets.include?( name )
|
113
111
|
## todo: use logger - why, why not??
|
114
112
|
puts "*** warn: unknown football dataset '#{name}', typo ???"
|
115
113
|
end
|
116
114
|
end
|
117
|
-
|
118
|
-
def zip_worker() FootballZipDataset.new( self ); end
|
119
|
-
def file_worker() FootballFileDataset.new( self ); end
|
120
115
|
end # class FootballDataset
|
121
116
|
|
122
117
|
|
@@ -133,9 +128,6 @@ class BeerDataset < Dataset
|
|
133
128
|
|
134
129
|
super( name, opts )
|
135
130
|
end
|
136
|
-
|
137
|
-
def zip_worker() BeerZipDataset.new( self ); end
|
138
|
-
def file_worker() BeerFileDataset.new( self ); end
|
139
131
|
end # class BeerDataset
|
140
132
|
|
141
133
|
|
data/lib/datafile/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
module Datafile
|
4
4
|
|
5
5
|
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
-
MINOR = 2
|
7
|
-
PATCH = 5
|
6
|
+
MINOR = 3
|
7
|
+
PATCH = 0
|
8
8
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
9
|
|
10
10
|
def self.version
|
@@ -17,7 +17,10 @@ module Datafile
|
|
17
17
|
|
18
18
|
def self.root
|
19
19
|
"#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
|
20
|
-
end
|
20
|
+
end
|
21
21
|
|
22
|
-
|
22
|
+
def self.data_path
|
23
|
+
"#{root}/data"
|
24
|
+
end
|
23
25
|
|
26
|
+
end # module Datafile
|
@@ -2,28 +2,26 @@
|
|
2
2
|
|
3
3
|
module Datafile
|
4
4
|
|
5
|
-
class FileDataset
|
5
|
+
class FileDataset
|
6
6
|
## read dataset from file(system)
|
7
7
|
|
8
|
-
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
9
10
|
|
10
11
|
def self.registry
|
11
|
-
|
12
|
-
if @@registry.nil?
|
13
|
-
@@registry = FileDatasetRegistry.new
|
14
|
-
end
|
12
|
+
@@registry ||= FileDatasetRegistry.new
|
15
13
|
@@registry
|
16
14
|
end
|
17
15
|
|
18
16
|
def initialize( dataset )
|
19
|
-
|
17
|
+
@dataset = dataset
|
20
18
|
end
|
21
19
|
|
22
|
-
def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
|
20
|
+
def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ???
|
23
21
|
## note: for easy testing allow "in situ" datasets
|
24
22
|
## e.g. ./ (e.g. mu-mauritius) is openfootball/mu-mauritius
|
25
23
|
## split name in org/user + project (e.g. openfootball/at-austria)
|
26
|
-
parts = name.split( '/' )
|
24
|
+
parts = @dataset.name.split( '/' )
|
27
25
|
|
28
26
|
basename = parts[1]
|
29
27
|
if File.basename( Dir.getwd ) == basename
|
@@ -31,63 +29,28 @@ class FileDataset < DatasetNode
|
|
31
29
|
return Dir.getwd ## assume working directory/folder is repo dir
|
32
30
|
end
|
33
31
|
|
34
|
-
registry.lookup( name )
|
32
|
+
registry.lookup( @dataset.name )
|
35
33
|
end
|
36
34
|
|
37
35
|
def dump
|
38
36
|
## for debugging dump dataset -- todo (also check if folder exists ??)
|
39
|
-
puts "dataset '#{name}' opts=#{opts.
|
37
|
+
puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.inspect}" ## use opts.inspect instead of to_json - why? why not?
|
40
38
|
puts " repo-dir '#{repo_dir}'"
|
41
39
|
end
|
42
40
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
47
|
-
end # class FileDataset
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
class FootballFileDataset < FileDataset
|
52
|
-
|
53
|
-
def initialize( dataset )
|
54
|
-
super( dataset )
|
55
|
-
end
|
56
|
-
|
57
|
-
def read()
|
58
|
-
logger.info( "read football-dataset (file) '#{name}', '#{setup}'" )
|
59
|
-
|
60
|
-
SportDb.read_setup( setup, repo_dir )
|
61
|
-
end
|
62
|
-
end # class FootballFileDataset
|
63
|
-
|
41
|
+
def read
|
42
|
+
if @dataset.is_a?( FootballDataset )
|
43
|
+
logger.info( "read football dataset (file) '#{@dataset.name}', '#{@dataset.setup}'" )
|
64
44
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
def read()
|
72
|
-
logger.info( "read world-dataset (file) '#{name}', '#{setup}'" )
|
73
|
-
|
74
|
-
## WorldDb.read_setup( 'setups/countries', WORLD_DB_INCLUDE_PATH, skip_tags: true )
|
75
|
-
WorldDb.read_setup( setup, repo_dir, skip_tags: true )
|
76
|
-
end
|
77
|
-
end # class WorldFileDataset
|
78
|
-
|
79
|
-
class BeerFileDataset < FileDataset
|
80
|
-
|
81
|
-
def initialize( dataset )
|
82
|
-
super( dataset )
|
83
|
-
end
|
84
|
-
|
85
|
-
def read()
|
86
|
-
logger.info( "read beer-dataset (file) '#{name}', '#{setup}'" )
|
87
|
-
|
88
|
-
BeerDb.read_setup( setup, repo_dir )
|
45
|
+
pack = SportDb::DirPackage.new( repo_dir )
|
46
|
+
pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now
|
47
|
+
else
|
48
|
+
logger.info( "TODO/FIX: read dataset (file) '#{@dataset.name}', '#{@dataset.setup}'; sorry" )
|
49
|
+
end
|
89
50
|
end
|
90
|
-
end # class BeerFileDataset
|
91
51
|
|
92
52
|
|
53
|
+
private
|
54
|
+
def registry() self.class.registry; end ## convenience method to access "static" shared class variable
|
55
|
+
end # class FileDataset
|
93
56
|
end # module Datafile
|
@@ -15,22 +15,30 @@ class FileDatasetRegistry
|
|
15
15
|
@roots[:openmundi] = '../../openmundi' ## OPENMUNDI_ROOT = "../../openmundi"
|
16
16
|
@roots[:openfootball] = '..' ## OPENFOOTBALL_ROOT = ".."
|
17
17
|
@roots[:openbeer] = '..'
|
18
|
+
|
19
|
+
@roots[:footballcsv] = '..'
|
18
20
|
end
|
19
21
|
|
20
22
|
def merge( hash )
|
21
23
|
## todo: add support for merging project mappings too
|
22
24
|
## use merge_roots and merge_projects ?? why, why not??
|
23
|
-
|
24
25
|
@roots = @roots.merge( hash )
|
25
26
|
end
|
26
27
|
|
27
|
-
def lookup( name )
|
28
|
-
|
28
|
+
def lookup( name )
|
29
|
+
path, _ = lookup_path( name ) ## note: ignore error message passed along in return
|
30
|
+
path
|
31
|
+
end
|
32
|
+
|
33
|
+
def lookup!( name )
|
34
|
+
path, error = lookup_path( name )
|
35
|
+
raise error if error
|
36
|
+
path
|
37
|
+
end
|
29
38
|
|
30
|
-
private
|
31
|
-
def lookup_worker( name, fail_on_error )
|
32
|
-
### fix: use lookup! version for exception and lookup (w/ returning nil) - why, why not??
|
33
39
|
|
40
|
+
private
|
41
|
+
def lookup_path( name )
|
34
42
|
## split name in org/user + project (e.g. openfootball/at-austria)
|
35
43
|
parts = name.split( '/' )
|
36
44
|
## check/todo: assert parts == 2 -- why, why not??
|
@@ -38,11 +46,7 @@ private
|
|
38
46
|
if root.nil?
|
39
47
|
msg = "no mapping found for '#{parts[0]}' in '#{name}'"
|
40
48
|
logger.error( msg )
|
41
|
-
|
42
|
-
raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
|
43
|
-
else
|
44
|
-
return nil
|
45
|
-
end
|
49
|
+
return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ??
|
46
50
|
end
|
47
51
|
|
48
52
|
path = "#{root}/#{parts[1]}"
|
@@ -50,16 +54,12 @@ private
|
|
50
54
|
unless File.exist?( path )
|
51
55
|
msg = "no file found for '#{name}'; expected '#{path}'"
|
52
56
|
logger.error( msg )
|
53
|
-
|
54
|
-
raise DatasetNotFoundError.new( msg ) ## throw exception FileNotFound / DatasetNotFound ??
|
55
|
-
else
|
56
|
-
return nil
|
57
|
-
end
|
57
|
+
return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ??
|
58
58
|
end
|
59
|
+
|
59
60
|
### check for File.directory?( path ) too - why, why not???
|
60
|
-
path
|
61
|
+
[path, nil] ## use go-style returns with error as second argument (as error as value)
|
61
62
|
end
|
62
|
-
|
63
63
|
end # class FileDatasetRegistry
|
64
64
|
|
65
65
|
end # module Datafile
|
@@ -15,38 +15,18 @@ class FileWorker ## check: rename to FileDatafileWorker?? or FileDatafile -wh
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def read
|
18
|
-
## note: also run inlines (setup script) before
|
19
|
-
@datafile.inlines.each do |inline|
|
20
|
-
inline.call
|
21
|
-
end
|
22
|
-
|
23
18
|
@datafile.datasets.each do |dataset|
|
24
|
-
dataset
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
def calc
|
29
|
-
@datafile.scripts.each do |script|
|
30
|
-
script.call
|
19
|
+
f = FileDataset.new( dataset )
|
20
|
+
f.read
|
31
21
|
end
|
32
22
|
end
|
33
23
|
|
34
24
|
def dump
|
35
|
-
## also dump inlines
|
36
|
-
@datafile.inlines.each do |inline|
|
37
|
-
inline.dump
|
38
|
-
end
|
39
|
-
|
40
25
|
@datafile.datasets.each do |dataset|
|
41
|
-
dataset
|
42
|
-
|
43
|
-
|
44
|
-
## also dump scripts
|
45
|
-
@datafile.scripts.each do |script|
|
46
|
-
script.dump
|
26
|
+
f = FileDataset.new( dataset )
|
27
|
+
f.dump
|
47
28
|
end
|
48
29
|
end
|
49
30
|
|
50
31
|
end # class FileWorker
|
51
|
-
|
52
32
|
end # module Datafile
|
@@ -3,23 +3,23 @@
|
|
3
3
|
module Datafile
|
4
4
|
|
5
5
|
|
6
|
-
class ZipDataset
|
6
|
+
class ZipDataset ### use (rename to) ZipDatasetWorker/Helper/Wrapper/Fetcher/Downloader - why, why not ???
|
7
7
|
## read dataset from zip(archive)
|
8
8
|
|
9
|
+
include LogUtils::Logging
|
10
|
+
|
9
11
|
def initialize( dataset )
|
10
|
-
|
12
|
+
@dataset = dataset
|
11
13
|
end
|
12
14
|
|
13
15
|
def remote_zip_url # remote zip url
|
14
|
-
|
15
|
-
## "https://github.com/#{@name}/archive/master.zip"
|
16
|
-
"http://github.com/#{name}/archive/master.zip"
|
16
|
+
"https://github.com/#{@dataset.name}/archive/master.zip"
|
17
17
|
end
|
18
18
|
|
19
19
|
def local_zip_name
|
20
20
|
### note: replace / in name w/ --I--
|
21
21
|
## e.g. flatten the filename, that is, do NOT include any folders
|
22
|
-
name.gsub('/', '--I--') # note: will NOT include/return .zip extension
|
22
|
+
@dataset.name.gsub('/', '--I--') # note: will NOT include/return .zip extension
|
23
23
|
end
|
24
24
|
|
25
25
|
def local_zip_root
|
@@ -32,7 +32,7 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
|
|
32
32
|
|
33
33
|
|
34
34
|
def download
|
35
|
-
logger.info( "download dataset '#{name}'" )
|
35
|
+
logger.info( "download dataset '#{@dataset.name}'" )
|
36
36
|
logger.info( " from '#{remote_zip_url}'" )
|
37
37
|
logger.info( " to '#{local_zip_path}'..." )
|
38
38
|
|
@@ -42,7 +42,7 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
|
|
42
42
|
|
43
43
|
def dump
|
44
44
|
## for debugging dump dataset (also check if zip exists)
|
45
|
-
puts "dataset '#{name}' opts=#{opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
|
45
|
+
puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.to_json}" ## use opts.inspect instead of to_json - why? why not?
|
46
46
|
puts " local '#{local_zip_name}' (#{local_zip_path})"
|
47
47
|
if File.exist?( local_zip_path )
|
48
48
|
puts " size: #{File.size(local_zip_path)} bytes"
|
@@ -52,6 +52,17 @@ class ZipDataset < DatasetNode ### use(rename to) ZipDatasetWorker - why, why n
|
|
52
52
|
puts " remote '#{remote_zip_url}'"
|
53
53
|
end
|
54
54
|
|
55
|
+
def read
|
56
|
+
if @dataset.is_a?( FootballDataset )
|
57
|
+
logger.info( "read football dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'" )
|
58
|
+
|
59
|
+
pack = SportDb::ZipPackage.new( local_zip_path )
|
60
|
+
pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now
|
61
|
+
else
|
62
|
+
logger.info( "TODO/FIX: read dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'; sorry" )
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
55
66
|
|
56
67
|
private
|
57
68
|
####
|
@@ -12,38 +12,22 @@ class ZipWorker ## check: rename to ZipDatafileWorker?? or ZipDatafile -why,
|
|
12
12
|
|
13
13
|
def download
|
14
14
|
@datafile.datasets.each do |dataset|
|
15
|
-
dataset
|
15
|
+
z = ZipDataset.new( dataset )
|
16
|
+
z.download
|
16
17
|
end
|
17
18
|
end
|
18
19
|
|
19
20
|
def read
|
20
|
-
## note: also run inlines (setup script) before
|
21
|
-
@datafile.inlines.each do |inline|
|
22
|
-
inline.call
|
23
|
-
end
|
24
|
-
|
25
21
|
@datafile.datasets.each do |dataset|
|
26
|
-
dataset
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def calc
|
31
|
-
@datafile.scripts.each do |script|
|
32
|
-
script.call
|
22
|
+
z = ZipDataset.new( dataset )
|
23
|
+
z.read
|
33
24
|
end
|
34
25
|
end
|
35
26
|
|
36
27
|
def dump
|
37
|
-
## also dump inlines
|
38
|
-
@datafile.inlines.each do |inline|
|
39
|
-
inline.dump
|
40
|
-
end
|
41
28
|
@datafile.datasets.each do |dataset|
|
42
|
-
dataset
|
43
|
-
|
44
|
-
## also dump scripts
|
45
|
-
@datafile.scripts.each do |script|
|
46
|
-
script.dump
|
29
|
+
z = ZipDataset.new( dataset )
|
30
|
+
z.dump
|
47
31
|
end
|
48
32
|
end
|
49
33
|
|