csvpack 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -4
- data/README.md +354 -354
- data/Rakefile +32 -32
- data/lib/csvpack.rb +52 -52
- data/lib/csvpack/downloader.rb +72 -62
- data/lib/csvpack/pack.rb +47 -7
- data/lib/csvpack/version.rb +22 -22
- data/test/helper.rb +7 -7
- data/test/test_companies.rb +62 -61
- data/test/test_countries.rb +41 -40
- data/test/test_downloader.rb +32 -32
- data/test/test_import.rb +22 -22
- metadata +2 -2
data/Rakefile
CHANGED
@@ -1,32 +1,32 @@
|
|
1
|
-
require 'hoe'
|
2
|
-
require './lib/csvpack/version.rb'
|
3
|
-
|
4
|
-
Hoe.spec 'csvpack' do
|
5
|
-
|
6
|
-
self.version = CsvPack::VERSION
|
7
|
-
|
8
|
-
self.summary = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
|
9
|
-
self.description = summary
|
10
|
-
|
11
|
-
self.urls = ['https://github.com/csv11/csvpack']
|
12
|
-
|
13
|
-
self.author = 'Gerald Bauer'
|
14
|
-
self.email = 'ruby-talk@ruby-lang.org'
|
15
|
-
|
16
|
-
# switch extension to .markdown for github formatting
|
17
|
-
self.readme_file = 'README.md'
|
18
|
-
self.history_file = 'HISTORY.md'
|
19
|
-
|
20
|
-
self.extra_deps = [
|
21
|
-
['logutils', '>=0.6.1'],
|
22
|
-
['fetcher', '>=0.4.5'],
|
23
|
-
['activerecord', '>=5.0.0'],
|
24
|
-
]
|
25
|
-
|
26
|
-
self.licenses = ['Public Domain']
|
27
|
-
|
28
|
-
self.spec_extras = {
|
29
|
-
required_ruby_version: '>= 2.2.2'
|
30
|
-
}
|
31
|
-
|
32
|
-
end
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/csvpack/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'csvpack' do
|
5
|
+
|
6
|
+
self.version = CsvPack::VERSION
|
7
|
+
|
8
|
+
self.summary = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = ['https://github.com/csv11/csvpack']
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'ruby-talk@ruby-lang.org'
|
15
|
+
|
16
|
+
# switch extension to .markdown for github formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'HISTORY.md'
|
19
|
+
|
20
|
+
self.extra_deps = [
|
21
|
+
['logutils', '>=0.6.1'],
|
22
|
+
['fetcher', '>=0.4.5'],
|
23
|
+
['activerecord', '>=5.0.0'],
|
24
|
+
]
|
25
|
+
|
26
|
+
self.licenses = ['Public Domain']
|
27
|
+
|
28
|
+
self.spec_extras = {
|
29
|
+
required_ruby_version: '>= 2.2.2'
|
30
|
+
}
|
31
|
+
|
32
|
+
end
|
data/lib/csvpack.rb
CHANGED
@@ -1,52 +1,52 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
require 'pp'
|
5
|
-
require 'forwardable'
|
6
|
-
|
7
|
-
### csv
|
8
|
-
require 'csv'
|
9
|
-
require 'json'
|
10
|
-
require 'fileutils'
|
11
|
-
|
12
|
-
|
13
|
-
### downloader
|
14
|
-
require 'fetcher'
|
15
|
-
|
16
|
-
### activerecord w/ sqlite3
|
17
|
-
## require 'active_support/all' ## needed for String#binary? method
|
18
|
-
require 'active_record'
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
# our own code
|
23
|
-
|
24
|
-
require 'csvpack/version' ## let version always go first
|
25
|
-
require 'csvpack/pack'
|
26
|
-
require 'csvpack/downloader'
|
27
|
-
|
28
|
-
module CsvPack
|
29
|
-
|
30
|
-
def self.import( *args )
|
31
|
-
## step 1: download
|
32
|
-
dl = Downloader.new
|
33
|
-
args.each do |arg|
|
34
|
-
dl.fetch( arg )
|
35
|
-
end
|
36
|
-
|
37
|
-
## step 2: up 'n' import
|
38
|
-
args.each do |arg|
|
39
|
-
pack = Pack.new( "./pack/#{arg}/datapackage.json" )
|
40
|
-
pack.tables.each do |table|
|
41
|
-
table.up!
|
42
|
-
table.import!
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
end # module CsvPack
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
# say hello
|
52
|
-
puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'forwardable'
|
6
|
+
|
7
|
+
### csv
|
8
|
+
require 'csv'
|
9
|
+
require 'json'
|
10
|
+
require 'fileutils'
|
11
|
+
|
12
|
+
|
13
|
+
### downloader
|
14
|
+
require 'fetcher'
|
15
|
+
|
16
|
+
### activerecord w/ sqlite3
|
17
|
+
## require 'active_support/all' ## needed for String#binary? method
|
18
|
+
require 'active_record'
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
# our own code
|
23
|
+
|
24
|
+
require 'csvpack/version' ## let version always go first
|
25
|
+
require 'csvpack/pack'
|
26
|
+
require 'csvpack/downloader'
|
27
|
+
|
28
|
+
module CsvPack
|
29
|
+
|
30
|
+
def self.import( *args )
|
31
|
+
## step 1: download
|
32
|
+
dl = Downloader.new
|
33
|
+
args.each do |arg|
|
34
|
+
dl.fetch( arg )
|
35
|
+
end
|
36
|
+
|
37
|
+
## step 2: up 'n' import
|
38
|
+
args.each do |arg|
|
39
|
+
pack = Pack.new( "./pack/#{arg}/datapackage.json" )
|
40
|
+
pack.tables.each do |table|
|
41
|
+
table.up!
|
42
|
+
table.import!
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
end # module CsvPack
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
# say hello
|
52
|
+
puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
data/lib/csvpack/downloader.rb
CHANGED
@@ -1,62 +1,72 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module CsvPack
|
4
|
-
|
5
|
-
class Downloader
|
6
|
-
|
7
|
-
def initialize( cache_dir='./pack' )
|
8
|
-
@cache_dir = cache_dir # todo: check if folder exists now (or on demand)?
|
9
|
-
@worker = Fetcher::Worker.new
|
10
|
-
end
|
11
|
-
|
12
|
-
SHORTCUTS = {
|
13
|
-
## to be done
|
14
|
-
}
|
15
|
-
|
16
|
-
def fetch( name_or_shortcut_or_url ) ## todo/check: use (re)name to get/update/etc. why? why not??
|
17
|
-
|
18
|
-
name = name_or_shortcut_or_url
|
19
|
-
|
20
|
-
##
|
21
|
-
## e.g. try
|
22
|
-
## country-list
|
23
|
-
##
|
24
|
-
|
25
|
-
## url_base = "http://data.okfn.org/data/core/#{name}"
|
26
|
-
url_base = "https://datahub.io/core/#{name}"
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module CsvPack
|
4
|
+
|
5
|
+
class Downloader
|
6
|
+
|
7
|
+
def initialize( cache_dir='./pack' )
|
8
|
+
@cache_dir = cache_dir # todo: check if folder exists now (or on demand)?
|
9
|
+
@worker = Fetcher::Worker.new
|
10
|
+
end
|
11
|
+
|
12
|
+
SHORTCUTS = {
|
13
|
+
## to be done
|
14
|
+
}
|
15
|
+
|
16
|
+
def fetch( name_or_shortcut_or_url ) ## todo/check: use (re)name to get/update/etc. why? why not??
|
17
|
+
|
18
|
+
name = name_or_shortcut_or_url
|
19
|
+
|
20
|
+
##
|
21
|
+
## e.g. try
|
22
|
+
## country-list
|
23
|
+
##
|
24
|
+
|
25
|
+
## url_base = "http://data.okfn.org/data/core/#{name}"
|
26
|
+
## url_base = "https://datahub.io/core/#{name}"
|
27
|
+
|
28
|
+
## or use "https://github.com/datasets/#{name}/raw/master"
|
29
|
+
url_base = "https://raw.githubusercontent.com/datasets/#{name}/master"
|
30
|
+
|
31
|
+
|
32
|
+
url = "#{url_base}/datapackage.json"
|
33
|
+
|
34
|
+
dest_dir = "#{@cache_dir}/#{name}"
|
35
|
+
FileUtils.mkdir_p( dest_dir )
|
36
|
+
|
37
|
+
pack_path = "#{dest_dir}/datapackage.json" ## todo/fix: rename to meta_path - why? why not?
|
38
|
+
@worker.copy( url, pack_path )
|
39
|
+
|
40
|
+
h = Meta.load_file( pack_path )
|
41
|
+
pp h
|
42
|
+
|
43
|
+
## copy resources (tables)
|
44
|
+
h.resources.each do |r|
|
45
|
+
puts "== resource:"
|
46
|
+
pp r
|
47
|
+
|
48
|
+
res_name = r['name']
|
49
|
+
res_relative_path = r['path'] ## fix/todo: might not contain the url - is now res_url_or_relative_path !!!!!
|
50
|
+
if res_relative_path.nil?
|
51
|
+
res_relative_path = "#{res_name}.csv"
|
52
|
+
end
|
53
|
+
|
54
|
+
res_url = r['url'] ## check - old package format - url NO longer used!!!!
|
55
|
+
if res_url.nil?
|
56
|
+
## build url
|
57
|
+
res_url = "#{url_base}/#{res_relative_path}"
|
58
|
+
end
|
59
|
+
|
60
|
+
## todo/fix: rename - use just res_path - why? why not?
|
61
|
+
local_res_path = "#{dest_dir}/#{res_relative_path}"
|
62
|
+
puts "[debug] local_res_path: >#{local_res_path}<"
|
63
|
+
local_res_dir = File.dirname( local_res_path )
|
64
|
+
FileUtils.mkdir_p( local_res_dir )
|
65
|
+
|
66
|
+
@worker.copy( res_url, local_res_path )
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
end # class Downloader
|
71
|
+
|
72
|
+
end # module CsvPack
|
data/lib/csvpack/pack.rb
CHANGED
@@ -5,6 +5,47 @@
|
|
5
5
|
|
6
6
|
module CsvPack
|
7
7
|
|
8
|
+
|
9
|
+
|
10
|
+
class Meta ## Pack(age) Meta / Manifest / Descriptor
|
11
|
+
extend Forwardable
|
12
|
+
|
13
|
+
def self.load_file( path )
|
14
|
+
text = File.open( path, 'r:utf-8' ).read
|
15
|
+
load( text )
|
16
|
+
end
|
17
|
+
## todo: add alias method read
|
18
|
+
|
19
|
+
def self.load( text )
|
20
|
+
hash = JSON.parse( text )
|
21
|
+
new( hash )
|
22
|
+
end
|
23
|
+
## todo: add alias method parse
|
24
|
+
|
25
|
+
|
26
|
+
def initialize( h )
|
27
|
+
@h = h
|
28
|
+
end
|
29
|
+
|
30
|
+
def name() @h['name']; end
|
31
|
+
def title() @h['title']; end
|
32
|
+
def license() @h['license']; end
|
33
|
+
|
34
|
+
## todo/fix: wrap resource in a class - why? why not?
|
35
|
+
def resources() @h['resources']; end
|
36
|
+
|
37
|
+
##############
|
38
|
+
def_delegators :@h, :[] ## todo/fix: add some more hash delegates - why? why not?
|
39
|
+
|
40
|
+
def pretty_print( printer )
|
41
|
+
printer.text "Meta<#{object_id} @h.name=#{name}, ...>"
|
42
|
+
end
|
43
|
+
end # class Meta
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
|
8
49
|
class Pack
|
9
50
|
## load (tabular) datapackage into memory
|
10
51
|
def initialize( path )
|
@@ -13,16 +54,15 @@ class Pack
|
|
13
54
|
## - check: if path is a folder/directory
|
14
55
|
## (auto-)add /datapackage.json
|
15
56
|
|
16
|
-
|
17
|
-
@h = JSON.parse( text )
|
57
|
+
@meta = Meta.load_file( path )
|
18
58
|
|
19
59
|
pack_dir = File.dirname(path)
|
20
60
|
|
21
|
-
|
61
|
+
pp @meta
|
22
62
|
|
23
63
|
## read in tables
|
24
64
|
@tables = []
|
25
|
-
@h['resources'].each do |r|
|
65
|
+
@meta.resources.each do |r|
|
26
66
|
## build table data
|
27
67
|
@tables << build_tab( r, pack_dir )
|
28
68
|
end
|
@@ -30,9 +70,8 @@ class Pack
|
|
30
70
|
## pp @tables
|
31
71
|
end
|
32
72
|
|
33
|
-
def
|
34
|
-
|
35
|
-
def license() @h['license']; end
|
73
|
+
def meta() @meta; end ## delegate known meta props (e.g. name, title, etc. - why? why not?)
|
74
|
+
|
36
75
|
|
37
76
|
def tables() @tables; end
|
38
77
|
## convenience method - return first table
|
@@ -189,6 +228,7 @@ class Tab
|
|
189
228
|
'datetime' => :datetime,
|
190
229
|
'date' => :date,
|
191
230
|
'time' => :time,
|
231
|
+
'year' => :string, ## note: map year for now to string - anything better? why? why not?
|
192
232
|
}
|
193
233
|
|
194
234
|
def dump_schema
|
data/lib/csvpack/version.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module CsvPack
|
4
|
-
|
5
|
-
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
-
MINOR = 1
|
7
|
-
PATCH = 0
|
8
|
-
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
-
|
10
|
-
def self.version
|
11
|
-
VERSION
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.banner
|
15
|
-
"csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.root
|
19
|
-
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
20
|
-
end
|
21
|
-
|
22
|
-
end # module CsvPack
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module CsvPack
|
4
|
+
|
5
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
+
MINOR = 2
|
7
|
+
PATCH = 0
|
8
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
+
|
10
|
+
def self.version
|
11
|
+
VERSION
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.banner
|
15
|
+
"csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.root
|
19
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
20
|
+
end
|
21
|
+
|
22
|
+
end # module CsvPack
|
data/test/helper.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
|
2
|
-
## minitest setup
|
3
|
-
require 'minitest/autorun'
|
4
|
-
|
5
|
-
|
6
|
-
## our own code
|
7
|
-
require 'csvpack'
|
1
|
+
|
2
|
+
## minitest setup
|
3
|
+
require 'minitest/autorun'
|
4
|
+
|
5
|
+
|
6
|
+
## our own code
|
7
|
+
require 'csvpack'
|