csvpack 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -4
- data/README.md +354 -354
- data/Rakefile +32 -32
- data/lib/csvpack.rb +52 -52
- data/lib/csvpack/downloader.rb +72 -62
- data/lib/csvpack/pack.rb +47 -7
- data/lib/csvpack/version.rb +22 -22
- data/test/helper.rb +7 -7
- data/test/test_companies.rb +62 -61
- data/test/test_countries.rb +41 -40
- data/test/test_downloader.rb +32 -32
- data/test/test_import.rb +22 -22
- metadata +2 -2
data/Rakefile
CHANGED
@@ -1,32 +1,32 @@
|
|
1
|
-
require 'hoe'
|
2
|
-
require './lib/csvpack/version.rb'
|
3
|
-
|
4
|
-
Hoe.spec 'csvpack' do
|
5
|
-
|
6
|
-
self.version = CsvPack::VERSION
|
7
|
-
|
8
|
-
self.summary = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
|
9
|
-
self.description = summary
|
10
|
-
|
11
|
-
self.urls = ['https://github.com/csv11/csvpack']
|
12
|
-
|
13
|
-
self.author = 'Gerald Bauer'
|
14
|
-
self.email = 'ruby-talk@ruby-lang.org'
|
15
|
-
|
16
|
-
# switch extension to .markdown for gihub formatting
|
17
|
-
self.readme_file = 'README.md'
|
18
|
-
self.history_file = 'HISTORY.md'
|
19
|
-
|
20
|
-
self.extra_deps = [
|
21
|
-
['logutils', '>=0.6.1'],
|
22
|
-
['fetcher', '>=0.4.5'],
|
23
|
-
['activerecord', '>=5.0.0'],
|
24
|
-
]
|
25
|
-
|
26
|
-
self.licenses = ['Public Domain']
|
27
|
-
|
28
|
-
self.spec_extras = {
|
29
|
-
required_ruby_version: '>= 2.2.2'
|
30
|
-
}
|
31
|
-
|
32
|
-
end
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/csvpack/version.rb'
|
3
|
+
|
4
|
+
## gem specification for csvpack (built via hoe)
Hoe.spec 'csvpack' do

  ## version number comes from lib/csvpack/version.rb
  self.version     = CsvPack::VERSION

  ## note: the description (re)uses the summary text as is
  self.summary     = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
  self.description = summary

  self.urls        = ['https://github.com/csv11/csvpack']

  self.author      = 'Gerald Bauer'
  self.email       = 'ruby-talk@ruby-lang.org'

  ## use the markdown (.md) readme and history files for github formatting
  self.readme_file  = 'README.md'
  self.history_file = 'HISTORY.md'

  ## runtime dependencies - pairs of gem name and version requirement
  self.extra_deps = [
    %w[logutils     >=0.6.1],
    %w[fetcher      >=0.4.5],
    %w[activerecord >=5.0.0],
  ]

  self.licenses    = ['Public Domain']

  ## extra gemspec settings passed through as is
  self.spec_extras = { required_ruby_version: '>= 2.2.2' }

end
|
data/lib/csvpack.rb
CHANGED
@@ -1,52 +1,52 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
require 'pp'
|
5
|
-
require 'forwardable'
|
6
|
-
|
7
|
-
### csv
|
8
|
-
require 'csv'
|
9
|
-
require 'json'
|
10
|
-
require 'fileutils'
|
11
|
-
|
12
|
-
|
13
|
-
### downloader
|
14
|
-
require 'fetcher'
|
15
|
-
|
16
|
-
### activerecord w/ sqlite3
|
17
|
-
## require 'active_support/all' ## needed for String#binary? method
|
18
|
-
require 'active_record'
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
# our own code
|
23
|
-
|
24
|
-
require 'csvpack/version' ## let version always go first
|
25
|
-
require 'csvpack/pack'
|
26
|
-
require 'csvpack/downloader'
|
27
|
-
|
28
|
-
module CsvPack
|
29
|
-
|
30
|
-
def self.import( *args )
|
31
|
-
## step 1: download
|
32
|
-
dl = Downloader.new
|
33
|
-
args.each do |arg|
|
34
|
-
dl.fetch( arg )
|
35
|
-
end
|
36
|
-
|
37
|
-
## step 2: up 'n' import
|
38
|
-
args.each do |arg|
|
39
|
-
pack = Pack.new( "./pack/#{arg}/datapackage.json" )
|
40
|
-
pack.tables.each do |table|
|
41
|
-
table.up!
|
42
|
-
table.import!
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
end # module CsvPack
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
# say hello
|
52
|
-
puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'forwardable'
|
6
|
+
|
7
|
+
### csv
|
8
|
+
require 'csv'
|
9
|
+
require 'json'
|
10
|
+
require 'fileutils'
|
11
|
+
|
12
|
+
|
13
|
+
### downloader
|
14
|
+
require 'fetcher'
|
15
|
+
|
16
|
+
### activerecord w/ sqlite3
|
17
|
+
## require 'active_support/all' ## needed for String#binary? method
|
18
|
+
require 'active_record'
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
# our own code
|
23
|
+
|
24
|
+
require 'csvpack/version' ## let version always go first
|
25
|
+
require 'csvpack/pack'
|
26
|
+
require 'csvpack/downloader'
|
27
|
+
|
28
|
+
module CsvPack

  ## Download and import one or more datapackages in one go.
  ##
  ##  args - names of the datapackages; every name gets passed to
  ##         Downloader#fetch first and then gets loaded from
  ##         ./pack/<name>/datapackage.json
  ##
  ##  For every table of every pack up! and import! get called (in this order).
  ##  Returns args (that is, the result of the last each loop).
  def self.import( *args )
    ## step 1: download - fetch all packs first (with a single downloader)
    downloader = Downloader.new
    args.each { |name| downloader.fetch( name ) }

    ## step 2: up 'n' import - table by table
    args.each do |name|
      Pack.new( "./pack/#{name}/datapackage.json" ).tables.each do |tab|
        tab.up!
        tab.import!
      end
    end
  end

end # module CsvPack
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
# say hello
|
52
|
+
puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
data/lib/csvpack/downloader.rb
CHANGED
@@ -1,62 +1,72 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module CsvPack
|
4
|
-
|
5
|
-
class Downloader
|
6
|
-
|
7
|
-
def initialize( cache_dir='./pack' )
|
8
|
-
@cache_dir = cache_dir # todo: check if folder exists now (or on demand)?
|
9
|
-
@worker = Fetcher::Worker.new
|
10
|
-
end
|
11
|
-
|
12
|
-
SHORTCUTS = {
|
13
|
-
## to be done
|
14
|
-
}
|
15
|
-
|
16
|
-
def fetch( name_or_shortcut_or_url ) ## todo/check: use (re)name to get/update/etc. why? why not??
|
17
|
-
|
18
|
-
name = name_or_shortcut_or_url
|
19
|
-
|
20
|
-
##
|
21
|
-
## e.g. try
|
22
|
-
## country-list
|
23
|
-
##
|
24
|
-
|
25
|
-
## url_base = "http://data.okfn.org/data/core/#{name}"
|
26
|
-
url_base = "https://datahub.io/core/#{name}"
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module CsvPack
|
4
|
+
|
5
|
+
class Downloader

  ## matches a resource path that already is an absolute http(s) url
  ABSOLUTE_URL_RX = %r{\Ahttps?://}i

  ## Create a downloader.
  ##
  ##  cache_dir - folder for storing the downloaded packs;
  ##              every pack gets its own <cache_dir>/<name> subfolder
  def initialize( cache_dir='./pack' )
    @cache_dir = cache_dir   # todo: check if folder exists now (or on demand)?
    @worker    = Fetcher::Worker.new
  end

  SHORTCUTS = {
    ## to be done
  }

  ## Download the datapackage.json for the dataset and all resources listed in it.
  ##
  ##  name_or_shortcut_or_url - for now always used as the dataset name, e.g. country-list
  ##                            (shortcuts and urls are NOT yet handled)
  ##
  ##  Files get copied from https://raw.githubusercontent.com/datasets/<name>/master
  ##  into <cache_dir>/<name>. Prints the meta data and every resource (debug output).
  ##
  ##  Raises ArgumentError if a resource path points outside of the pack folder.
  ##  Returns the resources of the pack (the result of the each loop).
  def fetch( name_or_shortcut_or_url )   ## todo/check: use (re)name to get/update/etc. why? why not??
    name = name_or_shortcut_or_url

    ##
    ## e.g. try
    ##   country-list
    ##

    ## url_base = "http://data.okfn.org/data/core/#{name}"
    ## url_base = "https://datahub.io/core/#{name}"

    ## or use "https://github.com/datasets/#{name}/raw/master"
    url_base = "https://raw.githubusercontent.com/datasets/#{name}/master"
    url      = "#{url_base}/datapackage.json"

    dest_dir = "#{@cache_dir}/#{name}"
    FileUtils.mkdir_p( dest_dir )

    pack_path = "#{dest_dir}/datapackage.json"   ## todo/fix: rename to meta_path - why? why not?
    @worker.copy( url, pack_path )

    h = Meta.load_file( pack_path )
    pp h

    ## copy resources (tables)
    h.resources.each do |r|
      puts "== resource:"
      pp r

      res_url, res_relative_path = resolve_resource( r, url_base )

      ## todo/fix: rename - use just res_path - why? why not?
      local_res_path = "#{dest_dir}/#{res_relative_path}"
      puts "[debug] local_res_path: >#{local_res_path}<"

      ## note: the path comes from the downloaded datapackage.json -
      ##   never create folders or write files outside of the pack folder
      check_inside!( dest_dir, local_res_path )
      FileUtils.mkdir_p( File.dirname( local_res_path ) )

      @worker.copy( res_url, local_res_path )
    end
  end

private

  ## Work out the download url and the (relative) path for a resource.
  ##
  ##  r        - resource hash from the datapackage.json
  ##  url_base - base url of the pack (used for relative paths)
  ##
  ##  Returns the pair [url, path].
  def resolve_resource( r, url_base )
    ## no path? fall back to <name>.csv
    path = r['path'] || "#{r['name']}.csv"

    url = r['url']    ## check - old package format - url NO longer used!!!!
    if url.nil?
      ## a path might be an absolute url already - use as is; otherwise build url
      url = path =~ ABSOLUTE_URL_RX ? path : "#{url_base}/#{path}"
    end

    ## NOTE(review): the local path is still derived from r['path'] unchanged;
    ##   presumably the pack reader locates the datafile using the same value - confirm
    [url, path]
  end

  ## Raise an ArgumentError if path does not resolve to a location inside of dir.
  def check_inside!( dir, path )
    root   = File.expand_path( dir )
    target = File.expand_path( path )
    return if target.start_with?( "#{root}/" )

    raise ArgumentError, "resource path outside of pack folder: >#{path}<"
  end

end # class Downloader
|
71
|
+
|
72
|
+
end # module CsvPack
|
data/lib/csvpack/pack.rb
CHANGED
@@ -5,6 +5,47 @@
|
|
5
5
|
|
6
6
|
module CsvPack
|
7
7
|
|
8
|
+
|
9
|
+
|
10
|
+
class Meta    ## Pack(age) Meta / Manifest / Descriptor
  extend Forwardable

  ## Read the file at path (as utf-8 text) and build a Meta from its json content.
  ##
  ##  note: uses the block form of File.open - the file gets closed right
  ##    after reading (and NOT whenever the garbage collector gets to it)
  def self.load_file( path )
    text = File.open( path, 'r:utf-8' ) { |f| f.read }
    load( text )
  end
  ## todo: add alias method read

  ## Parse the json text and wrap the result in a new Meta.
  def self.load( text )
    hash = JSON.parse( text )
    new( hash )
  end
  ## todo: add alias method parse


  ##  h - the parsed datapackage.json (a hash with string keys)
  def initialize( h )
    @h = h
  end

  ## convenience readers for well-known top-level properties
  def name()    @h['name']; end
  def title()   @h['title']; end
  def license() @h['license']; end

  ## todo/fix: wrap resource in a class - why? why not?
  ## returns the value stored under 'resources' as is
  def resources() @h['resources']; end

  ##############
  ## meta[ key ] reads straight from the wrapped hash
  def_delegators :@h, :[]    ## todo/fix: add some more hash delegates - why? why not?

  ## short one-line summary for pp (instead of dumping the complete hash)
  def pretty_print( printer )
    printer.text "Meta<#{object_id} @h.name=#{name}, ...>"
  end
end # class Meta
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
|
8
49
|
class Pack
|
9
50
|
## load (tabular) datapackage into memory
|
10
51
|
def initialize( path )
|
@@ -13,16 +54,15 @@ class Pack
|
|
13
54
|
## - check: if path is a folder/directory
|
14
55
|
## (auto-)add /datapackage.json
|
15
56
|
|
16
|
-
|
17
|
-
@h = JSON.parse( text )
|
57
|
+
@meta = Meta.load_file( path )
|
18
58
|
|
19
59
|
pack_dir = File.dirname(path)
|
20
60
|
|
21
|
-
|
61
|
+
pp @meta
|
22
62
|
|
23
63
|
## read in tables
|
24
64
|
@tables = []
|
25
|
-
@
|
65
|
+
@meta.resources.each do |r|
|
26
66
|
## build table data
|
27
67
|
@tables << build_tab( r, pack_dir )
|
28
68
|
end
|
@@ -30,9 +70,8 @@ class Pack
|
|
30
70
|
## pp @tables
|
31
71
|
end
|
32
72
|
|
33
|
-
def
|
34
|
-
|
35
|
-
def license() @h['license']; end
|
73
|
+
def meta() @meta; end ## delegate known meta props (e.g. name, title, etc. - why? why not?)
|
74
|
+
|
36
75
|
|
37
76
|
def tables() @tables; end
|
38
77
|
## convenience method - return first table
|
@@ -189,6 +228,7 @@ class Tab
|
|
189
228
|
'datetime' => :datetime,
|
190
229
|
'date' => :date,
|
191
230
|
'time' => :time,
|
231
|
+
'year' => :string, ## note: map year for now to string - anything better? why? why not?
|
192
232
|
}
|
193
233
|
|
194
234
|
def dump_schema
|
data/lib/csvpack/version.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module CsvPack
|
4
|
-
|
5
|
-
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
-
MINOR =
|
7
|
-
PATCH = 0
|
8
|
-
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
-
|
10
|
-
def self.version
|
11
|
-
VERSION
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.banner
|
15
|
-
"csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.root
|
19
|
-
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
20
|
-
end
|
21
|
-
|
22
|
-
end # module CsvPack
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module CsvPack

  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
  MINOR = 2
  PATCH = 0

  ## version string e.g. 0.2.0
  ##   note: frozen - a shared constant must not get changed in place
  VERSION = [MAJOR,MINOR,PATCH].join('.').freeze

  ## returns the version string (same object as VERSION)
  def self.version
    VERSION
  end

  ## returns a one-line banner with the library version plus
  ##   the version, release date and platform of the running ruby
  def self.banner
    "csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  end

  ## returns the expanded path three folder levels up from this file
  def self.root
    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
  end

end # module CsvPack
|
data/test/helper.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
|
2
|
-
## minitest setup
|
3
|
-
require 'minitest/autorun'
|
4
|
-
|
5
|
-
|
6
|
-
## our own code
|
7
|
-
require 'csvpack'
|
1
|
+
|
2
|
+
## minitest setup
|
3
|
+
require 'minitest/autorun'
|
4
|
+
|
5
|
+
|
6
|
+
## our own code
|
7
|
+
require 'csvpack'
|