csvpack 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,32 +1,32 @@
1
- require 'hoe'
2
- require './lib/csvpack/version.rb'
3
-
4
- Hoe.spec 'csvpack' do
5
-
6
- self.version = CsvPack::VERSION
7
-
8
- self.summary = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
9
- self.description = summary
10
-
11
- self.urls = ['https://github.com/csv11/csvpack']
12
-
13
- self.author = 'Gerald Bauer'
14
- self.email = 'ruby-talk@ruby-lang.org'
15
-
16
- # switch extension to .markdown for github formatting
17
- self.readme_file = 'README.md'
18
- self.history_file = 'HISTORY.md'
19
-
20
- self.extra_deps = [
21
- ['logutils', '>=0.6.1'],
22
- ['fetcher', '>=0.4.5'],
23
- ['activerecord', '>=5.0.0'],
24
- ]
25
-
26
- self.licenses = ['Public Domain']
27
-
28
- self.spec_extras = {
29
- required_ruby_version: '>= 2.2.2'
30
- }
31
-
32
- end
1
+ require 'hoe'
2
+ require './lib/csvpack/version.rb'
3
+
4
+ Hoe.spec 'csvpack' do
5
+
6
+ self.version = CsvPack::VERSION
7
+
8
+ self.summary = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
9
+ self.description = summary
10
+
11
+ self.urls = ['https://github.com/csv11/csvpack']
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'ruby-talk@ruby-lang.org'
15
+
16
+ # switch extension to .markdown for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'HISTORY.md'
19
+
20
+ self.extra_deps = [
21
+ ['logutils', '>=0.6.1'],
22
+ ['fetcher', '>=0.4.5'],
23
+ ['activerecord', '>=5.0.0'],
24
+ ]
25
+
26
+ self.licenses = ['Public Domain']
27
+
28
+ self.spec_extras = {
29
+ required_ruby_version: '>= 2.2.2'
30
+ }
31
+
32
+ end
@@ -1,52 +1,52 @@
1
- # encoding: utf-8
2
-
3
-
4
- require 'pp'
5
- require 'forwardable'
6
-
7
- ### csv
8
- require 'csv'
9
- require 'json'
10
- require 'fileutils'
11
-
12
-
13
- ### downloader
14
- require 'fetcher'
15
-
16
- ### activerecord w/ sqlite3
17
- ## require 'active_support/all' ## needed for String#binary? method
18
- require 'active_record'
19
-
20
-
21
-
22
- # our own code
23
-
24
- require 'csvpack/version' ## let version always go first
25
- require 'csvpack/pack'
26
- require 'csvpack/downloader'
27
-
28
- module CsvPack
29
-
30
- def self.import( *args )
31
- ## step 1: download
32
- dl = Downloader.new
33
- args.each do |arg|
34
- dl.fetch( arg )
35
- end
36
-
37
- ## step 2: up 'n' import
38
- args.each do |arg|
39
- pack = Pack.new( "./pack/#{arg}/datapackage.json" )
40
- pack.tables.each do |table|
41
- table.up!
42
- table.import!
43
- end
44
- end
45
- end
46
-
47
- end # module CsvPack
48
-
49
-
50
-
51
- # say hello
52
- puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'pp'
5
+ require 'forwardable'
6
+
7
+ ### csv
8
+ require 'csv'
9
+ require 'json'
10
+ require 'fileutils'
11
+
12
+
13
+ ### downloader
14
+ require 'fetcher'
15
+
16
+ ### activerecord w/ sqlite3
17
+ ## require 'active_support/all' ## needed for String#binary? method
18
+ require 'active_record'
19
+
20
+
21
+
22
+ # our own code
23
+
24
+ require 'csvpack/version' ## let version always go first
25
+ require 'csvpack/pack'
26
+ require 'csvpack/downloader'
27
+
28
+ module CsvPack
29
+
30
+ def self.import( *args )
31
+ ## step 1: download
32
+ dl = Downloader.new
33
+ args.each do |arg|
34
+ dl.fetch( arg )
35
+ end
36
+
37
+ ## step 2: up 'n' import
38
+ args.each do |arg|
39
+ pack = Pack.new( "./pack/#{arg}/datapackage.json" )
40
+ pack.tables.each do |table|
41
+ table.up!
42
+ table.import!
43
+ end
44
+ end
45
+ end
46
+
47
+ end # module CsvPack
48
+
49
+
50
+
51
+ # say hello
52
+ puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
@@ -1,62 +1,72 @@
1
- # encoding: utf-8
2
-
3
- module CsvPack
4
-
5
- class Downloader
6
-
7
- def initialize( cache_dir='./pack' )
8
- @cache_dir = cache_dir # todo: check if folder exists now (or on demand)?
9
- @worker = Fetcher::Worker.new
10
- end
11
-
12
- SHORTCUTS = {
13
- ## to be done
14
- }
15
-
16
- def fetch( name_or_shortcut_or_url ) ## todo/check: use (re)name to get/update/etc. why? why not??
17
-
18
- name = name_or_shortcut_or_url
19
-
20
- ##
21
- ## e.g. try
22
- ## country-list
23
- ##
24
-
25
- ## url_base = "http://data.okfn.org/data/core/#{name}"
26
- url_base = "https://datahub.io/core/#{name}"
27
- url = "#{url_base}/datapackage.json"
28
-
29
- dest_dir = "#{@cache_dir}/#{name}"
30
- FileUtils.mkdir_p( dest_dir )
31
-
32
- pack_path = "#{dest_dir}/datapackage.json"
33
- @worker.copy( url, pack_path )
34
-
35
- h = JSON.parse( File.read( pack_path ) )
36
- pp h
37
-
38
- ## copy resources (tables)
39
- h['resources'].each do |r|
40
- puts "== resource:"
41
- pp r
42
-
43
- res_url = r['url']
44
-
45
- res_name = r['name']
46
- res_relative_path = r['path']
47
- if res_relative_path.nil?
48
- res_relative_path = "#{res_name}.csv"
49
- end
50
-
51
- res_path = "#{dest_dir}/#{res_relative_path}"
52
- puts "[debug] res_path: >#{res_path}<"
53
- res_dir = File.dirname( res_path )
54
- FileUtils.mkdir_p( res_dir )
55
-
56
- @worker.copy( res_url, res_path )
57
- end
58
- end
59
-
60
- end # class Downloader
61
-
62
- end # module CsvPack
1
+ # encoding: utf-8
2
+
3
+ module CsvPack
4
+
5
+ class Downloader
6
+
7
+ def initialize( cache_dir='./pack' )
8
+ @cache_dir = cache_dir # todo: check if folder exists now (or on demand)?
9
+ @worker = Fetcher::Worker.new
10
+ end
11
+
12
+ SHORTCUTS = {
13
+ ## to be done
14
+ }
15
+
16
+ def fetch( name_or_shortcut_or_url ) ## todo/check: use (re)name to get/update/etc. why? why not??
17
+
18
+ name = name_or_shortcut_or_url
19
+
20
+ ##
21
+ ## e.g. try
22
+ ## country-list
23
+ ##
24
+
25
+ ## url_base = "http://data.okfn.org/data/core/#{name}"
26
+ ## url_base = "https://datahub.io/core/#{name}"
27
+
28
+ ## or use "https://github.com/datasets/#{name}/raw/master"
29
+ url_base = "https://raw.githubusercontent.com/datasets/#{name}/master"
30
+
31
+
32
+ url = "#{url_base}/datapackage.json"
33
+
34
+ dest_dir = "#{@cache_dir}/#{name}"
35
+ FileUtils.mkdir_p( dest_dir )
36
+
37
+ pack_path = "#{dest_dir}/datapackage.json" ## todo/fix: rename to meta_path - why? why not?
38
+ @worker.copy( url, pack_path )
39
+
40
+ h = Meta.load_file( pack_path )
41
+ pp h
42
+
43
+ ## copy resources (tables)
44
+ h.resources.each do |r|
45
+ puts "== resource:"
46
+ pp r
47
+
48
+ res_name = r['name']
49
+ res_relative_path = r['path'] ## fix/todo: might now contain the url - is now res_url_or_relative_path !!!!!
50
+ if res_relative_path.nil?
51
+ res_relative_path = "#{res_name}.csv"
52
+ end
53
+
54
+ res_url = r['url'] ## check - old package format - url NO longer used!!!!
55
+ if res_url.nil?
56
+ ## build url
57
+ res_url = "#{url_base}/#{res_relative_path}"
58
+ end
59
+
60
+ ## todo/fix: rename - use just res_path - why? why not?
61
+ local_res_path = "#{dest_dir}/#{res_relative_path}"
62
+ puts "[debug] local_res_path: >#{local_res_path}<"
63
+ local_res_dir = File.dirname( local_res_path )
64
+ FileUtils.mkdir_p( local_res_dir )
65
+
66
+ @worker.copy( res_url, local_res_path )
67
+ end
68
+ end
69
+
70
+ end # class Downloader
71
+
72
+ end # module CsvPack
@@ -5,6 +5,47 @@
5
5
 
6
6
  module CsvPack
7
7
 
8
+
9
+
10
+ class Meta ## Pack(age) Meta / Manifest / Descriptor
11
+ extend Forwardable
12
+
13
+ def self.load_file( path )
14
+ text = File.open( path, 'r:utf-8' ).read
15
+ load( text )
16
+ end
17
+ ## todo: add alias method read
18
+
19
+ def self.load( text )
20
+ hash = JSON.parse( text )
21
+ new( hash )
22
+ end
23
+ ## todo: add alias method parse
24
+
25
+
26
+ def initialize( h )
27
+ @h = h
28
+ end
29
+
30
+ def name() @h['name']; end
31
+ def title() @h['title']; end
32
+ def license() @h['license']; end
33
+
34
+ ## todo/fix: wrap resource in a class - why? why not?
35
+ def resources() @h['resources']; end
36
+
37
+ ##############
38
+ def_delegators :@h, :[] ## todo/fix: add some more hash delegates - why? why not?
39
+
40
+ def pretty_print( printer )
41
+ printer.text "Meta<#{object_id} @h.name=#{name}, ...>"
42
+ end
43
+ end # class Meta
44
+
45
+
46
+
47
+
48
+
8
49
  class Pack
9
50
  ## load (tabular) datapackage into memory
10
51
  def initialize( path )
@@ -13,16 +54,15 @@ class Pack
13
54
  ## - check: if path is a folder/directory
14
55
  ## (auto-)add /datapackage.json
15
56
 
16
- text = File.open( path, 'r:utf-8' ).read
17
- @h = JSON.parse( text )
57
+ @meta = Meta.load_file( path )
18
58
 
19
59
  pack_dir = File.dirname(path)
20
60
 
21
- ## pp @h
61
+ pp @meta
22
62
 
23
63
  ## read in tables
24
64
  @tables = []
25
- @h['resources'].each do |r|
65
+ @meta.resources.each do |r|
26
66
  ## build table data
27
67
  @tables << build_tab( r, pack_dir )
28
68
  end
@@ -30,9 +70,8 @@ class Pack
30
70
  ## pp @tables
31
71
  end
32
72
 
33
- def name() @h['name']; end
34
- def title() @h['title']; end
35
- def license() @h['license']; end
73
+ def meta() @meta; end ## delegate known meta props (e.g. name, title, etc. - why? why not?)
74
+
36
75
 
37
76
  def tables() @tables; end
38
77
  ## convenience method - return first table
@@ -189,6 +228,7 @@ class Tab
189
228
  'datetime' => :datetime,
190
229
  'date' => :date,
191
230
  'time' => :time,
231
+ 'year' => :string, ## note: map year for now to string - anything better? why? why not?
192
232
  }
193
233
 
194
234
  def dump_schema
@@ -1,22 +1,22 @@
1
- # encoding: utf-8
2
-
3
- module CsvPack
4
-
5
- MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 1
7
- PATCH = 0
8
- VERSION = [MAJOR,MINOR,PATCH].join('.')
9
-
10
- def self.version
11
- VERSION
12
- end
13
-
14
- def self.banner
15
- "csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16
- end
17
-
18
- def self.root
19
- File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
20
- end
21
-
22
- end # module CsvPack
1
+ # encoding: utf-8
2
+
3
+ module CsvPack
4
+
5
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 2
7
+ PATCH = 0
8
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
9
+
10
+ def self.version
11
+ VERSION
12
+ end
13
+
14
+ def self.banner
15
+ "csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16
+ end
17
+
18
+ def self.root
19
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
20
+ end
21
+
22
+ end # module CsvPack
@@ -1,7 +1,7 @@
1
-
2
- ## minitest setup
3
- require 'minitest/autorun'
4
-
5
-
6
- ## our own code
7
- require 'csvpack'
1
+
2
+ ## minitest setup
3
+ require 'minitest/autorun'
4
+
5
+
6
+ ## our own code
7
+ require 'csvpack'