csvpack 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,32 +1,32 @@
1
- require 'hoe'
2
- require './lib/csvpack/version.rb'
3
-
4
- Hoe.spec 'csvpack' do
5
-
6
- self.version = CsvPack::VERSION
7
-
8
- self.summary = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
9
- self.description = summary
10
-
11
- self.urls = ['https://github.com/csv11/csvpack']
12
-
13
- self.author = 'Gerald Bauer'
14
- self.email = 'ruby-talk@ruby-lang.org'
15
-
16
- # switch extension to .markdown for gihub formatting
17
- self.readme_file = 'README.md'
18
- self.history_file = 'HISTORY.md'
19
-
20
- self.extra_deps = [
21
- ['logutils', '>=0.6.1'],
22
- ['fetcher', '>=0.4.5'],
23
- ['activerecord', '>=5.0.0'],
24
- ]
25
-
26
- self.licenses = ['Public Domain']
27
-
28
- self.spec_extras = {
29
- required_ruby_version: '>= 2.2.2'
30
- }
31
-
32
- end
1
+ require 'hoe'
2
+ require './lib/csvpack/version.rb'
3
+
4
+ Hoe.spec 'csvpack' do
5
+
6
+ self.version = CsvPack::VERSION
7
+
8
+ self.summary = 'csvpack - work with tabular data packages using comma-separated values (CSV) datafiles in text with datapackage.json; download, read into and query comma-separated values (CSV) datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more'
9
+ self.description = summary
10
+
11
+ self.urls = ['https://github.com/csv11/csvpack']
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'ruby-talk@ruby-lang.org'
15
+
16
+ # switch extension to .markdown for GitHub formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'HISTORY.md'
19
+
20
+ self.extra_deps = [
21
+ ['logutils', '>=0.6.1'],
22
+ ['fetcher', '>=0.4.5'],
23
+ ['activerecord', '>=5.0.0'],
24
+ ]
25
+
26
+ self.licenses = ['Public Domain']
27
+
28
+ self.spec_extras = {
29
+ required_ruby_version: '>= 2.2.2'
30
+ }
31
+
32
+ end
@@ -1,52 +1,52 @@
1
- # encoding: utf-8
2
-
3
-
4
- require 'pp'
5
- require 'forwardable'
6
-
7
- ### csv
8
- require 'csv'
9
- require 'json'
10
- require 'fileutils'
11
-
12
-
13
- ### downloader
14
- require 'fetcher'
15
-
16
- ### activerecord w/ sqlite3
17
- ## require 'active_support/all' ## needed for String#binary? method
18
- require 'active_record'
19
-
20
-
21
-
22
- # our own code
23
-
24
- require 'csvpack/version' ## let version always go first
25
- require 'csvpack/pack'
26
- require 'csvpack/downloader'
27
-
28
- module CsvPack
29
-
30
- def self.import( *args )
31
- ## step 1: download
32
- dl = Downloader.new
33
- args.each do |arg|
34
- dl.fetch( arg )
35
- end
36
-
37
- ## step 2: up 'n' import
38
- args.each do |arg|
39
- pack = Pack.new( "./pack/#{arg}/datapackage.json" )
40
- pack.tables.each do |table|
41
- table.up!
42
- table.import!
43
- end
44
- end
45
- end
46
-
47
- end # module CsvPack
48
-
49
-
50
-
51
- # say hello
52
- puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'pp'
5
+ require 'forwardable'
6
+
7
+ ### csv
8
+ require 'csv'
9
+ require 'json'
10
+ require 'fileutils'
11
+
12
+
13
+ ### downloader
14
+ require 'fetcher'
15
+
16
+ ### activerecord w/ sqlite3
17
+ ## require 'active_support/all' ## needed for String#binary? method
18
+ require 'active_record'
19
+
20
+
21
+
22
+ # our own code
23
+
24
+ require 'csvpack/version' ## let version always go first
25
+ require 'csvpack/pack'
26
+ require 'csvpack/downloader'
27
+
28
+ module CsvPack
29
+
30
+ def self.import( *args )
31
+ ## step 1: download
32
+ dl = Downloader.new
33
+ args.each do |arg|
34
+ dl.fetch( arg )
35
+ end
36
+
37
+ ## step 2: up 'n' import
38
+ args.each do |arg|
39
+ pack = Pack.new( "./pack/#{arg}/datapackage.json" )
40
+ pack.tables.each do |table|
41
+ table.up!
42
+ table.import!
43
+ end
44
+ end
45
+ end
46
+
47
+ end # module CsvPack
48
+
49
+
50
+
51
+ # say hello
52
+ puts CsvPack.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
@@ -1,62 +1,72 @@
1
- # encoding: utf-8
2
-
3
- module CsvPack
4
-
5
- class Downloader
6
-
7
- def initialize( cache_dir='./pack' )
8
- @cache_dir = cache_dir # todo: check if folder exists now (or on demand)?
9
- @worker = Fetcher::Worker.new
10
- end
11
-
12
- SHORTCUTS = {
13
- ## to be done
14
- }
15
-
16
- def fetch( name_or_shortcut_or_url ) ## todo/check: use (re)name to get/update/etc. why? why not??
17
-
18
- name = name_or_shortcut_or_url
19
-
20
- ##
21
- ## e.g. try
22
- ## country-list
23
- ##
24
-
25
- ## url_base = "http://data.okfn.org/data/core/#{name}"
26
- url_base = "https://datahub.io/core/#{name}"
27
- url = "#{url_base}/datapackage.json"
28
-
29
- dest_dir = "#{@cache_dir}/#{name}"
30
- FileUtils.mkdir_p( dest_dir )
31
-
32
- pack_path = "#{dest_dir}/datapackage.json"
33
- @worker.copy( url, pack_path )
34
-
35
- h = JSON.parse( File.read( pack_path ) )
36
- pp h
37
-
38
- ## copy resources (tables)
39
- h['resources'].each do |r|
40
- puts "== resource:"
41
- pp r
42
-
43
- res_url = r['url']
44
-
45
- res_name = r['name']
46
- res_relative_path = r['path']
47
- if res_relative_path.nil?
48
- res_relative_path = "#{res_name}.csv"
49
- end
50
-
51
- res_path = "#{dest_dir}/#{res_relative_path}"
52
- puts "[debug] res_path: >#{res_path}<"
53
- res_dir = File.dirname( res_path )
54
- FileUtils.mkdir_p( res_dir )
55
-
56
- @worker.copy( res_url, res_path )
57
- end
58
- end
59
-
60
- end # class Downloader
61
-
62
- end # module CsvPack
1
+ # encoding: utf-8
2
+
3
+ module CsvPack
4
+
5
+ class Downloader
6
+
7
+ def initialize( cache_dir='./pack' )
8
+ @cache_dir = cache_dir # todo: check if folder exists now (or on demand)?
9
+ @worker = Fetcher::Worker.new
10
+ end
11
+
12
+ SHORTCUTS = {
13
+ ## to be done
14
+ }
15
+
16
+ def fetch( name_or_shortcut_or_url ) ## todo/check: use (re)name to get/update/etc. why? why not??
17
+
18
+ name = name_or_shortcut_or_url
19
+
20
+ ##
21
+ ## e.g. try
22
+ ## country-list
23
+ ##
24
+
25
+ ## url_base = "http://data.okfn.org/data/core/#{name}"
26
+ ## url_base = "https://datahub.io/core/#{name}"
27
+
28
+ ## or use "https://github.com/datasets/#{name}/raw/master"
29
+ url_base = "https://raw.githubusercontent.com/datasets/#{name}/master"
30
+
31
+
32
+ url = "#{url_base}/datapackage.json"
33
+
34
+ dest_dir = "#{@cache_dir}/#{name}"
35
+ FileUtils.mkdir_p( dest_dir )
36
+
37
+ pack_path = "#{dest_dir}/datapackage.json" ## todo/fix: rename to meta_path - why? why not?
38
+ @worker.copy( url, pack_path )
39
+
40
+ h = Meta.load_file( pack_path )
41
+ pp h
42
+
43
+ ## copy resources (tables)
44
+ h.resources.each do |r|
45
+ puts "== resource:"
46
+ pp r
47
+
48
+ res_name = r['name']
49
+ res_relative_path = r['path'] ## fix/todo: might now contain the url - is now res_url_or_relative_path !!!!!
50
+ if res_relative_path.nil?
51
+ res_relative_path = "#{res_name}.csv"
52
+ end
53
+
54
+ res_url = r['url'] ## check - old package format - url NO longer used!!!!
55
+ if res_url.nil?
56
+ ## build url
57
+ res_url = "#{url_base}/#{res_relative_path}"
58
+ end
59
+
60
+ ## todo/fix: rename - use just res_path - why? why not?
61
+ local_res_path = "#{dest_dir}/#{res_relative_path}"
62
+ puts "[debug] local_res_path: >#{local_res_path}<"
63
+ local_res_dir = File.dirname( local_res_path )
64
+ FileUtils.mkdir_p( local_res_dir )
65
+
66
+ @worker.copy( res_url, local_res_path )
67
+ end
68
+ end
69
+
70
+ end # class Downloader
71
+
72
+ end # module CsvPack
@@ -5,6 +5,47 @@
5
5
 
6
6
  module CsvPack
7
7
 
8
+
9
+
10
+ class Meta ## Pack(age) Meta / Manifest / Descriptor
11
+ extend Forwardable
12
+
13
+ def self.load_file( path )
14
+ text = File.open( path, 'r:utf-8' ).read
15
+ load( text )
16
+ end
17
+ ## todo: add alias method read
18
+
19
+ def self.load( text )
20
+ hash = JSON.parse( text )
21
+ new( hash )
22
+ end
23
+ ## todo: add alias method parse
24
+
25
+
26
+ def initialize( h )
27
+ @h = h
28
+ end
29
+
30
+ def name() @h['name']; end
31
+ def title() @h['title']; end
32
+ def license() @h['license']; end
33
+
34
+ ## todo/fix: wrap resource in a class - why? why not?
35
+ def resources() @h['resources']; end
36
+
37
+ ##############
38
+ def_delegators :@h, :[] ## todo/fix: add some more hash delegates - why? why not?
39
+
40
+ def pretty_print( printer )
41
+ printer.text "Meta<#{object_id} @h.name=#{name}, ...>"
42
+ end
43
+ end # class Meta
44
+
45
+
46
+
47
+
48
+
8
49
  class Pack
9
50
  ## load (tabular) datapackage into memory
10
51
  def initialize( path )
@@ -13,16 +54,15 @@ class Pack
13
54
  ## - check: if path is a folder/directory
14
55
  ## (auto-)add /datapackage.json
15
56
 
16
- text = File.open( path, 'r:utf-8' ).read
17
- @h = JSON.parse( text )
57
+ @meta = Meta.load_file( path )
18
58
 
19
59
  pack_dir = File.dirname(path)
20
60
 
21
- ## pp @h
61
+ pp @meta
22
62
 
23
63
  ## read in tables
24
64
  @tables = []
25
- @h['resources'].each do |r|
65
+ @meta.resources.each do |r|
26
66
  ## build table data
27
67
  @tables << build_tab( r, pack_dir )
28
68
  end
@@ -30,9 +70,8 @@ class Pack
30
70
  ## pp @tables
31
71
  end
32
72
 
33
- def name() @h['name']; end
34
- def title() @h['title']; end
35
- def license() @h['license']; end
73
+ def meta() @meta; end ## delegate known meta props (e.g. name, title, etc. - why? why not?)
74
+
36
75
 
37
76
  def tables() @tables; end
38
77
  ## convenience method - return first table
@@ -189,6 +228,7 @@ class Tab
189
228
  'datetime' => :datetime,
190
229
  'date' => :date,
191
230
  'time' => :time,
231
+ 'year' => :string, ## note: map year for now to string - anything better? why? why not?
192
232
  }
193
233
 
194
234
  def dump_schema
@@ -1,22 +1,22 @@
1
- # encoding: utf-8
2
-
3
- module CsvPack
4
-
5
- MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 1
7
- PATCH = 0
8
- VERSION = [MAJOR,MINOR,PATCH].join('.')
9
-
10
- def self.version
11
- VERSION
12
- end
13
-
14
- def self.banner
15
- "csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16
- end
17
-
18
- def self.root
19
- File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
20
- end
21
-
22
- end # module CsvPack
1
+ # encoding: utf-8
2
+
3
+ module CsvPack
4
+
5
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 2
7
+ PATCH = 0
8
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
9
+
10
+ def self.version
11
+ VERSION
12
+ end
13
+
14
+ def self.banner
15
+ "csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16
+ end
17
+
18
+ def self.root
19
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
20
+ end
21
+
22
+ end # module CsvPack
@@ -1,7 +1,7 @@
1
-
2
- ## minitest setup
3
- require 'minitest/autorun'
4
-
5
-
6
- ## our own code
7
- require 'csvpack'
1
+
2
+ ## minitest setup
3
+ require 'minitest/autorun'
4
+
5
+
6
+ ## our own code
7
+ require 'csvpack'