jetel 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ea0e9c2a01bc364e5e46006540f86d4addda47be
4
- data.tar.gz: 71217244d4494fcaa12f480a0ea6180f35e5c418
3
+ metadata.gz: 08a134a7446127a80ff17bc70f7ee1312db161e0
4
+ data.tar.gz: 6c6b65b87afd99a4f05672d421eb33619b100420
5
5
  SHA512:
6
- metadata.gz: b07351ad0b02a17d45973de7491f55d68a6a8c47d288c034fec083fb2896cf5084ae380fd86cb478bcf41633c5ee12440e9db0a52a1f0ccde0b8f73f716c7ad4
7
- data.tar.gz: 907f2c48bfa520617933722899bd95c2428861cc891f482613ec12ff58fbf6f2cf467a0a7869cd6b19f70ab7621716ac1528669ef7d36ddcec7f8edbe0b62b68
6
+ metadata.gz: fa616721b387cbf35f0f586e9bac07dd2119e6f1133a7ee0a9574ab82efa2f285e3e16dc12dccda643e77ada5e7fa59b38c2f215d3737908200bc4ef19db1183
7
+ data.tar.gz: 4009e9ca9867ce8d2eb9ca2d6322cd5254b73334e729d283a4d1d81231bc34c982114a7b2926ba66e005578e1d3a014786bc23b02b76c64d0327e030290f4c73
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- jetel (0.0.14)
4
+ jetel (0.0.15)
5
5
  activesupport
6
6
  aws-sdk (~> 2)
7
7
  couchbase (~> 1.3, >= 1.3.14)
@@ -84,7 +84,7 @@ GEM
84
84
  multi_json (1.11.2)
85
85
  multipart-post (2.0.0)
86
86
  netrc (0.7.9)
87
- nokogiri (1.6.6.2)
87
+ nokogiri (1.6.6.3)
88
88
  mini_portile (~> 0.6.0)
89
89
  parser (2.2.3.0)
90
90
  ast (>= 1.1, < 3.0)
data/README.md CHANGED
@@ -102,6 +102,7 @@ $ jetel downloaders
102
102
  | aria | Jetel::Downloaders::Aria |
103
103
  | curl | Jetel::Downloaders::Curl |
104
104
  | ruby | Jetel::Downloaders::Ruby |
105
+ | wget | Jetel::Downloaders::Wget |
105
106
  +------+--------------------------+
106
107
  ```
107
108
 
@@ -200,14 +201,16 @@ COPY "geolite"
200
201
  ```
201
202
  .
202
203
  ├── bin
203
- ── lib
204
+ ├── lib
204
205
  │   └── jetel
205
206
  │   ├── cli
206
207
  │   │   └── cmd
207
208
  │   ├── config
208
209
  │   ├── downloaders
209
210
  │   │   ├── aria
210
- │   │   └── ruby
211
+ │   │   ├── curl
212
+ │   │   ├── ruby
213
+ │   │   └── wget
211
214
  │   ├── extensions
212
215
  │   ├── helpers
213
216
  │   ├── loaders
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'pmap'
4
+ require 'pty'
4
5
 
5
6
  require_relative '../../config/config'
6
7
  require_relative '../../modules/module'
@@ -0,0 +1,72 @@
1
+ # encoding: utf-8
2
+
3
require 'fileutils'
require 'open-uri'
require 'pty'

require 'nokogiri'
require 'pmap'

require_relative '../../config/config'
require_relative '../../modules/module'
10
+
11
module Jetel
  module Modules
    # Module that fetches the per-country shapefile archives listed on
    # gadm.org and converts every extracted `.shp` file to TopoJSON by running
    # the `topojson` command-line tool.
    #
    # NOTE(review): `download_source`, `unzip`, `extract_dir` and
    # `transform_dir` are not defined here; presumably they are inherited from
    # the project's `Module` base class -- confirm.
    class Gadm < Module
      # Page whose `select[name="cnt"]` options enumerate the countries.
      COUNTRY_LIST_URL = 'http://www.gadm.org/country'.freeze

      # URL prefix under which the `<code>_adm_shp.zip` archives are fetched.
      SHAPEFILE_BASE_URL = 'http://biogeo.ucdavis.edu/data/gadm2.8/shp'.freeze

      class << self
        # Scrapes the country selector and builds one source per country code.
        #
        # The code is the part of each option's `value` before the first
        # underscore. Options with a missing or empty value (e.g. a placeholder
        # entry) are skipped instead of yielding a bogus "_adm_shp.zip" source.
        #
        # @return [Array<Hash>] hashes with :name, :url and :filename_downloaded
        def sources
          # URI#open (provided by open-uri) works without the Kernel#open
          # patching removed in Ruby 3.0; the block form closes the handle.
          page = URI.parse(COUNTRY_LIST_URL).open { |io| Nokogiri::HTML(io) }

          codes = page.css('select[name="cnt"] > option').map do |option|
            option['value'].to_s.split('_').first
          end

          codes.compact.reject(&:empty?).map do |name|
            filename = "#{name}_adm_shp.zip"
            {
              name: name,
              url: "#{SHAPEFILE_BASE_URL}/#{filename}",
              filename_downloaded: filename
            }
          end
        end
      end

      # Downloads the archive of every source, in parallel via `pmap`.
      #
      # @param global_options [Hash] merged with +options+ (which wins)
      # @param options [Hash] command-specific options
      # @param args [Array] unused
      def download(global_options, options, args)
        self.class.sources.pmap do |source|
          download_source(source, global_options.merge(options))
        end
      end

      # Unzips the downloaded archive of every source, in parallel via `pmap`.
      #
      # @param global_options [Hash] merged with +options+ (which wins)
      # @param options [Hash] command-specific options
      # @param args [Array] unused
      def extract(global_options, options, args)
        self.class.sources.pmap do |source|
          unzip(source, global_options.merge(options))
        end
      end

      # Converts every extracted `*.shp` file of every source to a
      # `*.topo.json` file in the source's transform directory.
      #
      # @param global_options [Hash] merged with +options+ (which wins)
      # @param options [Hash] command-specific options
      # @param args [Array] unused
      def transform(global_options, options, args)
        self.class.sources.pmap(8) do |source|
          # Merged per source so worker threads never share one options hash.
          opts = global_options.merge(options)

          dest_dir = transform_dir(source, opts)
          FileUtils.mkdir_p(dest_dir)

          extracted_dir = extract_dir(source, opts)
          Dir.glob("#{extracted_dir}/*.shp") do |shapefile|
            puts "Transforming #{shapefile}"
            run_topojson(shapefile, topojson_path(shapefile, dest_dir))
          end
        end
      end

      private

      # Output path for +shapefile+ inside +dest_dir+, e.g.
      # ".../extracted/AFG_adm0.shp" -> "<dest_dir>/AFG_adm0.topo.json".
      # Built from the basename so only the directory and the trailing
      # extension change, never other occurrences inside the path.
      def topojson_path(shapefile, dest_dir)
        File.join(dest_dir, "#{File.basename(shapefile, '.shp')}.topo.json")
      end

      # Runs `topojson <shapefile> -o <output>` and echoes its output.
      def run_topojson(shapefile, output)
        command = ['topojson', shapefile, '-o', output]
        puts command.join(' ')

        # Arguments are passed as a list, so no shell is involved and paths
        # containing spaces or metacharacters reach topojson unchanged.
        PTY.spawn(*command) do |stdout, _stdin, _pid|
          begin
            stdout.each { |line| print line }
          rescue Errno::EIO
            # Linux reports the child closing its side of the pty as EIO
            # rather than EOF; this is the normal end of output.
          end
        end
      end
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
+ require 'nokogiri'
4
+ require 'open-uri'
3
5
  require 'pmap'
4
6
  require 'zip'
5
7
  require 'csv'
@@ -14,10 +16,8 @@ module Jetel
14
16
  def sources
15
17
  [
16
18
  {
17
- name: 'iso366',
18
- filename_extracted: 'IP2LOCATION-ISO3166-2.CSV',
19
- filename_transformed: 'IP2LOCATION-ISO3166-2.CSV',
20
- url: 'http://www.ip2location.com/downloads/Hi3sL9bnXfe/IP2LOCATION-ISO3166-2.ZIP'
19
+ name: 'iso3166',
20
+ url: 'https://en.wikipedia.org/wiki/ISO_3166-1'
21
21
  }
22
22
  ]
23
23
  end
@@ -31,35 +31,52 @@ module Jetel
31
31
 
32
32
  def extract(global_options, options, args)
33
33
  self.class.sources.pmap do |source|
34
- unzip(source, global_options.merge(options))
34
+ downloaded_file = downloaded_file(source, global_options.merge(options))
35
+ dest_dir = extract_dir(source, global_options.merge(options))
36
+
37
+ puts "Extracting #{downloaded_file}"
38
+
39
+ FileUtils.mkdir_p(dest_dir)
40
+ FileUtils.cp(downloaded_file, dest_dir)
35
41
  end
36
42
  end
37
43
 
38
44
  def transform(global_options, options, args)
39
45
  self.class.sources.pmap do |source|
40
- opts = global_options.merge(options)
46
+ extracted_file = extracted_file(source, global_options.merge(options))
47
+ dest_dir = transform_dir(source, global_options.merge(options))
48
+ FileUtils.mkdir_p(dest_dir)
49
+
50
+ puts "Transforming #{extracted_file}"
41
51
 
42
- extracted_file = extracted_file(source, opts)
43
- transformed_file = transformed_file(source, opts)
52
+ page = Nokogiri::HTML(open(extracted_file))
53
+ rows = page.css('table.wikitable.sortable tr')
44
54
 
45
- FileUtils.mkdir_p(transform_dir(source, opts))
55
+ headers = [
56
+ 'name',
57
+ 'alpha2',
58
+ 'alpha3',
59
+ 'numeric'
60
+ ]
46
61
 
47
- csv_opts = {
48
- :headers => true
49
- }
62
+ transformed_file = "#{extracted_file.gsub(extract_dir(source, global_options.merge(options)), dest_dir)}.csv"
63
+ CSV.open(transformed_file, 'w', :write_headers => true, :headers => headers, :quote_char => '"', :force_quotes => true) do |csv|
64
+ rows.each do |row|
65
+ td = row.css('td')
50
66
 
51
- puts "Transforming #{extracted_file}"
52
- CSV.open(extracted_file, 'r', csv_opts) do |csv|
53
- headers = %w(
54
- country_code
55
- subdivision_name
56
- code
57
- )
58
- CSV.open(transformed_file, 'w', :write_headers => true, :headers => headers) do |csv_out|
59
- csv.each do |row|
60
- next if row.length < 3
61
- csv_out << row
62
- end
67
+ next if td.length < 4
68
+
69
+ name = td[0].css('a').text
70
+ alpha2 = td[1].text
71
+ alpha3 = td[2].text
72
+ numeric = td[3].text
73
+
74
+ csv << [
75
+ name,
76
+ alpha2,
77
+ alpha3,
78
+ numeric
79
+ ]
63
80
  end
64
81
  end
65
82
  end
data/lib/jetel/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Jetel
4
- VERSION = '0.0.14'
4
+ VERSION = '0.0.15'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jetel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomas Korcak
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-07 00:00:00.000000000 Z
11
+ date: 2015-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -425,6 +425,7 @@ files:
425
425
  - lib/jetel/loaders/pg/sql/schema.sql.erb
426
426
  - lib/jetel/loaders/pg/sql/truncate_table.sql.erb
427
427
  - lib/jetel/modules/alexa/alexa.rb
428
+ - lib/jetel/modules/gadm/gadm.rb
428
429
  - lib/jetel/modules/geolite/geolite.rb
429
430
  - lib/jetel/modules/ip/ip.rb
430
431
  - lib/jetel/modules/iso3166/iso3166.rb