jetel 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +5 -2
- data/lib/jetel/modules/alexa/alexa.rb +1 -0
- data/lib/jetel/modules/gadm/gadm.rb +72 -0
- data/lib/jetel/modules/iso3166/iso3166.rb +41 -24
- data/lib/jetel/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08a134a7446127a80ff17bc70f7ee1312db161e0
|
4
|
+
data.tar.gz: 6c6b65b87afd99a4f05672d421eb33619b100420
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa616721b387cbf35f0f586e9bac07dd2119e6f1133a7ee0a9574ab82efa2f285e3e16dc12dccda643e77ada5e7fa59b38c2f215d3737908200bc4ef19db1183
|
7
|
+
data.tar.gz: 4009e9ca9867ce8d2eb9ca2d6322cd5254b73334e729d283a4d1d81231bc34c982114a7b2926ba66e005578e1d3a014786bc23b02b76c64d0327e030290f4c73
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
jetel (0.0.14)
|
4
|
+
jetel (0.0.15)
|
5
5
|
activesupport
|
6
6
|
aws-sdk (~> 2)
|
7
7
|
couchbase (~> 1.3, >= 1.3.14)
|
@@ -84,7 +84,7 @@ GEM
|
|
84
84
|
multi_json (1.11.2)
|
85
85
|
multipart-post (2.0.0)
|
86
86
|
netrc (0.7.9)
|
87
|
-
nokogiri (1.6.6.
|
87
|
+
nokogiri (1.6.6.3)
|
88
88
|
mini_portile (~> 0.6.0)
|
89
89
|
parser (2.2.3.0)
|
90
90
|
ast (>= 1.1, < 3.0)
|
data/README.md
CHANGED
@@ -102,6 +102,7 @@ $ jetel downloaders
|
|
102
102
|
| aria | Jetel::Downloaders::Aria |
|
103
103
|
| curl | Jetel::Downloaders::Curl |
|
104
104
|
| ruby | Jetel::Downloaders::Ruby |
|
105
|
+
| wget | Jetel::Downloaders::Wget |
|
105
106
|
+------+--------------------------+
|
106
107
|
```
|
107
108
|
|
@@ -200,14 +201,16 @@ COPY "geolite"
|
|
200
201
|
```
|
201
202
|
.
|
202
203
|
├── bin
|
203
|
-
|
204
|
+
├── lib
|
204
205
|
│ └── jetel
|
205
206
|
│ ├── cli
|
206
207
|
│ │ └── cmd
|
207
208
|
│ ├── config
|
208
209
|
│ ├── downloaders
|
209
210
|
│ │ ├── aria
|
210
|
-
│ │
|
211
|
+
│ │ ├── curl
|
212
|
+
│ │ ├── ruby
|
213
|
+
│ │ └── wget
|
211
214
|
│ ├── extensions
|
212
215
|
│ ├── helpers
|
213
216
|
│ ├── loaders
|
data/lib/jetel/modules/gadm/gadm.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'pmap'
|
4
|
+
|
5
|
+
require_relative '../../config/config'
|
6
|
+
require_relative '../../modules/module'
|
7
|
+
|
8
|
+
require 'nokogiri'
|
9
|
+
require 'open-uri'
|
10
|
+
|
11
|
+
module Jetel
|
12
|
+
module Modules
|
13
|
+
class Gadm < Module
|
14
|
+
class << self
|
15
|
+
def sources
|
16
|
+
page = Nokogiri::HTML(open('http://www.gadm.org/country'))
|
17
|
+
|
18
|
+
options = page.css('select[name="cnt"] > option')
|
19
|
+
|
20
|
+
res = options.map do |option|
|
21
|
+
full_name = option['value']
|
22
|
+
name = "#{full_name.split('_').first}"
|
23
|
+
filename = "#{name}_adm_shp.zip"
|
24
|
+
{
|
25
|
+
name: name,
|
26
|
+
url: "http://biogeo.ucdavis.edu/data/gadm2.8/shp/#{filename}",
|
27
|
+
filename_downloaded: filename
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
res
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def download(global_options, options, args)
|
36
|
+
self.class.sources.pmap do |source|
|
37
|
+
download_source(source, global_options.merge(options))
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def extract(global_options, options, args)
|
42
|
+
self.class.sources.pmap do |source|
|
43
|
+
unzip(source, global_options.merge(options))
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def transform(global_options, options, args)
|
48
|
+
self.class.sources.pmap(8) do |source|
|
49
|
+
extracted_file = extracted_file(source, global_options.merge(options))
|
50
|
+
transformed_file = transformed_file(source, global_options.merge(options))
|
51
|
+
dest_dir = transform_dir(source, global_options.merge(options))
|
52
|
+
FileUtils.mkdir_p(dest_dir)
|
53
|
+
|
54
|
+
extracted_dir = extract_dir(source, global_options.merge(options))
|
55
|
+
Dir.glob("#{extracted_dir}/*.shp") do |shapefile|
|
56
|
+
puts "Transforming #{shapefile}"
|
57
|
+
|
58
|
+
# "topojson data/Gadm/AFG/extracted/AFG_adm0.shp -o data/Gadm/AFG/transformed/AFG_adm0.topo.json"
|
59
|
+
cmd = "topojson #{shapefile} -o #{shapefile.gsub(extracted_dir, dest_dir).gsub('.shp', '.topo.json')}"
|
60
|
+
puts cmd
|
61
|
+
PTY.spawn(cmd) do |stdout, stdin, pid|
|
62
|
+
begin
|
63
|
+
# Do stuff with the output here. Just printing to show it works
|
64
|
+
stdout.each { |line| print line }
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/jetel/modules/iso3166/iso3166.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
3
5
|
require 'pmap'
|
4
6
|
require 'zip'
|
5
7
|
require 'csv'
|
@@ -14,10 +16,8 @@ module Jetel
|
|
14
16
|
def sources
|
15
17
|
[
|
16
18
|
{
|
17
|
-
name: '
|
18
|
-
|
19
|
-
filename_transformed: 'IP2LOCATION-ISO3166-2.CSV',
|
20
|
-
url: 'http://www.ip2location.com/downloads/Hi3sL9bnXfe/IP2LOCATION-ISO3166-2.ZIP'
|
19
|
+
name: 'iso3166',
|
20
|
+
url: 'https://en.wikipedia.org/wiki/ISO_3166-1'
|
21
21
|
}
|
22
22
|
]
|
23
23
|
end
|
@@ -31,35 +31,52 @@ module Jetel
|
|
31
31
|
|
32
32
|
def extract(global_options, options, args)
|
33
33
|
self.class.sources.pmap do |source|
|
34
|
-
|
34
|
+
downloaded_file = downloaded_file(source, global_options.merge(options))
|
35
|
+
dest_dir = extract_dir(source, global_options.merge(options))
|
36
|
+
|
37
|
+
puts "Extracting #{downloaded_file}"
|
38
|
+
|
39
|
+
FileUtils.mkdir_p(dest_dir)
|
40
|
+
FileUtils.cp(downloaded_file, dest_dir)
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
38
44
|
def transform(global_options, options, args)
|
39
45
|
self.class.sources.pmap do |source|
|
40
|
-
|
46
|
+
extracted_file = extracted_file(source, global_options.merge(options))
|
47
|
+
dest_dir = transform_dir(source, global_options.merge(options))
|
48
|
+
FileUtils.mkdir_p(dest_dir)
|
49
|
+
|
50
|
+
puts "Transforming #{extracted_file}"
|
41
51
|
|
42
|
-
|
43
|
-
|
52
|
+
page = Nokogiri::HTML(open(extracted_file))
|
53
|
+
rows = page.css('table.wikitable.sortable tr')
|
44
54
|
|
45
|
-
|
55
|
+
headers = [
|
56
|
+
'name',
|
57
|
+
'alpha2',
|
58
|
+
'alpha3',
|
59
|
+
'numeric'
|
60
|
+
]
|
46
61
|
|
47
|
-
|
48
|
-
|
49
|
-
|
62
|
+
transformed_file = "#{extracted_file.gsub(extract_dir(source, global_options.merge(options)), dest_dir)}.csv"
|
63
|
+
CSV.open(transformed_file, 'w', :write_headers => true, :headers => headers, :quote_char => '"', :force_quotes => true) do |csv|
|
64
|
+
rows.each do |row|
|
65
|
+
td = row.css('td')
|
50
66
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
67
|
+
next if td.length < 4
|
68
|
+
|
69
|
+
name = td[0].css('a').text
|
70
|
+
alpha2 = td[1].text
|
71
|
+
alpha3 = td[2].text
|
72
|
+
numeric = td[3].text
|
73
|
+
|
74
|
+
csv << [
|
75
|
+
name,
|
76
|
+
alpha2,
|
77
|
+
alpha3,
|
78
|
+
numeric
|
79
|
+
]
|
63
80
|
end
|
64
81
|
end
|
65
82
|
end
|
data/lib/jetel/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jetel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.14
|
4
|
+
version: 0.0.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -425,6 +425,7 @@ files:
|
|
425
425
|
- lib/jetel/loaders/pg/sql/schema.sql.erb
|
426
426
|
- lib/jetel/loaders/pg/sql/truncate_table.sql.erb
|
427
427
|
- lib/jetel/modules/alexa/alexa.rb
|
428
|
+
- lib/jetel/modules/gadm/gadm.rb
|
428
429
|
- lib/jetel/modules/geolite/geolite.rb
|
429
430
|
- lib/jetel/modules/ip/ip.rb
|
430
431
|
- lib/jetel/modules/iso3166/iso3166.rb
|