jetel 0.0.14 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +5 -2
- data/lib/jetel/modules/alexa/alexa.rb +1 -0
- data/lib/jetel/modules/gadm/gadm.rb +72 -0
- data/lib/jetel/modules/iso3166/iso3166.rb +41 -24
- data/lib/jetel/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08a134a7446127a80ff17bc70f7ee1312db161e0
|
4
|
+
data.tar.gz: 6c6b65b87afd99a4f05672d421eb33619b100420
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa616721b387cbf35f0f586e9bac07dd2119e6f1133a7ee0a9574ab82efa2f285e3e16dc12dccda643e77ada5e7fa59b38c2f215d3737908200bc4ef19db1183
|
7
|
+
data.tar.gz: 4009e9ca9867ce8d2eb9ca2d6322cd5254b73334e729d283a4d1d81231bc34c982114a7b2926ba66e005578e1d3a014786bc23b02b76c64d0327e030290f4c73
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
jetel (0.0.14)
|
4
|
+
jetel (0.0.15)
|
5
5
|
activesupport
|
6
6
|
aws-sdk (~> 2)
|
7
7
|
couchbase (~> 1.3, >= 1.3.14)
|
@@ -84,7 +84,7 @@ GEM
|
|
84
84
|
multi_json (1.11.2)
|
85
85
|
multipart-post (2.0.0)
|
86
86
|
netrc (0.7.9)
|
87
|
-
nokogiri (1.6.6.
|
87
|
+
nokogiri (1.6.6.3)
|
88
88
|
mini_portile (~> 0.6.0)
|
89
89
|
parser (2.2.3.0)
|
90
90
|
ast (>= 1.1, < 3.0)
|
data/README.md
CHANGED
@@ -102,6 +102,7 @@ $ jetel downloaders
|
|
102
102
|
| aria | Jetel::Downloaders::Aria |
|
103
103
|
| curl | Jetel::Downloaders::Curl |
|
104
104
|
| ruby | Jetel::Downloaders::Ruby |
|
105
|
+
| wget | Jetel::Downloaders::Wget |
|
105
106
|
+------+--------------------------+
|
106
107
|
```
|
107
108
|
|
@@ -200,14 +201,16 @@ COPY "geolite"
|
|
200
201
|
```
|
201
202
|
.
|
202
203
|
├── bin
|
203
|
-
|
204
|
+
├── lib
|
204
205
|
│ └── jetel
|
205
206
|
│ ├── cli
|
206
207
|
│ │ └── cmd
|
207
208
|
│ ├── config
|
208
209
|
│ ├── downloaders
|
209
210
|
│ │ ├── aria
|
210
|
-
│ │
|
211
|
+
│ │ ├── curl
|
212
|
+
│ │ ├── ruby
|
213
|
+
│ │ └── wget
|
211
214
|
│ ├── extensions
|
212
215
|
│ ├── helpers
|
213
216
|
│ ├── loaders
|
data/lib/jetel/modules/gadm/gadm.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'pmap'
|
4
|
+
|
5
|
+
require_relative '../../config/config'
|
6
|
+
require_relative '../../modules/module'
|
7
|
+
|
8
|
+
require 'nokogiri'
|
9
|
+
require 'open-uri'
|
10
|
+
|
11
|
+
module Jetel
|
12
|
+
module Modules
|
13
|
+
class Gadm < Module
|
14
|
+
class << self
|
15
|
+
def sources
|
16
|
+
page = Nokogiri::HTML(open('http://www.gadm.org/country'))
|
17
|
+
|
18
|
+
options = page.css('select[name="cnt"] > option')
|
19
|
+
|
20
|
+
res = options.map do |option|
|
21
|
+
full_name = option['value']
|
22
|
+
name = "#{full_name.split('_').first}"
|
23
|
+
filename = "#{name}_adm_shp.zip"
|
24
|
+
{
|
25
|
+
name: name,
|
26
|
+
url: "http://biogeo.ucdavis.edu/data/gadm2.8/shp/#{filename}",
|
27
|
+
filename_downloaded: filename
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
res
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def download(global_options, options, args)
|
36
|
+
self.class.sources.pmap do |source|
|
37
|
+
download_source(source, global_options.merge(options))
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def extract(global_options, options, args)
|
42
|
+
self.class.sources.pmap do |source|
|
43
|
+
unzip(source, global_options.merge(options))
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def transform(global_options, options, args)
|
48
|
+
self.class.sources.pmap(8) do |source|
|
49
|
+
extracted_file = extracted_file(source, global_options.merge(options))
|
50
|
+
transformed_file = transformed_file(source, global_options.merge(options))
|
51
|
+
dest_dir = transform_dir(source, global_options.merge(options))
|
52
|
+
FileUtils.mkdir_p(dest_dir)
|
53
|
+
|
54
|
+
extracted_dir = extract_dir(source, global_options.merge(options))
|
55
|
+
Dir.glob("#{extracted_dir}/*.shp") do |shapefile|
|
56
|
+
puts "Transforming #{shapefile}"
|
57
|
+
|
58
|
+
# "topojson data/Gadm/AFG/extracted/AFG_adm0.shp -o data/Gadm/AFG/transformed/AFG_adm0.topo.json"
|
59
|
+
cmd = "topojson #{shapefile} -o #{shapefile.gsub(extracted_dir, dest_dir).gsub('.shp', '.topo.json')}"
|
60
|
+
puts cmd
|
61
|
+
PTY.spawn(cmd) do |stdout, stdin, pid|
|
62
|
+
begin
|
63
|
+
# Do stuff with the output here. Just printing to show it works
|
64
|
+
stdout.each { |line| print line }
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/jetel/modules/iso3166/iso3166.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
3
5
|
require 'pmap'
|
4
6
|
require 'zip'
|
5
7
|
require 'csv'
|
@@ -14,10 +16,8 @@ module Jetel
|
|
14
16
|
def sources
|
15
17
|
[
|
16
18
|
{
|
17
|
-
name: '
|
18
|
-
|
19
|
-
filename_transformed: 'IP2LOCATION-ISO3166-2.CSV',
|
20
|
-
url: 'http://www.ip2location.com/downloads/Hi3sL9bnXfe/IP2LOCATION-ISO3166-2.ZIP'
|
19
|
+
name: 'iso3166',
|
20
|
+
url: 'https://en.wikipedia.org/wiki/ISO_3166-1'
|
21
21
|
}
|
22
22
|
]
|
23
23
|
end
|
@@ -31,35 +31,52 @@ module Jetel
|
|
31
31
|
|
32
32
|
def extract(global_options, options, args)
|
33
33
|
self.class.sources.pmap do |source|
|
34
|
-
|
34
|
+
downloaded_file = downloaded_file(source, global_options.merge(options))
|
35
|
+
dest_dir = extract_dir(source, global_options.merge(options))
|
36
|
+
|
37
|
+
puts "Extracting #{downloaded_file}"
|
38
|
+
|
39
|
+
FileUtils.mkdir_p(dest_dir)
|
40
|
+
FileUtils.cp(downloaded_file, dest_dir)
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
38
44
|
def transform(global_options, options, args)
|
39
45
|
self.class.sources.pmap do |source|
|
40
|
-
|
46
|
+
extracted_file = extracted_file(source, global_options.merge(options))
|
47
|
+
dest_dir = transform_dir(source, global_options.merge(options))
|
48
|
+
FileUtils.mkdir_p(dest_dir)
|
49
|
+
|
50
|
+
puts "Transforming #{extracted_file}"
|
41
51
|
|
42
|
-
|
43
|
-
|
52
|
+
page = Nokogiri::HTML(open(extracted_file))
|
53
|
+
rows = page.css('table.wikitable.sortable tr')
|
44
54
|
|
45
|
-
|
55
|
+
headers = [
|
56
|
+
'name',
|
57
|
+
'alpha2',
|
58
|
+
'alpha3',
|
59
|
+
'numeric'
|
60
|
+
]
|
46
61
|
|
47
|
-
|
48
|
-
|
49
|
-
|
62
|
+
transformed_file = "#{extracted_file.gsub(extract_dir(source, global_options.merge(options)), dest_dir)}.csv"
|
63
|
+
CSV.open(transformed_file, 'w', :write_headers => true, :headers => headers, :quote_char => '"', :force_quotes => true) do |csv|
|
64
|
+
rows.each do |row|
|
65
|
+
td = row.css('td')
|
50
66
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
67
|
+
next if td.length < 4
|
68
|
+
|
69
|
+
name = td[0].css('a').text
|
70
|
+
alpha2 = td[1].text
|
71
|
+
alpha3 = td[2].text
|
72
|
+
numeric = td[3].text
|
73
|
+
|
74
|
+
csv << [
|
75
|
+
name,
|
76
|
+
alpha2,
|
77
|
+
alpha3,
|
78
|
+
numeric
|
79
|
+
]
|
63
80
|
end
|
64
81
|
end
|
65
82
|
end
|
data/lib/jetel/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jetel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.14
|
4
|
+
version: 0.0.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -425,6 +425,7 @@ files:
|
|
425
425
|
- lib/jetel/loaders/pg/sql/schema.sql.erb
|
426
426
|
- lib/jetel/loaders/pg/sql/truncate_table.sql.erb
|
427
427
|
- lib/jetel/modules/alexa/alexa.rb
|
428
|
+
- lib/jetel/modules/gadm/gadm.rb
|
428
429
|
- lib/jetel/modules/geolite/geolite.rb
|
429
430
|
- lib/jetel/modules/ip/ip.rb
|
430
431
|
- lib/jetel/modules/iso3166/iso3166.rb
|