jetel 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +37 -13
- data/README.md +161 -24
- data/jetel.gemspec +4 -0
- data/lib/jetel/cli/cmd/loaders_cmd.rb +19 -0
- data/lib/jetel/cli/shared.rb +5 -5
- data/lib/jetel/loaders/couchbase/couchbase.rb +66 -0
- data/lib/jetel/loaders/elasticsearch/elasticsearch.rb +74 -0
- data/lib/jetel/loaders/pg/pg.rb +4 -2
- data/lib/jetel/modules/alexa/alexa.rb +59 -0
- data/lib/jetel/version.rb +1 -1
- metadata +80 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c7606ef95fbeb35e3d08233ab94fb695b7701853
|
4
|
+
data.tar.gz: a059b2f32a381bcb89ef3b41779114167b6823d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e274af8c5a0f8f0c758a828620eabe605d611a92b27c998402101cb47b358921acfd9bd1ee670e98845f7ee1288d2b02564c41b21c4c7d6ec08f6d3cbb904156
|
7
|
+
data.tar.gz: 14e372ef9884f4b613f470858c4dc8ef27865209706a27ae8df89844e4c5f3b1fbe768a93582b099620fdb213c885a9c1170cf4f974c660c5552ce4c117f961e
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
jetel (0.0.
|
4
|
+
jetel (0.0.9)
|
5
5
|
activesupport
|
6
|
+
aws-sdk (~> 2)
|
7
|
+
couchbase (~> 1.3, >= 1.3.14)
|
6
8
|
csv2psql (~> 0.0.19)
|
9
|
+
elasticsearch (~> 1.0, >= 1.0.14)
|
7
10
|
gli
|
8
11
|
i18n
|
9
12
|
json_pure
|
@@ -13,19 +16,9 @@ PATH
|
|
13
16
|
pmap
|
14
17
|
rubyzip
|
15
18
|
terminal-table
|
19
|
+
yajl-ruby (~> 1.2, >= 1.2.1)
|
16
20
|
zip
|
17
21
|
|
18
|
-
PATH
|
19
|
-
remote: /Users/tomaskorcak/dev/csv2psql
|
20
|
-
specs:
|
21
|
-
csv2psql (0.0.19)
|
22
|
-
gli (~> 2.13, >= 2.13.2)
|
23
|
-
json_pure (~> 1.8, >= 1.8.3)
|
24
|
-
lru (~> 0.1, >= 0.1.0)
|
25
|
-
multi_json (~> 1.11, >= 1.11.2)
|
26
|
-
rake (~> 10.4, >= 10.4.2)
|
27
|
-
terminal-table (~> 1.5, >= 1.5.2)
|
28
|
-
|
29
22
|
GEM
|
30
23
|
remote: https://rubygems.org/
|
31
24
|
specs:
|
@@ -38,20 +31,49 @@ GEM
|
|
38
31
|
ast (2.1.0)
|
39
32
|
astrolabe (1.3.1)
|
40
33
|
parser (~> 2.2)
|
34
|
+
aws-sdk (2.1.35)
|
35
|
+
aws-sdk-resources (= 2.1.35)
|
36
|
+
aws-sdk-core (2.1.35)
|
37
|
+
jmespath (~> 1.0)
|
38
|
+
aws-sdk-resources (2.1.35)
|
39
|
+
aws-sdk-core (= 2.1.35)
|
40
|
+
connection_pool (2.2.0)
|
41
|
+
couchbase (1.3.14)
|
42
|
+
connection_pool (>= 1.0.0, <= 3.0.0)
|
43
|
+
multi_json (~> 1.0)
|
44
|
+
yaji (~> 0.3, >= 0.3.2)
|
41
45
|
coveralls (0.8.3)
|
42
46
|
json (~> 1.8)
|
43
47
|
rest-client (>= 1.6.8, < 2)
|
44
48
|
simplecov (~> 0.10.0)
|
45
49
|
term-ansicolor (~> 1.3)
|
46
50
|
thor (~> 0.19.1)
|
51
|
+
csv2psql (0.0.19)
|
52
|
+
gli (~> 2.13, >= 2.13.2)
|
53
|
+
json_pure (~> 1.8, >= 1.8.3)
|
54
|
+
lru (~> 0.1, >= 0.1.0)
|
55
|
+
multi_json (~> 1.11, >= 1.11.2)
|
56
|
+
rake (~> 10.4, >= 10.4.2)
|
57
|
+
terminal-table (~> 1.5, >= 1.5.2)
|
47
58
|
diff-lcs (1.2.5)
|
48
59
|
docile (1.1.5)
|
49
60
|
domain_name (0.5.25)
|
50
61
|
unf (>= 0.0.5, < 1.0.0)
|
62
|
+
elasticsearch (1.0.14)
|
63
|
+
elasticsearch-api (= 1.0.14)
|
64
|
+
elasticsearch-transport (= 1.0.14)
|
65
|
+
elasticsearch-api (1.0.14)
|
66
|
+
multi_json
|
67
|
+
elasticsearch-transport (1.0.14)
|
68
|
+
faraday
|
69
|
+
multi_json
|
70
|
+
faraday (0.9.2)
|
71
|
+
multipart-post (>= 1.2, < 3)
|
51
72
|
gli (2.13.2)
|
52
73
|
http-cookie (1.0.2)
|
53
74
|
domain_name (~> 0.5)
|
54
75
|
i18n (0.7.0)
|
76
|
+
jmespath (1.1.3)
|
55
77
|
json (1.8.3)
|
56
78
|
json_pure (1.8.3)
|
57
79
|
lru (0.1.0)
|
@@ -59,6 +81,7 @@ GEM
|
|
59
81
|
mini_portile (0.6.2)
|
60
82
|
minitest (5.8.2)
|
61
83
|
multi_json (1.11.2)
|
84
|
+
multipart-post (2.0.0)
|
62
85
|
netrc (0.11.0)
|
63
86
|
nokogiri (1.6.6.2)
|
64
87
|
mini_portile (~> 0.6.0)
|
@@ -110,6 +133,8 @@ GEM
|
|
110
133
|
unf (0.1.4)
|
111
134
|
unf_ext
|
112
135
|
unf_ext (0.0.7.1)
|
136
|
+
yaji (0.3.5)
|
137
|
+
yajl-ruby (1.2.1)
|
113
138
|
zip (2.0.2)
|
114
139
|
|
115
140
|
PLATFORMS
|
@@ -118,7 +143,6 @@ PLATFORMS
|
|
118
143
|
DEPENDENCIES
|
119
144
|
bundler (~> 1.5)
|
120
145
|
coveralls
|
121
|
-
csv2psql!
|
122
146
|
jetel!
|
123
147
|
rake
|
124
148
|
rspec
|
data/README.md
CHANGED
@@ -7,22 +7,169 @@ Run `jetel`
|
|
7
7
|
```
|
8
8
|
$ jetel
|
9
9
|
NAME
|
10
|
-
jetel -
|
10
|
+
jetel - Simple custom made tool for data download and basic ETL
|
11
11
|
|
12
12
|
SYNOPSIS
|
13
13
|
jetel [global options] command [command options] [arguments...]
|
14
14
|
|
15
|
+
VERSION
|
16
|
+
0.0.8
|
17
|
+
|
15
18
|
GLOBAL OPTIONS
|
16
|
-
--
|
19
|
+
-d, --download_dir=download-dir - Download directory (default: data)
|
20
|
+
--help - Show this message
|
21
|
+
-l, --data_loader=data-loader - Data Loader (default: pg://jetel:jetel@localhost:5432/jetel)
|
22
|
+
-t, --timeout=download-timeout - Download timeout (default: 600)
|
23
|
+
--version - Display the program version
|
24
|
+
|
25
|
+
COMMANDS
|
26
|
+
config - Show config
|
27
|
+
geolite, Geolite - Module geolite
|
28
|
+
help - Shows a list of commands or help for one command
|
29
|
+
ip, Ip - Module ip
|
30
|
+
iso3166, Iso3166 - Module iso3166
|
31
|
+
modules - Print modules info
|
32
|
+
nga, Nga - Module nga
|
33
|
+
sfpd, Sfpd - Module sfpd
|
34
|
+
version - Print version info
|
35
|
+
wifileaks, Wifileaks - Module wifileaks
|
36
|
+
```
|
37
|
+
|
38
|
+
### Show help for command
|
39
|
+
|
40
|
+
```
|
41
|
+
$ jetel help geolite
|
42
|
+
NAME
|
43
|
+
geolite - Module geolite
|
44
|
+
|
45
|
+
SYNOPSIS
|
46
|
+
jetel [global options] geolite download
|
47
|
+
jetel [global options] geolite extract
|
48
|
+
jetel [global options] geolite load [--analyze_num_rows num] [--column_type column-name=column-type]
|
49
|
+
jetel [global options] geolite transform
|
17
50
|
|
18
51
|
COMMANDS
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
52
|
+
download - download geolite
|
53
|
+
extract - extract geolite
|
54
|
+
load - load geolite
|
55
|
+
transform - transform geolite
|
56
|
+
```
|
57
|
+
|
58
|
+
|
59
|
+
### Show help for subcommand
|
60
|
+
|
61
|
+
```
|
62
|
+
$ jetel help geolite download
|
63
|
+
NAME
|
64
|
+
download - download geolite
|
65
|
+
|
66
|
+
SYNOPSIS
|
67
|
+
jetel [global options] geolite download
|
68
|
+
```
|
69
|
+
|
70
|
+
### Show modules/sources
|
71
|
+
|
72
|
+
```
|
73
|
+
$ jetel modules
|
74
|
+
+-----------+---------------------------+
|
75
|
+
| Name | Class |
|
76
|
+
+-----------+---------------------------+
|
77
|
+
| geolite | Jetel::Modules::Geolite |
|
78
|
+
| ip | Jetel::Modules::Ip |
|
79
|
+
| iso3166 | Jetel::Modules::Iso3166 |
|
80
|
+
| nga | Jetel::Modules::Nga |
|
81
|
+
| sfpd | Jetel::Modules::Sfpd |
|
82
|
+
| wifileaks | Jetel::Modules::Wifileaks |
|
83
|
+
+-----------+---------------------------+
|
84
|
+
```
|
85
|
+
|
86
|
+
### Show loaders
|
87
|
+
|
88
|
+
```
|
89
|
+
$ bundle exec ./bin/jetel loaders
|
90
|
+
+-----------+---------------------------+
|
91
|
+
| Name | Class |
|
92
|
+
+-----------+---------------------------+
|
93
|
+
| couchbase | Jetel::Loaders::Couchbase |
|
94
|
+
| pg | Jetel::Loaders::Pg |
|
95
|
+
+-----------+---------------------------+
|
96
|
+
```
|
97
|
+
|
98
|
+
### Download source
|
99
|
+
|
100
|
+
```
|
101
|
+
$ jetel geolite download
|
102
|
+
Downloading http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip
|
103
|
+
aria2c -j 4 -t 600 -d "data/Geolite/geolite/downloaded" -o "GeoLite2-City-CSV.zip" http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip
|
104
|
+
|
105
|
+
11/06 17:51:35 [NOTICE] File already exists. Renamed to data/Geolite/geolite/downloaded/GeoLite2-City-CSV.zip.1.
|
106
|
+
|
107
|
+
11/06 17:51:35 [NOTICE] Allocating disk space. Use --file-allocation=none to disable it. See --file-allocation option in man page for more details.
|
108
|
+
|
109
|
+
11/06 17:51:48 [NOTICE] Download complete: data/Geolite/geolite/downloaded/GeoLite2-City-CSV.zip.1
|
110
|
+
|
111
|
+
Download Results:
|
112
|
+
gid |stat|avg speed |path/URI
|
113
|
+
======+====+===========+=======================================================
|
114
|
+
d0bf04|OK | 2.4MiB/s|data/Geolite/geolite/downloaded/GeoLite2-City-CSV.zip.1
|
115
|
+
|
116
|
+
Status Legend:
|
117
|
+
(OK):download completed.
|
118
|
+
```
|
119
|
+
|
120
|
+
### Extract source
|
121
|
+
|
122
|
+
```
|
123
|
+
$ jetel geolite extract
|
124
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Blocks-IPv6.csv
|
125
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-ja.csv
|
126
|
+
Extracting GeoLite2-City-CSV_20151103/COPYRIGHT.txt
|
127
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-zh-CN.csv
|
128
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Blocks-IPv4.csv
|
129
|
+
Extracting GeoLite2-City-CSV_20151103/LICENSE.txt
|
130
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-fr.csv
|
131
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-ru.csv
|
132
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-en.csv
|
133
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-pt-BR.csv
|
134
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-de.csv
|
135
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-es.csv
|
136
|
+
```
|
137
|
+
|
138
|
+
### Transform source
|
139
|
+
|
140
|
+
```
|
141
|
+
$ jetel geolite transform
|
142
|
+
Transforming data/Geolite/geolite/extracted/GeoLite2-City-Blocks-IPv4.csv
|
143
|
+
```
|
144
|
+
|
145
|
+
### Load source
|
146
|
+
|
147
|
+
```
|
148
|
+
$ jetel geolite load --analyze_num_rows 50000
|
149
|
+
DROP TABLE IF EXISTS "geolite";
|
150
|
+
CREATE TABLE "geolite"
|
151
|
+
(
|
152
|
+
"network" CIDR NOT NULL,
|
153
|
+
"geoname_id" BIGINT,
|
154
|
+
"registered_country_geoname_id" BIGINT,
|
155
|
+
"represented_country_geoname_id" TEXT,
|
156
|
+
"is_anonymous_proxy" BOOLEAN NOT NULL,
|
157
|
+
"is_satellite_provider" BOOLEAN NOT NULL,
|
158
|
+
"postal_code" TEXT,
|
159
|
+
"latitude" DECIMAL,
|
160
|
+
"longitude" DECIMAL
|
161
|
+
)
|
162
|
+
WITH (
|
163
|
+
OIDS=FALSE
|
164
|
+
);
|
165
|
+
COPY "geolite"
|
166
|
+
FROM STDIN
|
167
|
+
|
168
|
+
WITH DELIMITER ','
|
169
|
+
|
170
|
+
CSV HEADER
|
171
|
+
;
|
172
|
+
3037320 row(s) affected
|
26
173
|
```
|
27
174
|
|
28
175
|
## Structure
|
@@ -49,22 +196,12 @@ COMMANDS
|
|
49
196
|
└── test
|
50
197
|
```
|
51
198
|
|
52
|
-
## Examples
|
53
|
-
|
54
|
-
**Plays nicely with [csv2psql](https://github.com/korczis/csv2psql)**
|
55
|
-
|
56
|
-
```
|
57
|
-
$ csv2psql convert -t --drop-table --create-table -t afrinic tmp/Ip/afrinic/transformed/delegated-afrinic-latest | psql -h 127.0.0.1 -U jetel
|
58
|
-
|
59
|
-
$ csv2psql convert -t --drop-table --create-table -t apnic tmp/Ip/apnic/transformed/delegated-apnic-latest | psql -h 127.0.0.1 -U jetel
|
60
|
-
```
|
61
|
-
|
62
199
|
### Rake
|
63
200
|
|
64
201
|
```
|
65
|
-
|
66
|
-
rake build # Build jetel-0.0.
|
67
|
-
rake install # Build and install jetel-0.0.
|
68
|
-
rake install:local # Build and install jetel-0.0.
|
69
|
-
rake release # Create tag v0.0.
|
202
|
+
$ rake -T
|
203
|
+
rake gem:build # Build jetel-0.0.8.gem into the pkg directory
|
204
|
+
rake gem:install # Build and install jetel-0.0.8.gem into system gems
|
205
|
+
rake gem:install:local # Build and install jetel-0.0.8.gem into system gems without network access
|
206
|
+
rake gem:release # Create tag v0.0.8 and build and push jetel-0.0.8.gem to Rubygems
|
70
207
|
```
|
data/jetel.gemspec
CHANGED
@@ -20,7 +20,10 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
22
|
spec.add_dependency 'activesupport'
|
23
|
+
spec.add_dependency 'aws-sdk', '~> 2'
|
24
|
+
spec.add_dependency 'couchbase', '~> 1.3', '>= 1.3.14'
|
23
25
|
spec.add_dependency 'csv2psql', '~> 0.0.19'
|
26
|
+
spec.add_dependency 'elasticsearch', '~> 1.0', '>= 1.0.14'
|
24
27
|
spec.add_dependency 'gli'
|
25
28
|
spec.add_dependency 'i18n'
|
26
29
|
spec.add_dependency 'json_pure'
|
@@ -30,6 +33,7 @@ Gem::Specification.new do |spec|
|
|
30
33
|
spec.add_dependency 'pmap'
|
31
34
|
spec.add_dependency 'rubyzip'
|
32
35
|
spec.add_dependency 'terminal-table'
|
36
|
+
spec.add_dependency 'yajl-ruby', '~> 1.2', '>= 1.2.1'
|
33
37
|
spec.add_dependency 'zip'
|
34
38
|
|
35
39
|
spec.add_development_dependency 'bundler', '~> 1.5'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'terminal-table'
|
4
|
+
|
5
|
+
require_relative '../../loaders/loaders'
|
6
|
+
|
7
|
+
LOADERS = Jetel::Loaders.loaders
|
8
|
+
|
9
|
+
desc 'Print loaders info'
|
10
|
+
command :loaders do |c|
|
11
|
+
c.action do |_global_options, _options, _args|
|
12
|
+
rows = LOADERS.map do |m|
|
13
|
+
[m[:name], m[:klass]]
|
14
|
+
end
|
15
|
+
|
16
|
+
table = Terminal::Table.new :headings => %w(Name Class), :rows => rows
|
17
|
+
puts table
|
18
|
+
end
|
19
|
+
end
|
data/lib/jetel/cli/shared.rb
CHANGED
@@ -0,0 +1,66 @@
|
|
1
|
+
require_relative '../loader'
|
2
|
+
|
3
|
+
require_relative '../../helpers/helpers'
|
4
|
+
|
5
|
+
require 'couchbase'
|
6
|
+
require 'yajl/json_gem'
|
7
|
+
|
8
|
+
require 'securerandom'
|
9
|
+
|
10
|
+
module Jetel
|
11
|
+
module Loaders
|
12
|
+
class Couchbase < Loader
|
13
|
+
attr_reader :client
|
14
|
+
|
15
|
+
def initialize(uri)
|
16
|
+
super
|
17
|
+
|
18
|
+
tmp = uri.split('://')
|
19
|
+
tmp = tmp[1].split('@')
|
20
|
+
|
21
|
+
parts = tmp[0].split(':')
|
22
|
+
user = parts[0]
|
23
|
+
password = parts[1]
|
24
|
+
|
25
|
+
parts = tmp[1].split('/')
|
26
|
+
host, port = parts[0].split(':')
|
27
|
+
bucket = parts[1]
|
28
|
+
|
29
|
+
opts = {
|
30
|
+
:host => host,
|
31
|
+
:port => (port && port.to_i) || 8091,
|
32
|
+
# :options => '',
|
33
|
+
# :tty => '',
|
34
|
+
:bucket => bucket,
|
35
|
+
# :username => user,
|
36
|
+
# :password => password,
|
37
|
+
:connection_timeout => 360e6,
|
38
|
+
:timeout => 360e6
|
39
|
+
}
|
40
|
+
|
41
|
+
@client = ::Couchbase.connect(opts)
|
42
|
+
end
|
43
|
+
|
44
|
+
def load(modul, source, file, opts)
|
45
|
+
super
|
46
|
+
|
47
|
+
cache = {}
|
48
|
+
CSV.open(file, 'rt', :headers => true, :converters => :all) do |csv|
|
49
|
+
csv.each do |row|
|
50
|
+
cache[SecureRandom.uuid] = row.to_hash
|
51
|
+
if cache.length === 5_000
|
52
|
+
client.add(cache)
|
53
|
+
cache = {}
|
54
|
+
print '.'
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
if cache.length > 0
|
59
|
+
client.add(cache)
|
60
|
+
cache = {}
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require_relative '../loader'
|
2
|
+
|
3
|
+
require_relative '../../helpers/helpers'
|
4
|
+
|
5
|
+
require 'elasticsearch'
|
6
|
+
|
7
|
+
module Jetel
|
8
|
+
module Loaders
|
9
|
+
class Elasticsearch < Loader
|
10
|
+
attr_reader :client, :index, :document_type
|
11
|
+
|
12
|
+
def initialize(uri)
|
13
|
+
super
|
14
|
+
|
15
|
+
tmp = uri.split('://')
|
16
|
+
tmp = tmp[1].split('@')
|
17
|
+
|
18
|
+
parts = tmp[0].split(':')
|
19
|
+
user = parts[0]
|
20
|
+
password = parts[1]
|
21
|
+
|
22
|
+
parts = tmp[1].split('/')
|
23
|
+
host, port = parts[0].split(':')
|
24
|
+
@index, @document_type = parts[1], parts[2]
|
25
|
+
|
26
|
+
opts = {
|
27
|
+
:host => host,
|
28
|
+
:port => (port && port.to_i) || 9200,
|
29
|
+
# :options => '',
|
30
|
+
# :tty => '',
|
31
|
+
# :bucket => bucket,
|
32
|
+
# :username => user,
|
33
|
+
# :password => password,
|
34
|
+
# :connection_timeout => 360e6,
|
35
|
+
# :timeout => 360e6
|
36
|
+
}
|
37
|
+
|
38
|
+
@client = ::Elasticsearch::Client.new(opts)
|
39
|
+
|
40
|
+
puts client.cluster.health
|
41
|
+
|
42
|
+
# client.index index: index, type: document_type, body: {title: 'Test'}
|
43
|
+
end
|
44
|
+
|
45
|
+
def load(modul, source, file, opts)
|
46
|
+
super
|
47
|
+
|
48
|
+
cache = []
|
49
|
+
CSV.open(file, 'rt', :headers => true, :converters => :all) do |csv|
|
50
|
+
csv.each do |row|
|
51
|
+
cache << {
|
52
|
+
create: {
|
53
|
+
_index: @index,
|
54
|
+
_type: @document_type,
|
55
|
+
# _id: 1,
|
56
|
+
data: row.to_hash
|
57
|
+
}
|
58
|
+
}
|
59
|
+
if cache.length === 5_000
|
60
|
+
client.bulk(body: cache)
|
61
|
+
cache = []
|
62
|
+
print '.'
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
if cache.length > 0
|
67
|
+
client.bulk(body: cache)
|
68
|
+
cache = []
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/jetel/loaders/pg/pg.rb
CHANGED
@@ -107,9 +107,11 @@ module Jetel
|
|
107
107
|
|
108
108
|
# Display any error messages
|
109
109
|
while res = @client.get_result
|
110
|
-
|
111
|
-
|
110
|
+
e_message = res.error_message
|
111
|
+
if e_message && !e_message.empty?
|
112
|
+
puts e_message
|
112
113
|
end
|
114
|
+
puts "#{res.cmdtuples} row(s) affected"
|
113
115
|
end
|
114
116
|
|
115
117
|
sql
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'pmap'
|
4
|
+
|
5
|
+
require_relative '../../config/config'
|
6
|
+
require_relative '../../modules/module'
|
7
|
+
|
8
|
+
module Jetel
|
9
|
+
module Modules
|
10
|
+
class Alexa < Module
|
11
|
+
class << self
|
12
|
+
def sources
|
13
|
+
[
|
14
|
+
{
|
15
|
+
name: 'alexa',
|
16
|
+
# filename_downloaded: 'top-1m.csv.zip',
|
17
|
+
filename_extracted: 'top-1m.csv',
|
18
|
+
filename_transformed: 'top-1m.csv',
|
19
|
+
url: 'http://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
|
20
|
+
}
|
21
|
+
]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def download(global_options, options, args)
|
26
|
+
self.class.sources.pmap do |source|
|
27
|
+
download_source(source, global_options.merge(options))
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def extract(global_options, options, args)
|
32
|
+
self.class.sources.pmap do |source|
|
33
|
+
unzip(source, global_options.merge(options))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def transform(global_options, options, args)
|
38
|
+
self.class.sources.pmap do |source|
|
39
|
+
extracted_file = extracted_file(source, global_options.merge(options))
|
40
|
+
transformed_file = transformed_file(source, global_options.merge(options))
|
41
|
+
dest_dir = transform_dir(source, global_options.merge(options))
|
42
|
+
|
43
|
+
puts "Transforming #{extracted_file}"
|
44
|
+
|
45
|
+
FileUtils.mkdir_p(dest_dir)
|
46
|
+
File.open(extracted_file, 'rt') do |fin|
|
47
|
+
File.open(transformed_file, 'wt') do |fout|
|
48
|
+
fout.puts('rank,url')
|
49
|
+
|
50
|
+
while buff = fin.read(4096)
|
51
|
+
fout.write(buff)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
data/lib/jetel/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jetel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -24,6 +24,40 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: aws-sdk
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: couchbase
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 1.3.14
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '1.3'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 1.3.14
|
27
61
|
- !ruby/object:Gem::Dependency
|
28
62
|
name: csv2psql
|
29
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,6 +72,26 @@ dependencies:
|
|
38
72
|
- - "~>"
|
39
73
|
- !ruby/object:Gem::Version
|
40
74
|
version: 0.0.19
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: elasticsearch
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '1.0'
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: 1.0.14
|
85
|
+
type: :runtime
|
86
|
+
prerelease: false
|
87
|
+
version_requirements: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - "~>"
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '1.0'
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 1.0.14
|
41
95
|
- !ruby/object:Gem::Dependency
|
42
96
|
name: gli
|
43
97
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,6 +218,26 @@ dependencies:
|
|
164
218
|
- - ">="
|
165
219
|
- !ruby/object:Gem::Version
|
166
220
|
version: '0'
|
221
|
+
- !ruby/object:Gem::Dependency
|
222
|
+
name: yajl-ruby
|
223
|
+
requirement: !ruby/object:Gem::Requirement
|
224
|
+
requirements:
|
225
|
+
- - "~>"
|
226
|
+
- !ruby/object:Gem::Version
|
227
|
+
version: '1.2'
|
228
|
+
- - ">="
|
229
|
+
- !ruby/object:Gem::Version
|
230
|
+
version: 1.2.1
|
231
|
+
type: :runtime
|
232
|
+
prerelease: false
|
233
|
+
version_requirements: !ruby/object:Gem::Requirement
|
234
|
+
requirements:
|
235
|
+
- - "~>"
|
236
|
+
- !ruby/object:Gem::Version
|
237
|
+
version: '1.2'
|
238
|
+
- - ">="
|
239
|
+
- !ruby/object:Gem::Version
|
240
|
+
version: 1.2.1
|
167
241
|
- !ruby/object:Gem::Dependency
|
168
242
|
name: zip
|
169
243
|
requirement: !ruby/object:Gem::Requirement
|
@@ -288,6 +362,7 @@ files:
|
|
288
362
|
- lib/jetel/cli/app.rb
|
289
363
|
- lib/jetel/cli/cli.rb
|
290
364
|
- lib/jetel/cli/cmd/config_cmd.rb
|
365
|
+
- lib/jetel/cli/cmd/loaders_cmd.rb
|
291
366
|
- lib/jetel/cli/cmd/modules_cmd.rb
|
292
367
|
- lib/jetel/cli/cmd/version_cmd.rb
|
293
368
|
- lib/jetel/cli/shared.rb
|
@@ -302,6 +377,8 @@ files:
|
|
302
377
|
- lib/jetel/helpers/helpers.rb
|
303
378
|
- lib/jetel/jetel/jetel.rb
|
304
379
|
- lib/jetel/lib.rb
|
380
|
+
- lib/jetel/loaders/couchbase/couchbase.rb
|
381
|
+
- lib/jetel/loaders/elasticsearch/elasticsearch.rb
|
305
382
|
- lib/jetel/loaders/loader.rb
|
306
383
|
- lib/jetel/loaders/loaders.rb
|
307
384
|
- lib/jetel/loaders/pg/pg.rb
|
@@ -311,6 +388,7 @@ files:
|
|
311
388
|
- lib/jetel/loaders/pg/sql/header.sql.erb
|
312
389
|
- lib/jetel/loaders/pg/sql/schema.sql.erb
|
313
390
|
- lib/jetel/loaders/pg/sql/truncate_table.sql.erb
|
391
|
+
- lib/jetel/modules/alexa/alexa.rb
|
314
392
|
- lib/jetel/modules/geolite/geolite.rb
|
315
393
|
- lib/jetel/modules/ip/ip.rb
|
316
394
|
- lib/jetel/modules/iso3166/iso3166.rb
|
@@ -347,4 +425,3 @@ specification_version: 4
|
|
347
425
|
summary: Jetel
|
348
426
|
test_files:
|
349
427
|
- test/spec_helper.rb
|
350
|
-
has_rdoc:
|