jetel 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +37 -13
- data/README.md +161 -24
- data/jetel.gemspec +4 -0
- data/lib/jetel/cli/cmd/loaders_cmd.rb +19 -0
- data/lib/jetel/cli/shared.rb +5 -5
- data/lib/jetel/loaders/couchbase/couchbase.rb +66 -0
- data/lib/jetel/loaders/elasticsearch/elasticsearch.rb +74 -0
- data/lib/jetel/loaders/pg/pg.rb +4 -2
- data/lib/jetel/modules/alexa/alexa.rb +59 -0
- data/lib/jetel/version.rb +1 -1
- metadata +80 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c7606ef95fbeb35e3d08233ab94fb695b7701853
|
|
4
|
+
data.tar.gz: a059b2f32a381bcb89ef3b41779114167b6823d5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e274af8c5a0f8f0c758a828620eabe605d611a92b27c998402101cb47b358921acfd9bd1ee670e98845f7ee1288d2b02564c41b21c4c7d6ec08f6d3cbb904156
|
|
7
|
+
data.tar.gz: 14e372ef9884f4b613f470858c4dc8ef27865209706a27ae8df89844e4c5f3b1fbe768a93582b099620fdb213c885a9c1170cf4f974c660c5552ce4c117f961e
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
jetel (0.0.
|
|
4
|
+
jetel (0.0.9)
|
|
5
5
|
activesupport
|
|
6
|
+
aws-sdk (~> 2)
|
|
7
|
+
couchbase (~> 1.3, >= 1.3.14)
|
|
6
8
|
csv2psql (~> 0.0.19)
|
|
9
|
+
elasticsearch (~> 1.0, >= 1.0.14)
|
|
7
10
|
gli
|
|
8
11
|
i18n
|
|
9
12
|
json_pure
|
|
@@ -13,19 +16,9 @@ PATH
|
|
|
13
16
|
pmap
|
|
14
17
|
rubyzip
|
|
15
18
|
terminal-table
|
|
19
|
+
yajl-ruby (~> 1.2, >= 1.2.1)
|
|
16
20
|
zip
|
|
17
21
|
|
|
18
|
-
PATH
|
|
19
|
-
remote: /Users/tomaskorcak/dev/csv2psql
|
|
20
|
-
specs:
|
|
21
|
-
csv2psql (0.0.19)
|
|
22
|
-
gli (~> 2.13, >= 2.13.2)
|
|
23
|
-
json_pure (~> 1.8, >= 1.8.3)
|
|
24
|
-
lru (~> 0.1, >= 0.1.0)
|
|
25
|
-
multi_json (~> 1.11, >= 1.11.2)
|
|
26
|
-
rake (~> 10.4, >= 10.4.2)
|
|
27
|
-
terminal-table (~> 1.5, >= 1.5.2)
|
|
28
|
-
|
|
29
22
|
GEM
|
|
30
23
|
remote: https://rubygems.org/
|
|
31
24
|
specs:
|
|
@@ -38,20 +31,49 @@ GEM
|
|
|
38
31
|
ast (2.1.0)
|
|
39
32
|
astrolabe (1.3.1)
|
|
40
33
|
parser (~> 2.2)
|
|
34
|
+
aws-sdk (2.1.35)
|
|
35
|
+
aws-sdk-resources (= 2.1.35)
|
|
36
|
+
aws-sdk-core (2.1.35)
|
|
37
|
+
jmespath (~> 1.0)
|
|
38
|
+
aws-sdk-resources (2.1.35)
|
|
39
|
+
aws-sdk-core (= 2.1.35)
|
|
40
|
+
connection_pool (2.2.0)
|
|
41
|
+
couchbase (1.3.14)
|
|
42
|
+
connection_pool (>= 1.0.0, <= 3.0.0)
|
|
43
|
+
multi_json (~> 1.0)
|
|
44
|
+
yaji (~> 0.3, >= 0.3.2)
|
|
41
45
|
coveralls (0.8.3)
|
|
42
46
|
json (~> 1.8)
|
|
43
47
|
rest-client (>= 1.6.8, < 2)
|
|
44
48
|
simplecov (~> 0.10.0)
|
|
45
49
|
term-ansicolor (~> 1.3)
|
|
46
50
|
thor (~> 0.19.1)
|
|
51
|
+
csv2psql (0.0.19)
|
|
52
|
+
gli (~> 2.13, >= 2.13.2)
|
|
53
|
+
json_pure (~> 1.8, >= 1.8.3)
|
|
54
|
+
lru (~> 0.1, >= 0.1.0)
|
|
55
|
+
multi_json (~> 1.11, >= 1.11.2)
|
|
56
|
+
rake (~> 10.4, >= 10.4.2)
|
|
57
|
+
terminal-table (~> 1.5, >= 1.5.2)
|
|
47
58
|
diff-lcs (1.2.5)
|
|
48
59
|
docile (1.1.5)
|
|
49
60
|
domain_name (0.5.25)
|
|
50
61
|
unf (>= 0.0.5, < 1.0.0)
|
|
62
|
+
elasticsearch (1.0.14)
|
|
63
|
+
elasticsearch-api (= 1.0.14)
|
|
64
|
+
elasticsearch-transport (= 1.0.14)
|
|
65
|
+
elasticsearch-api (1.0.14)
|
|
66
|
+
multi_json
|
|
67
|
+
elasticsearch-transport (1.0.14)
|
|
68
|
+
faraday
|
|
69
|
+
multi_json
|
|
70
|
+
faraday (0.9.2)
|
|
71
|
+
multipart-post (>= 1.2, < 3)
|
|
51
72
|
gli (2.13.2)
|
|
52
73
|
http-cookie (1.0.2)
|
|
53
74
|
domain_name (~> 0.5)
|
|
54
75
|
i18n (0.7.0)
|
|
76
|
+
jmespath (1.1.3)
|
|
55
77
|
json (1.8.3)
|
|
56
78
|
json_pure (1.8.3)
|
|
57
79
|
lru (0.1.0)
|
|
@@ -59,6 +81,7 @@ GEM
|
|
|
59
81
|
mini_portile (0.6.2)
|
|
60
82
|
minitest (5.8.2)
|
|
61
83
|
multi_json (1.11.2)
|
|
84
|
+
multipart-post (2.0.0)
|
|
62
85
|
netrc (0.11.0)
|
|
63
86
|
nokogiri (1.6.6.2)
|
|
64
87
|
mini_portile (~> 0.6.0)
|
|
@@ -110,6 +133,8 @@ GEM
|
|
|
110
133
|
unf (0.1.4)
|
|
111
134
|
unf_ext
|
|
112
135
|
unf_ext (0.0.7.1)
|
|
136
|
+
yaji (0.3.5)
|
|
137
|
+
yajl-ruby (1.2.1)
|
|
113
138
|
zip (2.0.2)
|
|
114
139
|
|
|
115
140
|
PLATFORMS
|
|
@@ -118,7 +143,6 @@ PLATFORMS
|
|
|
118
143
|
DEPENDENCIES
|
|
119
144
|
bundler (~> 1.5)
|
|
120
145
|
coveralls
|
|
121
|
-
csv2psql!
|
|
122
146
|
jetel!
|
|
123
147
|
rake
|
|
124
148
|
rspec
|
data/README.md
CHANGED
|
@@ -7,22 +7,169 @@ Run `jetel`
|
|
|
7
7
|
```
|
|
8
8
|
$ jetel
|
|
9
9
|
NAME
|
|
10
|
-
jetel -
|
|
10
|
+
jetel - Simple custom made tool for data download and basic ETL
|
|
11
11
|
|
|
12
12
|
SYNOPSIS
|
|
13
13
|
jetel [global options] command [command options] [arguments...]
|
|
14
14
|
|
|
15
|
+
VERSION
|
|
16
|
+
0.0.8
|
|
17
|
+
|
|
15
18
|
GLOBAL OPTIONS
|
|
16
|
-
--
|
|
19
|
+
-d, --download_dir=download-dir - Download directory (default: data)
|
|
20
|
+
--help - Show this message
|
|
21
|
+
-l, --data_loader=data-loader - Data Loader (default: pg://jetel:jetel@localhost:5432/jetel)
|
|
22
|
+
-t, --timeout=download-timeout - Download timeout (default: 600)
|
|
23
|
+
--version - Display the program version
|
|
24
|
+
|
|
25
|
+
COMMANDS
|
|
26
|
+
config - Show config
|
|
27
|
+
geolite, Geolite - Module geolite
|
|
28
|
+
help - Shows a list of commands or help for one command
|
|
29
|
+
ip, Ip - Module ip
|
|
30
|
+
iso3166, Iso3166 - Module iso3166
|
|
31
|
+
modules - Print modules info
|
|
32
|
+
nga, Nga - Module nga
|
|
33
|
+
sfpd, Sfpd - Module sfpd
|
|
34
|
+
version - Print version info
|
|
35
|
+
wifileaks, Wifileaks - Module wifileaks
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Show help for command
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
$ jetel help geolite
|
|
42
|
+
NAME
|
|
43
|
+
geolite - Module geolite
|
|
44
|
+
|
|
45
|
+
SYNOPSIS
|
|
46
|
+
jetel [global options] geolite download
|
|
47
|
+
jetel [global options] geolite extract
|
|
48
|
+
jetel [global options] geolite load [--analyze_num_rows num] [--column_type column-name=column-type]
|
|
49
|
+
jetel [global options] geolite transform
|
|
17
50
|
|
|
18
51
|
COMMANDS
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
52
|
+
download - download geolite
|
|
53
|
+
extract - extract geolite
|
|
54
|
+
load - load geolite
|
|
55
|
+
transform - transform geolite
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
### Show help for subcommand
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
$ jetel help geolite download
|
|
63
|
+
NAME
|
|
64
|
+
download - download geolite
|
|
65
|
+
|
|
66
|
+
SYNOPSIS
|
|
67
|
+
jetel [global options] geolite download
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Show modules/sources
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
$ jetel modules
|
|
74
|
+
+-----------+---------------------------+
|
|
75
|
+
| Name | Class |
|
|
76
|
+
+-----------+---------------------------+
|
|
77
|
+
| geolite | Jetel::Modules::Geolite |
|
|
78
|
+
| ip | Jetel::Modules::Ip |
|
|
79
|
+
| iso3166 | Jetel::Modules::Iso3166 |
|
|
80
|
+
| nga | Jetel::Modules::Nga |
|
|
81
|
+
| sfpd | Jetel::Modules::Sfpd |
|
|
82
|
+
| wifileaks | Jetel::Modules::Wifileaks |
|
|
83
|
+
+-----------+---------------------------+
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Show loaders
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
$ bundle exec ./bin/jetel loaders
|
|
90
|
+
+-----------+---------------------------+
|
|
91
|
+
| Name | Class |
|
|
92
|
+
+-----------+---------------------------+
|
|
93
|
+
| couchbase | Jetel::Loaders::Couchbase |
|
|
94
|
+
| pg | Jetel::Loaders::Pg |
|
|
95
|
+
+-----------+---------------------------+
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Download source
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
$ jetel geolite download
|
|
102
|
+
Downloading http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip
|
|
103
|
+
aria2c -j 4 -t 600 -d "data/Geolite/geolite/downloaded" -o "GeoLite2-City-CSV.zip" http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip
|
|
104
|
+
|
|
105
|
+
11/06 17:51:35 [NOTICE] File already exists. Renamed to data/Geolite/geolite/downloaded/GeoLite2-City-CSV.zip.1.
|
|
106
|
+
|
|
107
|
+
11/06 17:51:35 [NOTICE] Allocating disk space. Use --file-allocation=none to disable it. See --file-allocation option in man page for more details.
|
|
108
|
+
|
|
109
|
+
11/06 17:51:48 [NOTICE] Download complete: data/Geolite/geolite/downloaded/GeoLite2-City-CSV.zip.1
|
|
110
|
+
|
|
111
|
+
Download Results:
|
|
112
|
+
gid |stat|avg speed |path/URI
|
|
113
|
+
======+====+===========+=======================================================
|
|
114
|
+
d0bf04|OK | 2.4MiB/s|data/Geolite/geolite/downloaded/GeoLite2-City-CSV.zip.1
|
|
115
|
+
|
|
116
|
+
Status Legend:
|
|
117
|
+
(OK):download completed.
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Extract source
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
$ jetel geolite extract
|
|
124
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Blocks-IPv6.csv
|
|
125
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-ja.csv
|
|
126
|
+
Extracting GeoLite2-City-CSV_20151103/COPYRIGHT.txt
|
|
127
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-zh-CN.csv
|
|
128
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Blocks-IPv4.csv
|
|
129
|
+
Extracting GeoLite2-City-CSV_20151103/LICENSE.txt
|
|
130
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-fr.csv
|
|
131
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-ru.csv
|
|
132
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-en.csv
|
|
133
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-pt-BR.csv
|
|
134
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-de.csv
|
|
135
|
+
Extracting GeoLite2-City-CSV_20151103/GeoLite2-City-Locations-es.csv
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Transform source
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
$ jetel geolite transform
|
|
142
|
+
Transforming data/Geolite/geolite/extracted/GeoLite2-City-Blocks-IPv4.csv
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Load source
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
$ jetel geolite load --analyze_num_rows 50000
|
|
149
|
+
DROP TABLE IF EXISTS "geolite";
|
|
150
|
+
CREATE TABLE "geolite"
|
|
151
|
+
(
|
|
152
|
+
"network" CIDR NOT NULL,
|
|
153
|
+
"geoname_id" BIGINT,
|
|
154
|
+
"registered_country_geoname_id" BIGINT,
|
|
155
|
+
"represented_country_geoname_id" TEXT,
|
|
156
|
+
"is_anonymous_proxy" BOOLEAN NOT NULL,
|
|
157
|
+
"is_satellite_provider" BOOLEAN NOT NULL,
|
|
158
|
+
"postal_code" TEXT,
|
|
159
|
+
"latitude" DECIMAL,
|
|
160
|
+
"longitude" DECIMAL
|
|
161
|
+
)
|
|
162
|
+
WITH (
|
|
163
|
+
OIDS=FALSE
|
|
164
|
+
);
|
|
165
|
+
COPY "geolite"
|
|
166
|
+
FROM STDIN
|
|
167
|
+
|
|
168
|
+
WITH DELIMITER ','
|
|
169
|
+
|
|
170
|
+
CSV HEADER
|
|
171
|
+
;
|
|
172
|
+
3037320 row(s) affected
|
|
26
173
|
```
|
|
27
174
|
|
|
28
175
|
## Structure
|
|
@@ -49,22 +196,12 @@ COMMANDS
|
|
|
49
196
|
└── test
|
|
50
197
|
```
|
|
51
198
|
|
|
52
|
-
## Examples
|
|
53
|
-
|
|
54
|
-
**Plays nicely with [csv2psql](https://github.com/korczis/csv2psql)**
|
|
55
|
-
|
|
56
|
-
```
|
|
57
|
-
$ csv2psql convert -t --drop-table --create-table -t afrinic tmp/Ip/afrinic/transformed/delegated-afrinic-latest | psql -h 127.0.0.1 -U jetel
|
|
58
|
-
|
|
59
|
-
$ csv2psql convert -t --drop-table --create-table -t apnic tmp/Ip/apnic/transformed/delegated-apnic-latest | psql -h 127.0.0.1 -U jetel
|
|
60
|
-
```
|
|
61
|
-
|
|
62
199
|
### Rake
|
|
63
200
|
|
|
64
201
|
```
|
|
65
|
-
|
|
66
|
-
rake build # Build jetel-0.0.
|
|
67
|
-
rake install # Build and install jetel-0.0.
|
|
68
|
-
rake install:local # Build and install jetel-0.0.
|
|
69
|
-
rake release # Create tag v0.0.
|
|
202
|
+
$ rake -T
|
|
203
|
+
rake gem:build # Build jetel-0.0.8.gem into the pkg directory
|
|
204
|
+
rake gem:install # Build and install jetel-0.0.8.gem into system gems
|
|
205
|
+
rake gem:install:local # Build and install jetel-0.0.8.gem into system gems without network access
|
|
206
|
+
rake gem:release # Create tag v0.0.8 and build and push jetel-0.0.8.gem to Rubygems
|
|
70
207
|
```
|
data/jetel.gemspec
CHANGED
|
@@ -20,7 +20,10 @@ Gem::Specification.new do |spec|
|
|
|
20
20
|
spec.require_paths = ['lib']
|
|
21
21
|
|
|
22
22
|
spec.add_dependency 'activesupport'
|
|
23
|
+
spec.add_dependency 'aws-sdk', '~> 2'
|
|
24
|
+
spec.add_dependency 'couchbase', '~> 1.3', '>= 1.3.14'
|
|
23
25
|
spec.add_dependency 'csv2psql', '~> 0.0.19'
|
|
26
|
+
spec.add_dependency 'elasticsearch', '~> 1.0', '>= 1.0.14'
|
|
24
27
|
spec.add_dependency 'gli'
|
|
25
28
|
spec.add_dependency 'i18n'
|
|
26
29
|
spec.add_dependency 'json_pure'
|
|
@@ -30,6 +33,7 @@ Gem::Specification.new do |spec|
|
|
|
30
33
|
spec.add_dependency 'pmap'
|
|
31
34
|
spec.add_dependency 'rubyzip'
|
|
32
35
|
spec.add_dependency 'terminal-table'
|
|
36
|
+
spec.add_dependency 'yajl-ruby', '~> 1.2', '>= 1.2.1'
|
|
33
37
|
spec.add_dependency 'zip'
|
|
34
38
|
|
|
35
39
|
spec.add_development_dependency 'bundler', '~> 1.5'
|
|
data/lib/jetel/cli/cmd/loaders_cmd.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'terminal-table'
|
|
4
|
+
|
|
5
|
+
require_relative '../../loaders/loaders'
|
|
6
|
+
|
|
7
|
+
LOADERS = Jetel::Loaders.loaders
|
|
8
|
+
|
|
9
|
+
desc 'Print loaders info'
|
|
10
|
+
command :loaders do |c|
|
|
11
|
+
c.action do |_global_options, _options, _args|
|
|
12
|
+
rows = LOADERS.map do |m|
|
|
13
|
+
[m[:name], m[:klass]]
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
table = Terminal::Table.new :headings => %w(Name Class), :rows => rows
|
|
17
|
+
puts table
|
|
18
|
+
end
|
|
19
|
+
end
|
data/lib/jetel/cli/shared.rb
CHANGED
|
data/lib/jetel/loaders/couchbase/couchbase.rb
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
require_relative '../loader'
|
|
2
|
+
|
|
3
|
+
require_relative '../../helpers/helpers'
|
|
4
|
+
|
|
5
|
+
require 'couchbase'
|
|
6
|
+
require 'yajl/json_gem'
|
|
7
|
+
|
|
8
|
+
require 'securerandom'
|
|
9
|
+
|
|
10
|
+
module Jetel
|
|
11
|
+
module Loaders
|
|
12
|
+
class Couchbase < Loader
|
|
13
|
+
attr_reader :client
|
|
14
|
+
|
|
15
|
+
def initialize(uri)
|
|
16
|
+
super
|
|
17
|
+
|
|
18
|
+
tmp = uri.split('://')
|
|
19
|
+
tmp = tmp[1].split('@')
|
|
20
|
+
|
|
21
|
+
parts = tmp[0].split(':')
|
|
22
|
+
user = parts[0]
|
|
23
|
+
password = parts[1]
|
|
24
|
+
|
|
25
|
+
parts = tmp[1].split('/')
|
|
26
|
+
host, port = parts[0].split(':')
|
|
27
|
+
bucket = parts[1]
|
|
28
|
+
|
|
29
|
+
opts = {
|
|
30
|
+
:host => host,
|
|
31
|
+
:port => (port && port.to_i) || 8091,
|
|
32
|
+
# :options => '',
|
|
33
|
+
# :tty => '',
|
|
34
|
+
:bucket => bucket,
|
|
35
|
+
# :username => user,
|
|
36
|
+
# :password => password,
|
|
37
|
+
:connection_timeout => 360e6,
|
|
38
|
+
:timeout => 360e6
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
@client = ::Couchbase.connect(opts)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def load(modul, source, file, opts)
|
|
45
|
+
super
|
|
46
|
+
|
|
47
|
+
cache = {}
|
|
48
|
+
CSV.open(file, 'rt', :headers => true, :converters => :all) do |csv|
|
|
49
|
+
csv.each do |row|
|
|
50
|
+
cache[SecureRandom.uuid] = row.to_hash
|
|
51
|
+
if cache.length === 5_000
|
|
52
|
+
client.add(cache)
|
|
53
|
+
cache = {}
|
|
54
|
+
print '.'
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
if cache.length > 0
|
|
59
|
+
client.add(cache)
|
|
60
|
+
cache = {}
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
data/lib/jetel/loaders/elasticsearch/elasticsearch.rb
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
require_relative '../loader'
|
|
2
|
+
|
|
3
|
+
require_relative '../../helpers/helpers'
|
|
4
|
+
|
|
5
|
+
require 'elasticsearch'
|
|
6
|
+
|
|
7
|
+
module Jetel
|
|
8
|
+
module Loaders
|
|
9
|
+
class Elasticsearch < Loader
|
|
10
|
+
attr_reader :client, :index, :document_type
|
|
11
|
+
|
|
12
|
+
def initialize(uri)
|
|
13
|
+
super
|
|
14
|
+
|
|
15
|
+
tmp = uri.split('://')
|
|
16
|
+
tmp = tmp[1].split('@')
|
|
17
|
+
|
|
18
|
+
parts = tmp[0].split(':')
|
|
19
|
+
user = parts[0]
|
|
20
|
+
password = parts[1]
|
|
21
|
+
|
|
22
|
+
parts = tmp[1].split('/')
|
|
23
|
+
host, port = parts[0].split(':')
|
|
24
|
+
@index, @document_type = parts[1], parts[2]
|
|
25
|
+
|
|
26
|
+
opts = {
|
|
27
|
+
:host => host,
|
|
28
|
+
:port => (port && port.to_i) || 9200,
|
|
29
|
+
# :options => '',
|
|
30
|
+
# :tty => '',
|
|
31
|
+
# :bucket => bucket,
|
|
32
|
+
# :username => user,
|
|
33
|
+
# :password => password,
|
|
34
|
+
# :connection_timeout => 360e6,
|
|
35
|
+
# :timeout => 360e6
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
@client = ::Elasticsearch::Client.new(opts)
|
|
39
|
+
|
|
40
|
+
puts client.cluster.health
|
|
41
|
+
|
|
42
|
+
# client.index index: index, type: document_type, body: {title: 'Test'}
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def load(modul, source, file, opts)
|
|
46
|
+
super
|
|
47
|
+
|
|
48
|
+
cache = []
|
|
49
|
+
CSV.open(file, 'rt', :headers => true, :converters => :all) do |csv|
|
|
50
|
+
csv.each do |row|
|
|
51
|
+
cache << {
|
|
52
|
+
create: {
|
|
53
|
+
_index: @index,
|
|
54
|
+
_type: @document_type,
|
|
55
|
+
# _id: 1,
|
|
56
|
+
data: row.to_hash
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
if cache.length === 5_000
|
|
60
|
+
client.bulk(body: cache)
|
|
61
|
+
cache = []
|
|
62
|
+
print '.'
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
if cache.length > 0
|
|
67
|
+
client.bulk(body: cache)
|
|
68
|
+
cache = []
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
data/lib/jetel/loaders/pg/pg.rb
CHANGED
|
@@ -107,9 +107,11 @@ module Jetel
|
|
|
107
107
|
|
|
108
108
|
# Display any error messages
|
|
109
109
|
while res = @client.get_result
|
|
110
|
-
|
|
111
|
-
|
|
110
|
+
e_message = res.error_message
|
|
111
|
+
if e_message && !e_message.empty?
|
|
112
|
+
puts e_message
|
|
112
113
|
end
|
|
114
|
+
puts "#{res.cmdtuples} row(s) affected"
|
|
113
115
|
end
|
|
114
116
|
|
|
115
117
|
sql
|
|
data/lib/jetel/modules/alexa/alexa.rb
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'pmap'
|
|
4
|
+
|
|
5
|
+
require_relative '../../config/config'
|
|
6
|
+
require_relative '../../modules/module'
|
|
7
|
+
|
|
8
|
+
module Jetel
|
|
9
|
+
module Modules
|
|
10
|
+
class Alexa < Module
|
|
11
|
+
class << self
|
|
12
|
+
def sources
|
|
13
|
+
[
|
|
14
|
+
{
|
|
15
|
+
name: 'alexa',
|
|
16
|
+
# filename_downloaded: 'top-1m.csv.zip',
|
|
17
|
+
filename_extracted: 'top-1m.csv',
|
|
18
|
+
filename_transformed: 'top-1m.csv',
|
|
19
|
+
url: 'http://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
|
|
20
|
+
}
|
|
21
|
+
]
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def download(global_options, options, args)
|
|
26
|
+
self.class.sources.pmap do |source|
|
|
27
|
+
download_source(source, global_options.merge(options))
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def extract(global_options, options, args)
|
|
32
|
+
self.class.sources.pmap do |source|
|
|
33
|
+
unzip(source, global_options.merge(options))
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def transform(global_options, options, args)
|
|
38
|
+
self.class.sources.pmap do |source|
|
|
39
|
+
extracted_file = extracted_file(source, global_options.merge(options))
|
|
40
|
+
transformed_file = transformed_file(source, global_options.merge(options))
|
|
41
|
+
dest_dir = transform_dir(source, global_options.merge(options))
|
|
42
|
+
|
|
43
|
+
puts "Transforming #{extracted_file}"
|
|
44
|
+
|
|
45
|
+
FileUtils.mkdir_p(dest_dir)
|
|
46
|
+
File.open(extracted_file, 'rt') do |fin|
|
|
47
|
+
File.open(transformed_file, 'wt') do |fout|
|
|
48
|
+
fout.puts('rank,url')
|
|
49
|
+
|
|
50
|
+
while buff = fin.read(4096)
|
|
51
|
+
fout.write(buff)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
data/lib/jetel/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: jetel
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.8
|
|
4
|
+
version: 0.0.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tomas Korcak
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-11-
|
|
11
|
+
date: 2015-11-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activesupport
|
|
@@ -24,6 +24,40 @@ dependencies:
|
|
|
24
24
|
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: aws-sdk
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '2'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '2'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: couchbase
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.3'
|
|
48
|
+
- - ">="
|
|
49
|
+
- !ruby/object:Gem::Version
|
|
50
|
+
version: 1.3.14
|
|
51
|
+
type: :runtime
|
|
52
|
+
prerelease: false
|
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
54
|
+
requirements:
|
|
55
|
+
- - "~>"
|
|
56
|
+
- !ruby/object:Gem::Version
|
|
57
|
+
version: '1.3'
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: 1.3.14
|
|
27
61
|
- !ruby/object:Gem::Dependency
|
|
28
62
|
name: csv2psql
|
|
29
63
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -38,6 +72,26 @@ dependencies:
|
|
|
38
72
|
- - "~>"
|
|
39
73
|
- !ruby/object:Gem::Version
|
|
40
74
|
version: 0.0.19
|
|
75
|
+
- !ruby/object:Gem::Dependency
|
|
76
|
+
name: elasticsearch
|
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '1.0'
|
|
82
|
+
- - ">="
|
|
83
|
+
- !ruby/object:Gem::Version
|
|
84
|
+
version: 1.0.14
|
|
85
|
+
type: :runtime
|
|
86
|
+
prerelease: false
|
|
87
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
88
|
+
requirements:
|
|
89
|
+
- - "~>"
|
|
90
|
+
- !ruby/object:Gem::Version
|
|
91
|
+
version: '1.0'
|
|
92
|
+
- - ">="
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
version: 1.0.14
|
|
41
95
|
- !ruby/object:Gem::Dependency
|
|
42
96
|
name: gli
|
|
43
97
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -164,6 +218,26 @@ dependencies:
|
|
|
164
218
|
- - ">="
|
|
165
219
|
- !ruby/object:Gem::Version
|
|
166
220
|
version: '0'
|
|
221
|
+
- !ruby/object:Gem::Dependency
|
|
222
|
+
name: yajl-ruby
|
|
223
|
+
requirement: !ruby/object:Gem::Requirement
|
|
224
|
+
requirements:
|
|
225
|
+
- - "~>"
|
|
226
|
+
- !ruby/object:Gem::Version
|
|
227
|
+
version: '1.2'
|
|
228
|
+
- - ">="
|
|
229
|
+
- !ruby/object:Gem::Version
|
|
230
|
+
version: 1.2.1
|
|
231
|
+
type: :runtime
|
|
232
|
+
prerelease: false
|
|
233
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
234
|
+
requirements:
|
|
235
|
+
- - "~>"
|
|
236
|
+
- !ruby/object:Gem::Version
|
|
237
|
+
version: '1.2'
|
|
238
|
+
- - ">="
|
|
239
|
+
- !ruby/object:Gem::Version
|
|
240
|
+
version: 1.2.1
|
|
167
241
|
- !ruby/object:Gem::Dependency
|
|
168
242
|
name: zip
|
|
169
243
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -288,6 +362,7 @@ files:
|
|
|
288
362
|
- lib/jetel/cli/app.rb
|
|
289
363
|
- lib/jetel/cli/cli.rb
|
|
290
364
|
- lib/jetel/cli/cmd/config_cmd.rb
|
|
365
|
+
- lib/jetel/cli/cmd/loaders_cmd.rb
|
|
291
366
|
- lib/jetel/cli/cmd/modules_cmd.rb
|
|
292
367
|
- lib/jetel/cli/cmd/version_cmd.rb
|
|
293
368
|
- lib/jetel/cli/shared.rb
|
|
@@ -302,6 +377,8 @@ files:
|
|
|
302
377
|
- lib/jetel/helpers/helpers.rb
|
|
303
378
|
- lib/jetel/jetel/jetel.rb
|
|
304
379
|
- lib/jetel/lib.rb
|
|
380
|
+
- lib/jetel/loaders/couchbase/couchbase.rb
|
|
381
|
+
- lib/jetel/loaders/elasticsearch/elasticsearch.rb
|
|
305
382
|
- lib/jetel/loaders/loader.rb
|
|
306
383
|
- lib/jetel/loaders/loaders.rb
|
|
307
384
|
- lib/jetel/loaders/pg/pg.rb
|
|
@@ -311,6 +388,7 @@ files:
|
|
|
311
388
|
- lib/jetel/loaders/pg/sql/header.sql.erb
|
|
312
389
|
- lib/jetel/loaders/pg/sql/schema.sql.erb
|
|
313
390
|
- lib/jetel/loaders/pg/sql/truncate_table.sql.erb
|
|
391
|
+
- lib/jetel/modules/alexa/alexa.rb
|
|
314
392
|
- lib/jetel/modules/geolite/geolite.rb
|
|
315
393
|
- lib/jetel/modules/ip/ip.rb
|
|
316
394
|
- lib/jetel/modules/iso3166/iso3166.rb
|
|
@@ -347,4 +425,3 @@ specification_version: 4
|
|
|
347
425
|
summary: Jetel
|
|
348
426
|
test_files:
|
|
349
427
|
- test/spec_helper.rb
|
|
350
|
-
has_rdoc:
|