embulk-output-orc 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -2
- data/build.gradle +1 -1
- data/example/example.yml +4 -6
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +26 -19
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b6323f654a352230f551c23017a85e9ceee97ef
|
4
|
+
data.tar.gz: 400429eadfbb55ddaaaa2e954bddb54306a31e78
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8359917c8f9b429189faec83b51aa3fee3abfbfc74130eef9b83489b46e8be4ff40a2ca273c7c17a6dd770bb329cb25195c1e3c85bf103b46acee5b952c7cc66
|
7
|
+
data.tar.gz: 3910abc4803eec8a36fc275229a4ff3e5b0f14c4fbf1bdea7071cd49c62b2782025acef0d485987509ea15723713a822abc3a306e570e76c57df2c951be22925
|
data/README.md
CHANGED
@@ -36,14 +36,19 @@
|
|
36
36
|
out:
|
37
37
|
type: orc
|
38
38
|
path_prefix: "/tmp/output"
|
39
|
-
buffer_size: 8000
|
40
|
-
strip_size: 90000
|
41
39
|
compression_kind: ZLIB
|
42
40
|
overwrite: true
|
43
41
|
```
|
44
42
|
|
45
43
|
## ChangeLog
|
46
44
|
|
45
|
+
### ver 0.3.0
|
46
|
+
|
47
|
+
- Change default values: (block_size, buffer_size, strip_size)
|
48
|
+
|
49
|
+
- The default values are now Hive's default values.
|
50
|
+
(see: https://orc.apache.org/docs/hive-config.html)
|
51
|
+
|
47
52
|
### ver 0.2.0
|
48
53
|
|
49
54
|
- support: output to s3
|
data/build.gradle
CHANGED
data/example/example.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
in:
|
3
3
|
type: randomj
|
4
|
-
rows:
|
4
|
+
rows: 1024000
|
5
5
|
threads: 1
|
6
6
|
# default_timezone: Asia/Tokyo
|
7
7
|
primary_key: myid
|
@@ -14,14 +14,12 @@ in:
|
|
14
14
|
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
15
15
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
exec:
|
18
|
+
max_threads: 2 # run at most 2 tasks concurrently
|
19
|
+
min_output_tasks: 1 # disable page scattering
|
20
20
|
|
21
21
|
out:
|
22
22
|
type: orc
|
23
23
|
overwrite: true
|
24
24
|
path_prefix: "/tmp/output"
|
25
|
-
buffer_size: 8000
|
26
|
-
strip_size: 90000
|
27
25
|
compression_kind: ZLIB
|
@@ -197,25 +197,32 @@ public class OrcOutputPlugin
|
|
197
197
|
@Override
|
198
198
|
public void add(Page page)
|
199
199
|
{
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
200
|
+
synchronized (this) {
|
201
|
+
try {
|
202
|
+
// int size = page.getStringReferences().size();
|
203
|
+
final TypeDescription schema = getSchema(reader.getSchema());
|
204
|
+
final VectorizedRowBatch batch = schema.createRowBatch();
|
205
|
+
// batch.size = size;
|
206
|
+
|
207
|
+
reader.setPage(page);
|
208
|
+
while (reader.nextRecord()) {
|
209
|
+
final int row = batch.size++;
|
210
|
+
reader.getSchema().visitColumns(
|
211
|
+
new OrcColumnVisitor(reader, batch, row)
|
212
|
+
);
|
213
|
+
if (batch.size >= batch.getMaxSize()) {
|
214
|
+
writer.addRowBatch(batch);
|
215
|
+
batch.reset();
|
216
|
+
}
|
217
|
+
}
|
218
|
+
if (batch.size != 0) {
|
219
|
+
writer.addRowBatch(batch);
|
220
|
+
batch.reset();
|
221
|
+
}
|
222
|
+
}
|
223
|
+
catch (IOException e) {
|
224
|
+
e.printStackTrace();
|
225
|
+
}
|
219
226
|
}
|
220
227
|
}
|
221
228
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-12-
|
11
|
+
date: 2017-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,7 +94,7 @@ files:
|
|
94
94
|
- classpath/curator-client-2.7.1.jar
|
95
95
|
- classpath/curator-framework-2.7.1.jar
|
96
96
|
- classpath/curator-recipes-2.7.1.jar
|
97
|
-
- classpath/embulk-output-orc-0.3.0.jar
|
97
|
+
- classpath/embulk-output-orc-0.3.1.jar
|
98
98
|
- classpath/embulk-util-aws-credentials-0.2.8.jar
|
99
99
|
- classpath/gson-2.2.4.jar
|
100
100
|
- classpath/hadoop-annotations-2.7.3.jar
|