embulk-output-orc 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -2
- data/build.gradle +1 -1
- data/example/example.yml +4 -6
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +26 -19
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b6323f654a352230f551c23017a85e9ceee97ef
|
4
|
+
data.tar.gz: 400429eadfbb55ddaaaa2e954bddb54306a31e78
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8359917c8f9b429189faec83b51aa3fee3abfbfc74130eef9b83489b46e8be4ff40a2ca273c7c17a6dd770bb329cb25195c1e3c85bf103b46acee5b952c7cc66
|
7
|
+
data.tar.gz: 3910abc4803eec8a36fc275229a4ff3e5b0f14c4fbf1bdea7071cd49c62b2782025acef0d485987509ea15723713a822abc3a306e570e76c57df2c951be22925
|
data/README.md
CHANGED
@@ -36,14 +36,19 @@
|
|
36
36
|
out:
|
37
37
|
type: orc
|
38
38
|
path_prefix: "/tmp/output"
|
39
|
-
buffer_size: 8000
|
40
|
-
strip_size: 90000
|
41
39
|
compression_kind: ZLIB
|
42
40
|
overwrite: true
|
43
41
|
```
|
44
42
|
|
45
43
|
## ChangeLog
|
46
44
|
|
45
|
+
### ver 0.3.0
|
46
|
+
|
47
|
+
- Change default values: (block_size, buffer_size, strip_size)
|
48
|
+
|
49
|
+
- The default values are now Hive's default values.
|
50
|
+
(see: https://orc.apache.org/docs/hive-config.html)
|
51
|
+
|
47
52
|
### ver 0.2.0
|
48
53
|
|
49
54
|
- support: output to s3
|
data/build.gradle
CHANGED
data/example/example.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
in:
|
3
3
|
type: randomj
|
4
|
-
rows:
|
4
|
+
rows: 1024000
|
5
5
|
threads: 1
|
6
6
|
# default_timezone: Asia/Tokyo
|
7
7
|
primary_key: myid
|
@@ -14,14 +14,12 @@ in:
|
|
14
14
|
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
15
15
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
exec:
|
18
|
+
max_threads: 2 # run at most 2 tasks concurrently
|
19
|
+
min_output_tasks: 1 # disable page scattering
|
20
20
|
|
21
21
|
out:
|
22
22
|
type: orc
|
23
23
|
overwrite: true
|
24
24
|
path_prefix: "/tmp/output"
|
25
|
-
buffer_size: 8000
|
26
|
-
strip_size: 90000
|
27
25
|
compression_kind: ZLIB
|
@@ -197,25 +197,32 @@ public class OrcOutputPlugin
|
|
197
197
|
@Override
|
198
198
|
public void add(Page page)
|
199
199
|
{
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
200
|
+
synchronized (this) {
|
201
|
+
try {
|
202
|
+
// int size = page.getStringReferences().size();
|
203
|
+
final TypeDescription schema = getSchema(reader.getSchema());
|
204
|
+
final VectorizedRowBatch batch = schema.createRowBatch();
|
205
|
+
// batch.size = size;
|
206
|
+
|
207
|
+
reader.setPage(page);
|
208
|
+
while (reader.nextRecord()) {
|
209
|
+
final int row = batch.size++;
|
210
|
+
reader.getSchema().visitColumns(
|
211
|
+
new OrcColumnVisitor(reader, batch, row)
|
212
|
+
);
|
213
|
+
if (batch.size >= batch.getMaxSize()) {
|
214
|
+
writer.addRowBatch(batch);
|
215
|
+
batch.reset();
|
216
|
+
}
|
217
|
+
}
|
218
|
+
if (batch.size != 0) {
|
219
|
+
writer.addRowBatch(batch);
|
220
|
+
batch.reset();
|
221
|
+
}
|
222
|
+
}
|
223
|
+
catch (IOException e) {
|
224
|
+
e.printStackTrace();
|
225
|
+
}
|
219
226
|
}
|
220
227
|
}
|
221
228
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-12-
|
11
|
+
date: 2017-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,7 +94,7 @@ files:
|
|
94
94
|
- classpath/curator-client-2.7.1.jar
|
95
95
|
- classpath/curator-framework-2.7.1.jar
|
96
96
|
- classpath/curator-recipes-2.7.1.jar
|
97
|
-
- classpath/embulk-output-orc-0.3.0.jar
|
97
|
+
- classpath/embulk-output-orc-0.3.1.jar
|
98
98
|
- classpath/embulk-util-aws-credentials-0.2.8.jar
|
99
99
|
- classpath/gson-2.2.4.jar
|
100
100
|
- classpath/hadoop-annotations-2.7.3.jar
|