embulk-output-orc 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f90685be3f76457be27d9ee129b56276a66dd42b
4
- data.tar.gz: ac33ac212a7bb6352aef8eb0caf310f5afd25bbe
3
+ metadata.gz: 725b869f19175110dd6e542619f5bfd9c8c0168b
4
+ data.tar.gz: 9c8016b015f380815cb2ad7127c192243ebfa45a
5
5
  SHA512:
6
- metadata.gz: aecde246d738967d2a91906560d9735ea11981e8f7349da57d7418a3ab08a7796fb5573f1e9b72b54ff6eee1a08f4ade83194024bfcf67ff6113ff6f570fb7b6
7
- data.tar.gz: 34b3eb0dc0c01388edf4310f5f55d00b0eab65b95b2def0a888867df8f79e0087c66d70e099d2af6327e57be03a80a75a6135cf281a58f5c4a0b8e22ed7caa1a
6
+ metadata.gz: 197da7bd52b19ccc2c3f30ad0d2c2c36446fa00e48426de9bb333b0ef2a8eddc213f0db947e50215297333a933e5d6d3b63ad0c0388f78e19d984fae6810bb99
7
+ data.tar.gz: ccf9bc5b8f73c8d0f48e7d17e16dd4b113b3f9b7a1b9178045109ef3476f4cb47f58e0699e4a9516ab81830d59a4bae3be2fba8f936c4cef0f705f6acc384f2d
data/README.md CHANGED
@@ -43,6 +43,12 @@ out:
43
43
 
44
44
  ## ChangeLog
45
45
 
46
+ ### ver 0.3.4
47
+
48
+ - Bump `orc` library to `1.5.4`
49
+ - bugfix
50
+ - https://github.com/yuokada/embulk-output-orc/pull/17
51
+
46
52
  ### ver 0.3.3
47
53
 
48
54
  - bugfix
@@ -18,7 +18,7 @@ configurations {
18
18
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
19
19
  }
20
20
 
21
- version = "0.3.3"
21
+ version = "0.3.4"
22
22
 
23
23
  sourceCompatibility = 1.8
24
24
  targetCompatibility = 1.8
@@ -27,8 +27,8 @@ dependencies {
27
27
  compile "org.embulk:embulk-core:0.8.34"
28
28
  provided "org.embulk:embulk-core:0.8.34"
29
29
 
30
- compile "org.apache.orc:orc:1.4.4"
31
- compile "org.apache.orc:orc-core:1.4.4"
30
+ compile "org.apache.orc:orc:1.5.4"
31
+ compile "org.apache.orc:orc-core:1.5.4"
32
32
  compile "org.apache.hadoop:hadoop-hdfs:2.7.5"
33
33
 
34
34
  compile 'org.embulk.input.s3:embulk-util-aws-credentials:0.2.8'
@@ -9,6 +9,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
9
9
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
10
10
  import org.apache.hadoop.util.VersionInfo;
11
11
  import org.apache.orc.CompressionKind;
12
+ import org.apache.orc.MemoryManager;
12
13
  import org.apache.orc.OrcFile;
13
14
  import org.apache.orc.TypeDescription;
14
15
  import org.apache.orc.Writer;
@@ -163,6 +164,7 @@ public class OrcOutputPlugin
163
164
  writer = OrcFile.createWriter(
164
165
  new Path(buildPath(task, processorIndex)),
165
166
  writerOptions.setSchema(oschema)
167
+ .memory(new WriterLocalMemoryManager())
166
168
  .version(OrcFile.Version.V_0_12)
167
169
  );
168
170
  }
@@ -201,33 +203,31 @@ public class OrcOutputPlugin
201
203
  @Override
202
204
  public void add(Page page)
203
205
  {
204
- synchronized (this) {
205
- try {
206
- // int size = page.getStringReferences().size();
207
- final TypeDescription schema = getSchema(reader.getSchema());
208
- final VectorizedRowBatch batch = schema.createRowBatch();
209
- // batch.size = size;
210
-
211
- reader.setPage(page);
212
- while (reader.nextRecord()) {
213
- final int row = batch.size++;
214
- reader.getSchema().visitColumns(
215
- new OrcColumnVisitor(reader, batch, row)
216
- );
217
- if (batch.size >= batch.getMaxSize()) {
218
- writer.addRowBatch(batch);
219
- batch.reset();
220
- }
221
- }
222
- if (batch.size != 0) {
206
+ try {
207
+ // int size = page.getStringReferences().size();
208
+ final TypeDescription schema = getSchema(reader.getSchema());
209
+ final VectorizedRowBatch batch = schema.createRowBatch();
210
+ // batch.size = size;
211
+
212
+ reader.setPage(page);
213
+ while (reader.nextRecord()) {
214
+ final int row = batch.size++;
215
+ reader.getSchema().visitColumns(
216
+ new OrcColumnVisitor(reader, batch, row)
217
+ );
218
+ if (batch.size >= batch.getMaxSize()) {
223
219
  writer.addRowBatch(batch);
224
220
  batch.reset();
225
221
  }
226
222
  }
227
- catch (IOException e) {
228
- e.printStackTrace();
223
+ if (batch.size != 0) {
224
+ writer.addRowBatch(batch);
225
+ batch.reset();
229
226
  }
230
227
  }
228
+ catch (IOException e) {
229
+ e.printStackTrace();
230
+ }
231
231
  }
232
232
 
233
233
  @Override
@@ -257,4 +257,42 @@ public class OrcOutputPlugin
257
257
  return Exec.newTaskReport();
258
258
  }
259
259
  }
260
+
261
+ // We avoid using orc.MemoryManagerImpl because it is not thread-safe, whereas Embulk is multi-threaded.
262
+ // Embulk creates and uses multiple instances of TransactionalPageOutput in worker threads.
263
+ // As a workaround, WriterLocalMemoryManager is bound to a single orc.Writer instance, and
264
+ // invokes checkMemory() only on that instance.
265
+ private static class WriterLocalMemoryManager implements MemoryManager
266
+ {
267
+ final long rowsBetweenChecks = 10000;
268
+
269
+ private int rowsAddedSinceCheck = 0;
270
+ Callback boundCallback = null;
271
+
272
+ @Override
273
+ public void addWriter(Path path, long requestedAllocation, Callback callback) throws IOException
274
+ {
275
+ if (boundCallback != null) {
276
+ throw new IllegalStateException("WriterLocalMemoryManager should be bound to a single orc.Writer instance.");
277
+ }
278
+
279
+ boundCallback = callback;
280
+ }
281
+
282
+ @Override
283
+ public void removeWriter(Path path) throws IOException
284
+ {
285
+ boundCallback = null;
286
+ }
287
+
288
+ @Override
289
+ public void addedRow(int rows) throws IOException
290
+ {
291
+ rowsAddedSinceCheck += rows;
292
+ if (rowsAddedSinceCheck > rowsBetweenChecks) {
293
+ boundCallback.checkMemory(1);
294
+ rowsAddedSinceCheck = 0;
295
+ }
296
+ }
297
+ }
260
298
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-01-10 00:00:00.000000000 Z
11
+ date: 2019-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -50,8 +50,7 @@ files:
50
50
  - LICENSE.txt
51
51
  - README.md
52
52
  - build.gradle
53
- - classpath/activation-1.1.jar
54
- - classpath/aircompressor-0.8.jar
53
+ - classpath/aircompressor-0.10.jar
55
54
  - classpath/animal-sniffer-annotations-1.14.jar
56
55
  - classpath/apacheds-i18n-2.0.0-M15.jar
57
56
  - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
@@ -81,7 +80,7 @@ files:
81
80
  - classpath/curator-client-2.7.1.jar
82
81
  - classpath/curator-framework-2.7.1.jar
83
82
  - classpath/curator-recipes-2.7.1.jar
84
- - classpath/embulk-output-orc-0.3.3.jar
83
+ - classpath/embulk-output-orc-0.3.4.jar
85
84
  - classpath/embulk-util-aws-credentials-0.2.8.jar
86
85
  - classpath/error_prone_annotations-2.1.3.jar
87
86
  - classpath/gson-2.2.4.jar
@@ -91,7 +90,7 @@ files:
91
90
  - classpath/hadoop-aws-2.7.5.jar
92
91
  - classpath/hadoop-common-2.7.5.jar
93
92
  - classpath/hadoop-hdfs-2.7.5.jar
94
- - classpath/hive-storage-api-2.2.1.jar
93
+ - classpath/hive-storage-api-2.6.0.jar
95
94
  - classpath/htrace-core-3.1.0-incubating.jar
96
95
  - classpath/httpclient-4.3.6.jar
97
96
  - classpath/httpcore-4.3.3.jar
@@ -101,7 +100,7 @@ files:
101
100
  - classpath/jackson-mapper-asl-1.9.13.jar
102
101
  - classpath/jackson-xc-1.8.3.jar
103
102
  - classpath/java-xmlbuilder-0.4.jar
104
- - classpath/jaxb-api-2.2.2.jar
103
+ - classpath/jaxb-api-2.2.11.jar
105
104
  - classpath/jaxb-impl-2.2.3-1.jar
106
105
  - classpath/jcl-over-slf4j-1.7.12.jar
107
106
  - classpath/jersey-core-1.9.jar
@@ -120,13 +119,13 @@ files:
120
119
  - classpath/log4j-1.2.17.jar
121
120
  - classpath/netty-3.7.0.Final.jar
122
121
  - classpath/netty-all-4.0.23.Final.jar
123
- - classpath/orc-core-1.4.4.jar
122
+ - classpath/orc-core-1.5.4.jar
123
+ - classpath/orc-shims-1.5.4.jar
124
124
  - classpath/paranamer-2.3.jar
125
125
  - classpath/protobuf-java-2.5.0.jar
126
126
  - classpath/servlet-api-2.5-20081211.jar
127
127
  - classpath/servlet-api-2.5.jar
128
128
  - classpath/snappy-java-1.0.4.1.jar
129
- - classpath/stax-api-1.0-2.jar
130
129
  - classpath/xercesImpl-2.9.1.jar
131
130
  - classpath/xml-apis-1.3.04.jar
132
131
  - classpath/xmlenc-0.52.jar