embulk-input-parquet_hadoop 0.1.0 → 0.1.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 11d3bfc5cf66805e9ce41966e90759d5acfbed8f
-  data.tar.gz: 234ecd00864d9c122f01a95ab224c18bdff3ccea
+  metadata.gz: 26ed2eaecbcd68dc340a28050283d99e8d4328d8
+  data.tar.gz: cb043093611c02591c8cd013bef9bde4d17ad410
 SHA512:
-  metadata.gz: 05e661e93e1e5c99edec29e2c83cd68d79f45e8c828afb0aeba822e44003057cf5deb1c69e14cf8eebd32755c19a06766c095e9dd0812bc3feee3f3ae4574c0a
-  data.tar.gz: 9b1119067ba7eaeb18ee4ddaac2322881b6177fb3eab92995784745b72d90e6e0c9e60d0bc552afd652f6556392b008628e9065cbd8762ab48a2275cb2a62944
+  metadata.gz: ccd763d5484bbd3e34ea45c217dc3363b5229848194e5061981fba202300b76790fa2a453388ca51ffe81410e8950d8c8647855c784a49461b4537fffc9fa909
+  data.tar.gz: 6f12ead633dba51521b33154c5ae8b237e6372aeb00d689414e8fae89168e17cac5c08059580fca7532f012b7769f13ad804e82b55187c73cebe2377ee03a017
@@ -63,7 +63,8 @@ public class ConfigurationFactory
             try {
                 logger.trace("embulk-input-parquet_hadoop: load a config file: {}", f);
                 c.addResource(new File(f).toURI().toURL());
-            } catch (MalformedURLException e) {
+            }
+            catch (MalformedURLException e) {
                 throw new ConfigException(e);
             }
         }
@@ -18,6 +18,7 @@ package org.embulk.input.parquet_hadoop;
 import com.google.common.base.Function;
 import com.google.common.base.Throwables;
 import com.google.common.collect.Lists;
+import jp.co.cyberagent.parquet.msgpack.read.MessagePackReadSupport;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -43,9 +44,9 @@ import org.embulk.spi.type.Types;
 import org.msgpack.value.Value;
 import org.slf4j.Logger;
 import org.slf4j.bridge.SLF4JBridgeHandler;
-import studio.adtech.parquet.msgpack.read.MessagePackReadSupport;
 
 import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.util.List;
 import java.util.logging.Level;
@@ -100,12 +101,14 @@ public class ParquetHadoopInputPlugin
             List<String> files = Lists.transform(statusList, new Function<FileStatus, String>() {
                 @Nullable
                 @Override
-                public String apply(@Nullable FileStatus input) {
+                public String apply(@Nullable FileStatus input)
+                {
                     return input.getPath().toString();
                 }
             });
             task.setFiles(files);
-        } catch (IOException e) {
+        }
+        catch (IOException e) {
             throw Throwables.propagate(e);
         }
 
@@ -152,7 +155,8 @@ public class ParquetHadoopInputPlugin
         ParquetRowReader<Value> reader;
         try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
             reader = new ParquetRowReader<>(conf, filePath, new MessagePackReadSupport());
-        } catch (ParquetRuntimeException | IOException e) {
+        }
+        catch (ParquetRuntimeException | IOException e) {
             throw new DataException(e);
         }
 
@@ -160,7 +164,8 @@ public class ParquetHadoopInputPlugin
         while (true) {
             try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
                 value = reader.read();
-            } catch (ParquetRuntimeException | IOException e) {
+            }
+            catch (ParquetRuntimeException | IOException e) {
                 throw new DataException(e);
             }
             if (value == null) {
@@ -175,7 +180,8 @@ public class ParquetHadoopInputPlugin
 
         try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
             reader.close();
-        } catch (ParquetRuntimeException | IOException e) {
+        }
+        catch (ParquetRuntimeException | IOException e) {
             throw new DataException(e);
         }
     }
@@ -195,7 +201,8 @@ public class ParquetHadoopInputPlugin
         return new PageBuilder(Exec.getBufferAllocator(), schema, output);
     }
 
-    private List<FileStatus> listFileStatuses(FileSystem fs, Path rootPath) throws IOException {
+    private List<FileStatus> listFileStatuses(FileSystem fs, Path rootPath) throws IOException
+    {
         List<FileStatus> fileStatuses = Lists.newArrayList();
 
         FileStatus[] entries = fs.globStatus(rootPath, HiddenFileFilter.INSTANCE);
@@ -207,7 +214,8 @@ public class ParquetHadoopInputPlugin
             if (entry.isDirectory()) {
                 List<FileStatus> subEntries = listRecursive(fs, entry);
                 fileStatuses.addAll(subEntries);
-            } else {
+            }
+            else {
                 fileStatuses.add(entry);
             }
         }
@@ -223,7 +231,8 @@ public class ParquetHadoopInputPlugin
             for (FileStatus entry : entries) {
                 statusList.addAll(listRecursive(fs, entry));
             }
-        } else {
+        }
+        else {
             statusList.add(status);
         }
         return statusList;
@@ -241,14 +250,16 @@ public class ParquetHadoopInputPlugin
         Level level;
         try {
             level = Level.parse(task.getParquetLogLevel());
-        } catch (IllegalArgumentException e) {
+        }
+        catch (IllegalArgumentException e) {
            logger.warn("embulk-input-parquet_hadoop: Invalid parquet_log_level", e);
            level = Level.WARNING;
         }
         // invoke static initializer that overrides log level.
         try {
             Class.forName("org.apache.parquet.Log");
-        } catch (ClassNotFoundException e) {
+        }
+        catch (ClassNotFoundException e) {
             logger.warn("", e);
         }
 
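Beyond the brace reflow, this hunk documents a subtle trick: org.apache.parquet.Log applies its logging configuration in a static initializer, so the plugin loads the class reflectively to force that initializer to run once the desired level is known. A minimal sketch of the same pattern, assuming the parsed level is applied to parquet's java.util.logging logger (the exact wiring is outside this hunk):

    import java.util.logging.Level;
    import java.util.logging.Logger;

    final class ParquetLogLevelSketch
    {
        static void configure(String requested)
        {
            Level level;
            try {
                level = Level.parse(requested);   // e.g. "WARNING", "INFO", "FINE"
            }
            catch (IllegalArgumentException e) {
                level = Level.WARNING;            // same fallback as the plugin
            }
            // Assumption for this sketch: the level is set on parquet's JUL logger.
            Logger.getLogger("org.apache.parquet").setLevel(level);
            // Class.forName triggers the static initializer of org.apache.parquet.Log,
            // which installs parquet's log configuration.
            try {
                Class.forName("org.apache.parquet.Log");
            }
            catch (ClassNotFoundException e) {
                // parquet-hadoop not on the classpath; nothing to initialize
            }
        }
    }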
@@ -48,7 +48,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-public class ParquetRowReader<T> {
+public class ParquetRowReader<T>
+{
     private static final Logger logger = Exec.getLogger(ParquetRowReader.class);
 
     private final Path filePath;
@@ -69,7 +70,8 @@ public class ParquetRowReader<T> {
     private static final boolean strictTypeChecking = true;
     private static final FilterCompat.Filter filter = FilterCompat.NOOP;
 
-    public ParquetRowReader(Configuration configuration, Path filePath, ReadSupport<T> readSupport) throws IOException {
+    public ParquetRowReader(Configuration configuration, Path filePath, ReadSupport<T> readSupport) throws IOException
+    {
         this.filePath = filePath;
 
         ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, filePath, ParquetMetadataConverter.NO_FILTER);
@@ -100,7 +102,8 @@ public class ParquetRowReader<T> {
         logger.info("ParquetRowReader initialized will read a total of " + total + " records.");
     }
 
-    private void checkRead() throws IOException {
+    private void checkRead() throws IOException
+    {
         if (current == totalCountLoadedSoFar) {
             PageReadStore pages = reader.readNextRowGroup();
             if (pages == null) {
@@ -119,7 +122,8 @@ public class ParquetRowReader<T> {
      * @throws IOException
      * @throws ParquetDecodingException
      */
-    public T read() throws IOException {
+    public T read() throws IOException
+    {
         T currentValue = null;
         boolean recordFound = false;
         while (!recordFound) {
@@ -134,7 +138,8 @@ public class ParquetRowReader<T> {
 
             try {
                 currentValue = recordReader.read();
-            } catch (RecordMaterializer.RecordMaterializationException e) {
+            }
+            catch (RecordMaterializer.RecordMaterializationException e) {
                 // this might throw, but it's fatal if it does.
                 unmaterializableRecordCounter.incErrors(e);
                 logger.debug("skipping a corrupt record");
@@ -157,7 +162,8 @@ public class ParquetRowReader<T> {
                 recordFound = true;
 
                 logger.debug("read value: {}", currentValue);
-            } catch (RuntimeException e) {
+            }
+            catch (RuntimeException e) {
                 throw new ParquetDecodingException(
                         String.format("Can not read value at %d in block %d in file %s", current, currentBlock, filePath), e);
             }
@@ -166,11 +172,13 @@ public class ParquetRowReader<T> {
         return currentValue;
     }
 
-    public void close() throws IOException {
+    public void close() throws IOException
+    {
         reader.close();
     }
 
-    private static <K, V> Map<K, Set<V>> toSetMultiMap(Map<K, V> map) {
+    private static <K, V> Map<K, Set<V>> toSetMultiMap(Map<K, V> map)
+    {
         Map<K, Set<V>> setMultiMap = new HashMap<>();
         for (Map.Entry<K, V> entry : map.entrySet()) {
             Set<V> set = new HashSet<>();
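The hunk cuts off inside toSetMultiMap, but the visible lines show a Map<K, V> being converted into a Map<K, Set<V>> with one set per entry. A sketch of how such a conversion typically completes; the unmodifiable wrapping below is an assumption, not shown in the diff:

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    final class SetMultiMapSketch
    {
        // Wrap each value in a singleton Set keyed by the original key.
        static <K, V> Map<K, Set<V>> toSetMultiMap(Map<K, V> map)
        {
            Map<K, Set<V>> setMultiMap = new HashMap<>();
            for (Map.Entry<K, V> entry : map.entrySet()) {
                Set<V> set = new HashSet<>();
                set.add(entry.getValue());
                setMultiMap.put(entry.getKey(), Collections.unmodifiableSet(set));
            }
            return Collections.unmodifiableMap(setMultiMap);
        }
    }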
@@ -25,20 +25,23 @@ package org.embulk.input.parquet_hadoop;
  * hadoop jars is not in classpath of system class loader.
  * So we need to set context class loader to plugins' class loader.
  */
-class PluginClassLoaderScope implements AutoCloseable {
+class PluginClassLoaderScope implements AutoCloseable
+{
     private static final ClassLoader PLUGIN_CLASS_LOADER =
             ParquetHadoopInputPlugin.class.getClassLoader();
 
     private final ClassLoader original;
 
-    public PluginClassLoaderScope() {
+    public PluginClassLoaderScope()
+    {
         Thread current = Thread.currentThread();
         this.original = current.getContextClassLoader();
         Thread.currentThread().setContextClassLoader(PLUGIN_CLASS_LOADER);
     }
 
     @Override
-    public void close() {
+    public void close()
+    {
         Thread.currentThread().setContextClassLoader(original);
     }
 }
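The class comment above explains why this scope exists: Hadoop and Parquet resolve classes through the thread context class loader, which does not see the plugin's jars by default. As the ParquetHadoopInputPlugin hunks earlier in this diff show, every Parquet call is wrapped in try-with-resources so the plugin's loader is installed on entry and the previous one restored on exit. A condensed sketch of that usage pattern, with reader, Value, and DataException as in the plugin code above:

    // Install the plugin class loader for the duration of the Parquet call;
    // close() restores the previous context class loader even on exception.
    Value value;
    try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
        value = reader.read();
    }
    catch (ParquetRuntimeException | IOException e) {
        throw new DataException(e);
    }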
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-input-parquet_hadoop
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Koji AGAWA
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-03-08 00:00:00.000000000 Z
+date: 2017-03-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -69,7 +69,7 @@ files:
 - classpath/curator-client-2.7.1.jar
 - classpath/curator-framework-2.7.1.jar
 - classpath/curator-recipes-2.7.1.jar
-- classpath/embulk-input-parquet_hadoop-0.1.0.jar
+- classpath/embulk-input-parquet_hadoop-0.1.1.jar
 - classpath/gson-2.2.4.jar
 - classpath/hadoop-annotations-2.7.3.jar
 - classpath/hadoop-auth-2.7.3.jar
@@ -117,7 +117,7 @@ files:
 - classpath/parquet-format-2.3.0-incubating.jar
 - classpath/parquet-hadoop-1.8.1.jar
 - classpath/parquet-jackson-1.8.1.jar
-- classpath/parquet-msgpack-0.1.0.jar
+- classpath/parquet-msgpack-0.1.1.jar
 - classpath/protobuf-java-2.5.0.jar
 - classpath/servlet-api-2.5.jar
 - classpath/slf4j-api-1.7.24.jar