embulk-input-hdfs 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c428003a976a3148f1b59e7dd54c7ec870ed3bce
|
4
|
+
data.tar.gz: a2941dce02f97452b54938bf73b5d1dceba5f4c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 02ca4fc8c3c82571296eb8da1d3a61533d0017d49c2712886e0872fbebf58ae8656412c61e3b6f1d6aaf71078ad1e886029a91907df92e236b2c56d3cbbb6083
|
7
|
+
data.tar.gz: f218db27f48822f33427ac0ebcccf5c58c41cdec92486cf9ceb1ddb240f5332744b8d65daf6a672e18488aa675c597ef56d34c994b7c04fe218a5e9c3428c91d
|
data/build.gradle
CHANGED
@@ -58,7 +58,7 @@ public class HdfsFileInputPlugin implements FileInputPlugin
|
|
58
58
|
|
59
59
|
@Config("num_partitions") // this parameter is the approximate value.
|
60
60
|
@ConfigDefault("-1") // Default: Runtime.getRuntime().availableProcessors()
|
61
|
-
public
|
61
|
+
public long getApproximateNumPartitions();
|
62
62
|
|
63
63
|
public List<HdfsPartialFile> getFiles();
|
64
64
|
public void setFiles(List<HdfsPartialFile> hdfsFiles);
|
@@ -246,25 +246,25 @@ public class HdfsFileInputPlugin implements FileInputPlugin
|
|
246
246
|
}
|
247
247
|
});
|
248
248
|
|
249
|
-
|
249
|
+
long totalFileLength = 0;
|
250
250
|
for (Path path : pathList) {
|
251
251
|
totalFileLength += fs.getFileStatus(path).getLen();
|
252
252
|
}
|
253
253
|
|
254
254
|
// TODO: optimum allocation of resources
|
255
|
-
|
255
|
+
long approximateNumPartitions =
|
256
256
|
(task.getApproximateNumPartitions() <= 0) ? Runtime.getRuntime().availableProcessors() : task.getApproximateNumPartitions();
|
257
|
-
|
257
|
+
long partitionSizeByOneTask = totalFileLength / approximateNumPartitions;
|
258
258
|
|
259
259
|
List<HdfsPartialFile> hdfsPartialFiles = new ArrayList<>();
|
260
260
|
for (Path path : pathList) {
|
261
|
-
|
261
|
+
long fileLength = fs.getFileStatus(path).getLen(); // declare `fileLength` here because this is used below.
|
262
262
|
if (fileLength <= 0) {
|
263
263
|
logger.info("embulk-input-hdfs: Skip the 0 byte target file: {}", path);
|
264
264
|
continue;
|
265
265
|
}
|
266
266
|
|
267
|
-
|
267
|
+
long numPartitions;
|
268
268
|
if (path.toString().endsWith(".gz") || path.toString().endsWith(".bz2") || path.toString().endsWith(".lzo")) {
|
269
269
|
numPartitions = 1;
|
270
270
|
}
|
@@ -14,9 +14,9 @@ public class HdfsFilePartitioner
|
|
14
14
|
{
|
15
15
|
private FileSystem fs;
|
16
16
|
private Path path;
|
17
|
-
private
|
17
|
+
private long numPartitions;
|
18
18
|
|
19
|
-
public HdfsFilePartitioner(FileSystem fs, Path path,
|
19
|
+
public HdfsFilePartitioner(FileSystem fs, Path path, long numPartitions)
|
20
20
|
{
|
21
21
|
this.fs = fs;
|
22
22
|
this.path = path;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-hdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- takahiro.nakayama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -82,7 +82,7 @@ files:
|
|
82
82
|
- classpath/curator-client-2.6.0.jar
|
83
83
|
- classpath/curator-framework-2.6.0.jar
|
84
84
|
- classpath/curator-recipes-2.6.0.jar
|
85
|
-
- classpath/embulk-input-hdfs-0.1.7.jar
|
85
|
+
- classpath/embulk-input-hdfs-0.1.8.jar
|
86
86
|
- classpath/gson-2.2.4.jar
|
87
87
|
- classpath/hadoop-annotations-2.6.0.jar
|
88
88
|
- classpath/hadoop-auth-2.6.0.jar
|