embulk-input-hdfs 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e125254c0d5e9b6837c42a6dc3a6701c8897395
4
- data.tar.gz: 1b2c5aba3844aabd1e7d7eb1af7a177eb9ea8ae0
3
+ metadata.gz: c428003a976a3148f1b59e7dd54c7ec870ed3bce
4
+ data.tar.gz: a2941dce02f97452b54938bf73b5d1dceba5f4c5
5
5
  SHA512:
6
- metadata.gz: 70d2eb9a08bcd17da14a3c4815d2cb88ab6941cd33d7a08d913e1006349d4690a28a3994797120edec3aab7c9a6978aaf4368419cae674b5fce65b5c33b0039c
7
- data.tar.gz: 65a03af4289551d279a60b028b77602d606c6a7816ec328fd910c5f6d0a171833260675350766e9f22f5426781a9a9cd0cf6e275780ba43a57c8b7ae105786bf
6
+ metadata.gz: 02ca4fc8c3c82571296eb8da1d3a61533d0017d49c2712886e0872fbebf58ae8656412c61e3b6f1d6aaf71078ad1e886029a91907df92e236b2c56d3cbbb6083
7
+ data.tar.gz: f218db27f48822f33427ac0ebcccf5c58c41cdec92486cf9ceb1ddb240f5332744b8d65daf6a672e18488aa675c597ef56d34c994b7c04fe218a5e9c3428c91d
@@ -12,7 +12,7 @@ configurations {
12
12
  provided
13
13
  }
14
14
 
15
- version = "0.1.7"
15
+ version = "0.1.8"
16
16
 
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
@@ -58,7 +58,7 @@ public class HdfsFileInputPlugin implements FileInputPlugin
58
58
 
59
59
  @Config("num_partitions") // this parameter is the approximate value.
60
60
  @ConfigDefault("-1") // Default: Runtime.getRuntime().availableProcessors()
61
- public int getApproximateNumPartitions();
61
+ public long getApproximateNumPartitions();
62
62
 
63
63
  public List<HdfsPartialFile> getFiles();
64
64
  public void setFiles(List<HdfsPartialFile> hdfsFiles);
@@ -246,25 +246,25 @@ public class HdfsFileInputPlugin implements FileInputPlugin
246
246
  }
247
247
  });
248
248
 
249
- int totalFileLength = 0;
249
+ long totalFileLength = 0;
250
250
  for (Path path : pathList) {
251
251
  totalFileLength += fs.getFileStatus(path).getLen();
252
252
  }
253
253
 
254
254
  // TODO: optimum allocation of resources
255
- int approximateNumPartitions =
255
+ long approximateNumPartitions =
256
256
  (task.getApproximateNumPartitions() <= 0) ? Runtime.getRuntime().availableProcessors() : task.getApproximateNumPartitions();
257
- int partitionSizeByOneTask = totalFileLength / approximateNumPartitions;
257
+ long partitionSizeByOneTask = totalFileLength / approximateNumPartitions;
258
258
 
259
259
  List<HdfsPartialFile> hdfsPartialFiles = new ArrayList<>();
260
260
  for (Path path : pathList) {
261
- int fileLength = (int) fs.getFileStatus(path).getLen(); // declare `fileLength` here because this is used below.
261
+ long fileLength = fs.getFileStatus(path).getLen(); // declare `fileLength` here because this is used below.
262
262
  if (fileLength <= 0) {
263
263
  logger.info("embulk-input-hdfs: Skip the 0 byte target file: {}", path);
264
264
  continue;
265
265
  }
266
266
 
267
- int numPartitions;
267
+ long numPartitions;
268
268
  if (path.toString().endsWith(".gz") || path.toString().endsWith(".bz2") || path.toString().endsWith(".lzo")) {
269
269
  numPartitions = 1;
270
270
  }
@@ -14,9 +14,9 @@ public class HdfsFilePartitioner
14
14
  {
15
15
  private FileSystem fs;
16
16
  private Path path;
17
- private int numPartitions;
17
+ private long numPartitions;
18
18
 
19
- public HdfsFilePartitioner(FileSystem fs, Path path, int numPartitions)
19
+ public HdfsFilePartitioner(FileSystem fs, Path path, long numPartitions)
20
20
  {
21
21
  this.fs = fs;
22
22
  this.path = path;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-hdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - takahiro.nakayama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-01 00:00:00.000000000 Z
11
+ date: 2015-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -82,7 +82,7 @@ files:
82
82
  - classpath/curator-client-2.6.0.jar
83
83
  - classpath/curator-framework-2.6.0.jar
84
84
  - classpath/curator-recipes-2.6.0.jar
85
- - classpath/embulk-input-hdfs-0.1.7.jar
85
+ - classpath/embulk-input-hdfs-0.1.8.jar
86
86
  - classpath/gson-2.2.4.jar
87
87
  - classpath/hadoop-annotations-2.6.0.jar
88
88
  - classpath/hadoop-auth-2.6.0.jar