embulk-input-hdfs 0.1.1 → 0.1.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b559162cba6af0dd036310522baf9559ac4ebcf3
-  data.tar.gz: 07f1fc7beb1205ba2baf4984c3495a3942514f68
+  metadata.gz: aa9425d56cb955c999bdfc8f307004f260065797
+  data.tar.gz: d11249a30d11ad70595d5961383f39d626290f4e
 SHA512:
-  metadata.gz: 85a7d5b8ba72ed14787881251084edb5e0f59e6424c17377c2545fa9f0e4c95444f3b7b40a61c1b30636d228c6973fec517f4a10be0b6338b3f73b0c8524abd5
-  data.tar.gz: 898da79bf93d26349a4916d5118980921e90829f5b6745e53678d661fb87d632a0ab4e2b9974ca5bf713fae62a58b305d2ea153675de88c5c4a05c2780f3ea8a
+  metadata.gz: 46e6e225a7cc1acf6a1396ecdf72b8fcce1b0679196bfe1bfd18baec6015d602dbedbc22e3bda74f2ce916ddbce8043152ed5d93302850ca1a79cc54f07a1fb8
+  data.tar.gz: e49523b895c4d11e10e25295f4298d5f8c2113c1d7eefc48e3a8862fadcea7eaa51c1e677b26e5d24459198da66b58668184d6c4b14f5b86878cba6dbc1384fa
data/README.md CHANGED
@@ -12,7 +12,7 @@ Read files on Hdfs.
 
 - **config_files** list of paths to Hadoop's configuration files (array of strings, default: `[]`)
 - **config** overwrites configuration parameters (hash, default: `{}`)
-- **input_path** file path on Hdfs. you can use glob and Date format like `%Y%m%d/%s`.
+- **path** file path on Hdfs. you can use glob and Date format like `%Y%m%d/%s`.
 - **rewind_seconds** When you use Date format in input_path property, the format is executed by using the time which is Now minus this property.
 - **partition** when this is true, partition input files and increase task count. (default: `true`)
 - **num_partitions** number of partitions. (default: `Runtime.getRuntime().availableProcessors()`)
@@ -30,7 +30,7 @@ in:
     dfs.replication: 1
     fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
     fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
-  input_path: /user/embulk/test/%Y-%m-%d/*
+  path: /user/embulk/test/%Y-%m-%d/*
   rewind_seconds: 86400
   partition: true
   num_partitions: 30
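The renamed `path` option accepts strftime-style placeholders, resolved against the current time minus `rewind_seconds`. The diff below shows the plugin calling a `strftime` helper and importing JRuby's `ScriptingContainer`; a minimal sketch of that resolution, assuming the helper simply shifts `Time.now` back and calls Ruby's `Time#strftime` (the actual helper body is not part of this diff):

```java
import org.jruby.embed.ScriptingContainer;

// Sketch only: assumes the plugin's strftime helper delegates to JRuby,
// as suggested by the ScriptingContainer import in HdfsFileInputPlugin.
public class PathStrftimeExample
{
    public static String strftime(String path, int rewindSeconds)
    {
        ScriptingContainer jruby = new ScriptingContainer();
        jruby.put("path", path);
        jruby.put("rewind_seconds", rewindSeconds);
        // Format the path with the current time shifted back by rewind_seconds.
        return (String) jruby.runScriptlet("(Time.now - rewind_seconds).strftime(path)");
    }

    public static void main(String[] args)
    {
        // With rewind_seconds: 86400, %Y-%m-%d resolves to yesterday's date.
        System.out.println(strftime("/user/embulk/test/%Y-%m-%d/*", 86400));
    }
}
```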
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ configurations {
     provided
 }
 
-version = "0.1.1"
+version = "0.1.4"
 
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
data/src/main/java/org/embulk/input/hdfs/HdfsFileInputPlugin.java CHANGED
@@ -1,34 +1,34 @@
 package org.embulk.input.hdfs;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-
 import com.google.common.base.Function;
-import com.google.common.base.Optional;
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.embulk.config.TaskReport;
+import org.apache.hadoop.fs.PathNotFoundException;
 import org.embulk.config.Config;
 import org.embulk.config.ConfigDefault;
-import org.embulk.config.ConfigInject;
 import org.embulk.config.ConfigDiff;
+import org.embulk.config.ConfigInject;
 import org.embulk.config.ConfigSource;
 import org.embulk.config.Task;
+import org.embulk.config.TaskReport;
 import org.embulk.config.TaskSource;
-import org.embulk.spi.*;
-import org.embulk.spi.util.InputStreamFileInput;
+import org.embulk.spi.BufferAllocator;
+import org.embulk.spi.Exec;
+import org.embulk.spi.FileInputPlugin;
+import org.embulk.spi.TransactionalFileInput;
 import org.embulk.spi.util.InputStreamTransactionalFileInput;
 import org.jruby.embed.ScriptingContainer;
 import org.slf4j.Logger;
 
 import javax.annotation.Nullable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
 
 public class HdfsFileInputPlugin implements FileInputPlugin
 {
@@ -44,8 +44,8 @@ public class HdfsFileInputPlugin implements FileInputPlugin
     @ConfigDefault("{}")
     public Map<String, String> getConfig();
 
-    @Config("input_path")
-    public String getInputPath();
+    @Config("path")
+    public String getPath();
 
     @Config("rewind_seconds")
     @ConfigDefault("0")
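In Embulk's plugin SPI, `@Config` binds a YAML key to a getter on the `Task` interface, which is why renaming the key also renames the accessor from `getInputPath()` to `getPath()`. A stripped-down, illustrative fragment of that binding (return type of `getRewindSeconds()` is an assumption; it is not shown in this diff):

```java
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.Task;

// Illustrative fragment: Embulk maps the YAML key "path" to getPath()
// when loading the task, so config keys and accessors move together.
public interface PluginTask extends Task
{
    @Config("path")
    public String getPath();

    @Config("rewind_seconds")
    @ConfigDefault("0")
    public int getRewindSeconds();
}
```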
@@ -72,9 +72,14 @@ public class HdfsFileInputPlugin implements FileInputPlugin
         PluginTask task = config.loadConfig(PluginTask.class);
 
         // listing Files
-        String pathString = strftime(task.getInputPath(), task.getRewindSeconds());
+        String pathString = strftime(task.getPath(), task.getRewindSeconds());
         try {
             List<String> originalFileList = buildFileList(getFs(task), pathString);
+
+            if (originalFileList.isEmpty()) {
+                throw new PathNotFoundException(pathString);
+            }
+
             task.setFiles(allocateHdfsFilesToTasks(task, getFs(task), originalFileList));
             logger.info("Loading target files: {}", originalFileList);
         }
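The new guard changes behavior for paths that match nothing: instead of silently producing zero tasks, the run now fails fast with Hadoop's `PathNotFoundException`. A self-contained sketch of the same check in hypothetical standalone form (the plugin performs it while listing files at the start of a run):

```java
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathNotFoundException;

// Hypothetical standalone form of the plugin's new empty-result check.
public class EmptyGlobGuard
{
    public static FileStatus[] requireMatches(FileSystem fs, String pathString)
            throws IOException
    {
        // globStatus() returns null for a non-existent literal path and an
        // empty array for a glob with no matches; treat both as "not found".
        FileStatus[] entries = fs.globStatus(new Path(pathString));
        if (entries == null || entries.length == 0) {
            throw new PathNotFoundException(pathString);
        }
        return entries;
    }
}
```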
@@ -190,13 +195,17 @@ public class HdfsFileInputPlugin implements FileInputPlugin
             throws IOException
     {
         List<String> fileList = new ArrayList<>();
-        for (FileStatus entry : fs.globStatus(new Path(pathString))) {
+        Path rootPath = new Path(pathString);
+
+        for (FileStatus entry : fs.globStatus(rootPath)) {
             if (entry.isDirectory()) {
                 fileList.addAll(lsr(fs, entry));
-            } else {
+            }
+            else {
                 fileList.add(entry.getPath().toString());
             }
         }
+
         return fileList;
     }
 
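`buildFileList` expands the glob and, for directory matches, descends via the `lsr` helper, which is referenced here but not touched by this diff. A plausible reconstruction of such a recursive listing (hypothetical; the actual implementation may differ):

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;

// Hypothetical reconstruction of the lsr() helper referenced above; the
// real implementation is outside this diff.
public class RecursiveListing
{
    public static List<String> lsr(FileSystem fs, FileStatus status)
            throws IOException
    {
        List<String> fileList = new ArrayList<>();
        if (status.isDirectory()) {
            // Descend into the directory and collect every file beneath it.
            for (FileStatus entry : fs.listStatus(status.getPath())) {
                fileList.addAll(lsr(fs, entry));
            }
        }
        else {
            fileList.add(status.getPath().toString());
        }
        return fileList;
    }
}
```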
data/src/main/java/org/embulk/input/hdfs/HdfsPartialFileInputStream.java CHANGED
@@ -117,11 +117,12 @@ public class HdfsPartialFileInputStream extends InputStream
     {
         if (current >= start) {
             return;
-
         }
+
         if (start == 0) {
             current = 0;
-        } else {
+        }
+        else {
             current = original.skip(--start);
             if (current != start) {
                 throw new IOException("Cannot skip.");
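The restructured block still relies on a single `InputStream.skip()` call reaching the target offset, throwing `IOException("Cannot skip.")` otherwise, even though `skip()` is permitted to skip fewer bytes than requested. A common, more tolerant pattern (a sketch, not the plugin's code) loops until the offset is fully consumed:

```java
import java.io.IOException;
import java.io.InputStream;

// Alternative sketch, not the plugin's code: loop until the requested
// offset is consumed instead of trusting a single skip() call.
public final class SkipFully
{
    public static void skipFully(InputStream in, long bytes)
            throws IOException
    {
        long remaining = bytes;
        while (remaining > 0) {
            long skipped = in.skip(remaining);
            if (skipped <= 0) {
                // skip() made no progress; probe with read() to detect EOF.
                if (in.read() == -1) {
                    throw new IOException("Unexpected end of stream while skipping.");
                }
                remaining--; // the probe consumed one byte
            }
            else {
                remaining -= skipped;
            }
        }
    }
}
```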
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-input-hdfs
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.4
 platform: ruby
 authors:
 - takahiro.nakayama
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-09-09 00:00:00.000000000 Z
+date: 2015-10-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -82,7 +82,7 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-input-hdfs-0.1.1.jar
+- classpath/embulk-input-hdfs-0.1.4.jar
 - classpath/gson-2.2.4.jar
 - classpath/hadoop-annotations-2.6.0.jar
 - classpath/hadoop-auth-2.6.0.jar