embulk-input-hdfs 0.1.9 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java CHANGED
@@ -30,6 +30,7 @@ import javax.annotation.Nullable;
 
 import java.io.File;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 import static org.junit.Assert.assertEquals;
@@ -69,6 +70,8 @@ public class TestHdfsFileInputPlugin
         assertEquals(true, task.getPartition());
         assertEquals(0, task.getRewindSeconds());
         assertEquals(-1, task.getApproximateNumPartitions());
+        assertEquals(0, task.getSkipHeaderLines());
+        assertEquals(false, task.getDecompression());
     }
 
     @Test(expected = ConfigException.class)
@@ -99,17 +102,14 @@ public class TestHdfsFileInputPlugin
                     }
                 });
 
-                List<String> resultFList = Lists.transform(task.getFiles(), new Function<HdfsPartialFile, String>()
-                {
-                    @Nullable
-                    @Override
-                    public String apply(@Nullable HdfsPartialFile input)
-                    {
-                        assert input != null;
-                        return input.getPath();
+                List<String> resultFList = Lists.newArrayList();
+                for (int i = 0; i < task.getPartialFileList().getTaskCount();i++) {
+                    for (PartialFile partialFile : task.getPartialFileList().get(i)) {
+                        resultFList.add(partialFile.getPath().toString());
                     }
-                });
-                assertEquals(fileList, resultFList);
+                }
+                assertEquals(fileList.size(), resultFList.size());
+                assert fileList.containsAll(resultFList);
 
                 return emptyTaskReports(taskCount);
             }
         });
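The hunk above replaces a Guava Lists.transform view over the old flat task.getFiles() list with a plain nested loop over the new task.getPartialFileList(), which groups partial files per task index; the assertion also relaxes from list equality to size plus containsAll, presumably because per-task grouping no longer guarantees the original ordering. As a rough, self-contained illustration of the two collection styles (using a simplified stand-in PartialFile class, not the plugin's own types):

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;

public class TransformVsLoop
{
    // Simplified stand-in for the plugin's PartialFile; not the real class.
    static class PartialFile
    {
        private final String path;
        PartialFile(String path) { this.path = path; }
        String getPath() { return path; }
    }

    public static void main(String[] args)
    {
        List<PartialFile> files = Lists.newArrayList(
                new PartialFile("/tmp/a.csv"), new PartialFile("/tmp/b.csv"));

        // Old style: a lazy Guava view mapping each PartialFile to its path.
        List<String> viaTransform = Lists.transform(files, new Function<PartialFile, String>()
        {
            @Override
            public String apply(PartialFile input)
            {
                return input.getPath();
            }
        });

        // New style: an explicit loop copying the paths into a fresh list.
        List<String> viaLoop = new ArrayList<>();
        for (PartialFile f : files) {
            viaLoop.add(f.getPath());
        }

        System.out.println(viaTransform.equals(viaLoop)); // true
    }
}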
@@ -120,8 +120,9 @@ public class TestHdfsFileInputPlugin
     {
         ConfigSource config = getConfigWithDefaultValues();
         config.set("num_partitions", 10);
+        config.set("decompression", true);
         runner.transaction(config, new Control());
-        assertRecords(config, output);
+        assertRecords(config, output, 12);
     }
 
     @Test
@@ -129,8 +130,31 @@ public class TestHdfsFileInputPlugin
     {
         ConfigSource config = getConfigWithDefaultValues();
         config.set("partition", false);
+        config.set("decompression", true);
+        runner.transaction(config, new Control());
+        assertRecords(config, output, 12);
+    }
+
+    @Test
+    public void testHdfsFileInputByOpenWithoutCompressionCodec()
+    {
+        ConfigSource config = getConfigWithDefaultValues();
+        config.set("partition", false);
+        config.set("path", getClass().getResource("/sample_01.csv").getPath());
         runner.transaction(config, new Control());
-        assertRecords(config, output);
+        assertRecords(config, output, 4);
+    }
+
+    @Test
+    public void testStrftime()
+    {
+        ConfigSource config = getConfigWithDefaultValues();
+        config.set("path", "/tmp/%Y-%m-%d");
+        config.set("rewind_seconds", 86400);
+        PluginTask task = config.loadConfig(PluginTask.class);
+        String result = plugin.strftime(task.getJRuby(), task.getPath(), task.getRewindSeconds());
+        String expected = task.getJRuby().runScriptlet("(Time.now - 86400).strftime('/tmp/%Y-%m-%d')").toString();
+        assertEquals(expected, result);
     }
 
     private class Control
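The new testStrftime case exercises the plugin's strftime-style path expansion: the configured path is treated as a Ruby strftime pattern and evaluated against (Time.now - rewind_seconds) through the embedded JRuby runtime. As a rough, JRuby-free sketch of the same idea using java.time (an illustration of the expected behavior only, not the plugin's actual implementation):

import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;

public class StrftimeSketch
{
    // Expands a pattern like "/tmp/%Y-%m-%d" against (now - rewindSeconds).
    // Only the %Y/%m/%d conversions used in the test are handled here.
    static String expand(String pathPattern, long rewindSeconds)
    {
        ZonedDateTime t = ZonedDateTime.now(ZoneId.systemDefault()).minusSeconds(rewindSeconds);
        return pathPattern
                .replace("%Y", t.format(DateTimeFormatter.ofPattern("yyyy")))
                .replace("%m", t.format(DateTimeFormatter.ofPattern("MM")))
                .replace("%d", t.format(DateTimeFormatter.ofPattern("dd")));
    }

    public static void main(String[] args)
    {
        // With rewind_seconds = 86400, "/tmp/%Y-%m-%d" resolves to yesterday's directory.
        System.out.println(expand("/tmp/%Y-%m-%d", 86400));
    }
}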
@@ -201,10 +225,10 @@ public class TestHdfsFileInputPlugin
         return builder.build();
     }
 
-    private void assertRecords(ConfigSource config, MockPageOutput output)
+    private void assertRecords(ConfigSource config, MockPageOutput output, long size)
     {
         List<Object[]> records = getRecords(config, output);
-        assertEquals(8, records.size());
+        assertEquals(size, records.size());
         {
             Object[] record = records.get(0);
             assertEquals(1L, record[0]);
Binary files changed (contents not shown in this diff)
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-input-hdfs
 version: !ruby/object:Gem::Version
-  version: 0.1.9
+  version: 0.2.1
 platform: ruby
 authors:
 - Civitaspo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-02-08 00:00:00.000000000 Z
+date: 2016-02-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -47,6 +47,7 @@ extra_rdoc_files: []
 files:
 - .gitignore
 - .travis.yml
+- CHENGELOG.md
 - LICENSE.txt
 - README.md
 - build.gradle
@@ -54,18 +55,22 @@ files:
 - config/checkstyle/default.xml
 - example/config.yml
 - example/data.csv
+- example/data2.csv.gz
 - gradle/wrapper/gradle-wrapper.jar
 - gradle/wrapper/gradle-wrapper.properties
 - gradlew
 - gradlew.bat
 - lib/embulk/input/hdfs.rb
+- src/main/java/org/embulk/input/hdfs/ConfigurationBuilder.java
 - src/main/java/org/embulk/input/hdfs/HdfsFileInputPlugin.java
-- src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java
-- src/main/java/org/embulk/input/hdfs/HdfsPartialFile.java
-- src/main/java/org/embulk/input/hdfs/HdfsPartialFileInputStream.java
+- src/main/java/org/embulk/input/hdfs/PartialFile.java
+- src/main/java/org/embulk/input/hdfs/PartialFileInputStream.java
+- src/main/java/org/embulk/input/hdfs/PartialFileInputStreamBuilder.java
+- src/main/java/org/embulk/input/hdfs/PartialFileList.java
 - src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java
 - src/test/resources/sample_01.csv
 - src/test/resources/sample_02.csv
+- src/test/resources/sample_03.csv.gz
 - classpath/activation-1.1.jar
 - classpath/apacheds-i18n-2.0.0-M15.jar
 - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
@@ -89,23 +94,23 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-input-hdfs-0.1.9.jar
+- classpath/embulk-input-hdfs-0.2.1.jar
 - classpath/gson-2.2.4.jar
-- classpath/hadoop-annotations-2.6.3.jar
-- classpath/hadoop-auth-2.6.3.jar
-- classpath/hadoop-client-2.6.3.jar
-- classpath/hadoop-common-2.6.3.jar
-- classpath/hadoop-hdfs-2.6.3.jar
-- classpath/hadoop-mapreduce-client-app-2.6.3.jar
-- classpath/hadoop-mapreduce-client-common-2.6.3.jar
-- classpath/hadoop-mapreduce-client-core-2.6.3.jar
-- classpath/hadoop-mapreduce-client-jobclient-2.6.3.jar
-- classpath/hadoop-mapreduce-client-shuffle-2.6.3.jar
-- classpath/hadoop-yarn-api-2.6.3.jar
-- classpath/hadoop-yarn-client-2.6.3.jar
-- classpath/hadoop-yarn-common-2.6.3.jar
-- classpath/hadoop-yarn-server-common-2.6.3.jar
-- classpath/hadoop-yarn-server-nodemanager-2.6.3.jar
+- classpath/hadoop-annotations-2.6.4.jar
+- classpath/hadoop-auth-2.6.4.jar
+- classpath/hadoop-client-2.6.4.jar
+- classpath/hadoop-common-2.6.4.jar
+- classpath/hadoop-hdfs-2.6.4.jar
+- classpath/hadoop-mapreduce-client-app-2.6.4.jar
+- classpath/hadoop-mapreduce-client-common-2.6.4.jar
+- classpath/hadoop-mapreduce-client-core-2.6.4.jar
+- classpath/hadoop-mapreduce-client-jobclient-2.6.4.jar
+- classpath/hadoop-mapreduce-client-shuffle-2.6.4.jar
+- classpath/hadoop-yarn-api-2.6.4.jar
+- classpath/hadoop-yarn-client-2.6.4.jar
+- classpath/hadoop-yarn-common-2.6.4.jar
+- classpath/hadoop-yarn-server-common-2.6.4.jar
+- classpath/hadoop-yarn-server-nodemanager-2.6.4.jar
 - classpath/htrace-core-3.0.4.jar
 - classpath/httpclient-4.2.5.jar
 - classpath/httpcore-4.2.4.jar
src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java DELETED
@@ -1,40 +0,0 @@
-package org.embulk.input.hdfs;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Created by takahiro.nakayama on 8/20/15.
- */
-public class HdfsFilePartitioner
-{
-    private FileSystem fs;
-    private Path path;
-    private long numPartitions;
-
-    public HdfsFilePartitioner(FileSystem fs, Path path, long numPartitions)
-    {
-        this.fs = fs;
-        this.path = path;
-        this.numPartitions = numPartitions;
-    }
-
-    public List<HdfsPartialFile> getHdfsPartialFiles()
-            throws IOException
-    {
-        List<HdfsPartialFile> hdfsPartialFiles = new ArrayList<>();
-        long size = fs.getFileStatus(path).getLen();
-        for (int i = 0; i < numPartitions; i++) {
-            long start = size * i / numPartitions;
-            long end = size * (i + 1) / numPartitions;
-            if (start < end) {
-                hdfsPartialFiles.add(new HdfsPartialFile(path.toString(), start, end));
-            }
-        }
-        return hdfsPartialFiles;
-    }
-}
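The deleted HdfsFilePartitioner above computed each partition's byte range as start = size * i / numPartitions and end = size * (i + 1) / numPartitions, dropping empty ranges. A small, self-contained sketch of that arithmetic follows; it only reproduces how the old splitting behaved, whereas the 0.2.x code moves this responsibility into the new PartialFile/PartialFileList classes:

import java.util.ArrayList;
import java.util.List;

public class PartitionRanges
{
    // One byte range [start, end) of a file, analogous to the removed HdfsPartialFile.
    static class Range
    {
        final long start;
        final long end;
        Range(long start, long end) { this.start = start; this.end = end; }
    }

    // Splits a file of `size` bytes into at most `numPartitions` non-empty ranges,
    // using the same integer arithmetic as the removed HdfsFilePartitioner.
    static List<Range> split(long size, long numPartitions)
    {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < numPartitions; i++) {
            long start = size * i / numPartitions;
            long end = size * (i + 1) / numPartitions;
            if (start < end) {
                ranges.add(new Range(start, end));
            }
        }
        return ranges;
    }

    public static void main(String[] args)
    {
        // A 10-byte file split into 4 partitions: [0,2), [2,5), [5,7), [7,10).
        for (Range r : split(10, 4)) {
            System.out.println(r.start + " - " + r.end);
        }
    }
}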
src/main/java/org/embulk/input/hdfs/HdfsPartialFile.java DELETED
@@ -1,39 +0,0 @@
-package org.embulk.input.hdfs;
-
-/**
- * Created by takahiro.nakayama on 8/20/15.
- */
-// ref. https://github.com/hito4t/embulk-input-filesplit/blob/master/src/main/java/org/embulk/input/filesplit/PartialFile.java
-public class HdfsPartialFile
-{
-    private String path;
-    private long start;
-    private long end;
-
-    public HdfsPartialFile(String path, long start, long end)
-    {
-        this.path = path;
-        this.start = start;
-        this.end = end;
-    }
-
-    // see: http://stackoverflow.com/questions/7625783/jsonmappingexception-no-suitable-constructor-found-for-type-simple-type-class
-    public HdfsPartialFile()
-    {
-    }
-
-    public String getPath()
-    {
-        return path;
-    }
-
-    public long getStart()
-    {
-        return start;
-    }
-
-    public long getEnd()
-    {
-        return end;
-    }
-}