embulk-output-hdfs 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f9fa40ed9c13dbc656239643f3153f160f66ef08
4
- data.tar.gz: 61fb3a7a55c94873e58f7edc4ac5e6ae1cf337ae
3
+ metadata.gz: 90bae20ab751bea6d3807b44b252062d137a710d
4
+ data.tar.gz: 482d46a137ba2fad65988bd98ec88613a884e590
5
5
  SHA512:
6
- metadata.gz: e131f8221baaa36c20fcd8ee77b88cac8dde80bc20e1663f1d5dae3d54ed88aae08862a5113f928916a2f5ddd19321bbf7270d8cc43109336ffede61e2adc99f
7
- data.tar.gz: 0afec8392aeb2d109ebe9beb738d6b5e09f029b5cc04ab16b5b9bc83246a13f8c0a2fd86393da2d924cb46791d44a5f9d78d84ad5f06c60d061f67e61b260c64
6
+ metadata.gz: b23e3d09a38d4dd493e965bd3229e87ff89d43828ce48cd71c2b0ae996575d6a8a6fd9e404b6b075c3bb7719961d45af856dc6a21e778516d3783e06b4c92cd9
7
+ data.tar.gz: c5ad1e1f16d5c632a5352c25dc3e065945a40fcdac135aeb8cc8150332d4812cc262c2869fe9308337b246ffdf0bf08448d1e8f47f27c08eb542be529491515b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ 0.2.4 (2016-04-27)
2
+ ==================
3
+ - Enhancement: Avoid creating 0-byte files
4
+ - https://github.com/civitaspo/embulk-output-hdfs/pull/14
5
+
1
6
  0.2.3 (2016-04-20)
2
7
  ==================
3
8
  - Add: `delete_in_advance` option
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.2.3"
18
+ version = "0.2.4"
19
19
 
20
20
  sourceCompatibility = 1.7
21
21
  targetCompatibility = 1.7
@@ -0,0 +1,55 @@
1
+ hdfs_example: &hdfs_example
2
+ config_files:
3
+ - /etc/hadoop/conf/core-site.xml
4
+ - /etc/hadoop/conf/hdfs-site.xml
5
+ config:
6
+ fs.defaultFS: 'hdfs://hadoop-nn1:8020'
7
+ fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
8
+ fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
9
+
10
+ local_fs_example: &local_fs_example
11
+ config:
12
+ fs.defaultFS: 'file:///'
13
+ fs.hdfs.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
14
+ fs.file.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
15
+ io.compression.codecs: 'org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec'
16
+
17
+ exec:
18
+ min_output_tasks: 10
19
+
20
+ in:
21
+ type: file
22
+ path_prefix: example/data
23
+ parser:
24
+ charset: UTF-8
25
+ newline: CRLF
26
+ type: csv
27
+ delimiter: ','
28
+ quote: '"'
29
+ header_line: true
30
+ stop_on_invalid_record: true
31
+ columns:
32
+ - {name: id, type: long}
33
+ - {name: account, type: long}
34
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
35
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
36
+ - {name: comment, type: string}
37
+
38
+
39
+ out:
40
+ type: hdfs
41
+ <<: *local_fs_example
42
+ path_prefix: /tmp/embulk-output-hdfs_example/file_
43
+ file_ext: csv
44
+ delete_in_advance: FILE_ONLY
45
+ formatter:
46
+ type: csv
47
+ newline: CRLF
48
+ newline_in_field: LF
49
+ header_line: false
50
+ charset: UTF-8
51
+ quote_policy: NONE
52
+ quote: '"'
53
+ escape: '\'
54
+ null_string: ''
55
+ default_timezone: UTC
@@ -122,23 +122,14 @@ public class HdfsFileOutputPlugin
122
122
  {
123
123
  private final List<String> hdfsFileNames = new ArrayList<>();
124
124
  private int fileIndex = 0;
125
+ private Path currentPath = null;
125
126
  private OutputStream output = null;
126
127
 
127
128
  @Override
128
129
  public void nextFile()
129
130
  {
130
131
  closeCurrentStream();
131
- Path path = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
132
- try {
133
- FileSystem fs = getFs(task);
134
- output = fs.create(path, task.getOverwrite());
135
- logger.info("Uploading '{}'", path);
136
- }
137
- catch (IOException e) {
138
- logger.error(e.getMessage());
139
- throw new RuntimeException(e);
140
- }
141
- hdfsFileNames.add(path.toString());
132
+ currentPath = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
142
133
  fileIndex++;
143
134
  }
144
135
 
@@ -146,6 +137,13 @@ public class HdfsFileOutputPlugin
146
137
  public void add(Buffer buffer)
147
138
  {
148
139
  try {
140
+ // Create the file only when the first buffer is written, so that no file is created when there is no data.
141
+ if (output == null) {
142
+ FileSystem fs = getFs(task);
143
+ output = fs.create(currentPath, task.getOverwrite());
144
+ logger.info("Uploading '{}'", currentPath);
145
+ hdfsFileNames.add(currentPath.toString());
146
+ }
149
147
  output.write(buffer.array(), buffer.offset(), buffer.limit());
150
148
  }
151
149
  catch (IOException e) {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-hdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-20 00:00:00.000000000 Z
11
+ date: 2016-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -54,6 +54,7 @@ files:
54
54
  - config/checkstyle/checkstyle.xml
55
55
  - config/checkstyle/default.xml
56
56
  - example/config.yml
57
+ - example/config_avoid_create_0byte_file.yml
57
58
  - example/data.csv
58
59
  - gradle/wrapper/gradle-wrapper.jar
59
60
  - gradle/wrapper/gradle-wrapper.properties
@@ -85,7 +86,7 @@ files:
85
86
  - classpath/curator-client-2.6.0.jar
86
87
  - classpath/curator-framework-2.6.0.jar
87
88
  - classpath/curator-recipes-2.6.0.jar
88
- - classpath/embulk-output-hdfs-0.2.3.jar
89
+ - classpath/embulk-output-hdfs-0.2.4.jar
89
90
  - classpath/gson-2.2.4.jar
90
91
  - classpath/hadoop-annotations-2.6.0.jar
91
92
  - classpath/hadoop-auth-2.6.0.jar