embulk-output-hdfs 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f9fa40ed9c13dbc656239643f3153f160f66ef08
4
- data.tar.gz: 61fb3a7a55c94873e58f7edc4ac5e6ae1cf337ae
3
+ metadata.gz: 90bae20ab751bea6d3807b44b252062d137a710d
4
+ data.tar.gz: 482d46a137ba2fad65988bd98ec88613a884e590
5
5
  SHA512:
6
- metadata.gz: e131f8221baaa36c20fcd8ee77b88cac8dde80bc20e1663f1d5dae3d54ed88aae08862a5113f928916a2f5ddd19321bbf7270d8cc43109336ffede61e2adc99f
7
- data.tar.gz: 0afec8392aeb2d109ebe9beb738d6b5e09f029b5cc04ab16b5b9bc83246a13f8c0a2fd86393da2d924cb46791d44a5f9d78d84ad5f06c60d061f67e61b260c64
6
+ metadata.gz: b23e3d09a38d4dd493e965bd3229e87ff89d43828ce48cd71c2b0ae996575d6a8a6fd9e404b6b075c3bb7719961d45af856dc6a21e778516d3783e06b4c92cd9
7
+ data.tar.gz: c5ad1e1f16d5c632a5352c25dc3e065945a40fcdac135aeb8cc8150332d4812cc262c2869fe9308337b246ffdf0bf08448d1e8f47f27c08eb542be529491515b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ 0.2.4 (2016-04-27)
2
+ ==================
3
+ - Enhancement: Avoid creating 0-byte files
4
+ - https://github.com/civitaspo/embulk-output-hdfs/pull/14
5
+
1
6
  0.2.3 (2016-04-20)
2
7
  ==================
3
8
  - Add: `delete_in_advance` option
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.2.3"
18
+ version = "0.2.4"
19
19
 
20
20
  sourceCompatibility = 1.7
21
21
  targetCompatibility = 1.7
@@ -0,0 +1,55 @@
1
+ hdfs_example: &hdfs_example
2
+ config_files:
3
+ - /etc/hadoop/conf/core-site.xml
4
+ - /etc/hadoop/conf/hdfs-site.xml
5
+ config:
6
+ fs.defaultFS: 'hdfs://hadoop-nn1:8020'
7
+ fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
8
+ fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
9
+
10
+ local_fs_example: &local_fs_example
11
+ config:
12
+ fs.defaultFS: 'file:///'
13
+ fs.hdfs.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
14
+ fs.file.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
15
+ io.compression.codecs: 'org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec'
16
+
17
+ exec:
18
+ min_output_tasks: 10
19
+
20
+ in:
21
+ type: file
22
+ path_prefix: example/data
23
+ parser:
24
+ charset: UTF-8
25
+ newline: CRLF
26
+ type: csv
27
+ delimiter: ','
28
+ quote: '"'
29
+ header_line: true
30
+ stop_on_invalid_record: true
31
+ columns:
32
+ - {name: id, type: long}
33
+ - {name: account, type: long}
34
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
35
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
36
+ - {name: comment, type: string}
37
+
38
+
39
+ out:
40
+ type: hdfs
41
+ <<: *local_fs_example
42
+ path_prefix: /tmp/embulk-output-hdfs_example/file_
43
+ file_ext: csv
44
+ delete_in_advance: FILE_ONLY
45
+ formatter:
46
+ type: csv
47
+ newline: CRLF
48
+ newline_in_field: LF
49
+ header_line: false
50
+ charset: UTF-8
51
+ quote_policy: NONE
52
+ quote: '"'
53
+ escape: '\'
54
+ null_string: ''
55
+ default_timezone: UTC
@@ -122,23 +122,14 @@ public class HdfsFileOutputPlugin
122
122
  {
123
123
  private final List<String> hdfsFileNames = new ArrayList<>();
124
124
  private int fileIndex = 0;
125
+ private Path currentPath = null;
125
126
  private OutputStream output = null;
126
127
 
127
128
  @Override
128
129
  public void nextFile()
129
130
  {
130
131
  closeCurrentStream();
131
- Path path = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
132
- try {
133
- FileSystem fs = getFs(task);
134
- output = fs.create(path, task.getOverwrite());
135
- logger.info("Uploading '{}'", path);
136
- }
137
- catch (IOException e) {
138
- logger.error(e.getMessage());
139
- throw new RuntimeException(e);
140
- }
141
- hdfsFileNames.add(path.toString());
132
+ currentPath = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
142
133
  fileIndex++;
143
134
  }
144
135
 
@@ -146,6 +137,13 @@ public class HdfsFileOutputPlugin
146
137
  public void add(Buffer buffer)
147
138
  {
148
139
  try {
140
+ // this implementation is for creating file when there is data.
141
+ if (output == null) {
142
+ FileSystem fs = getFs(task);
143
+ output = fs.create(currentPath, task.getOverwrite());
144
+ logger.info("Uploading '{}'", currentPath);
145
+ hdfsFileNames.add(currentPath.toString());
146
+ }
149
147
  output.write(buffer.array(), buffer.offset(), buffer.limit());
150
148
  }
151
149
  catch (IOException e) {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-hdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-20 00:00:00.000000000 Z
11
+ date: 2016-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -54,6 +54,7 @@ files:
54
54
  - config/checkstyle/checkstyle.xml
55
55
  - config/checkstyle/default.xml
56
56
  - example/config.yml
57
+ - example/config_avoid_create_0byte_file.yml
57
58
  - example/data.csv
58
59
  - gradle/wrapper/gradle-wrapper.jar
59
60
  - gradle/wrapper/gradle-wrapper.properties
@@ -85,7 +86,7 @@ files:
85
86
  - classpath/curator-client-2.6.0.jar
86
87
  - classpath/curator-framework-2.6.0.jar
87
88
  - classpath/curator-recipes-2.6.0.jar
88
- - classpath/embulk-output-hdfs-0.2.3.jar
89
+ - classpath/embulk-output-hdfs-0.2.4.jar
89
90
  - classpath/gson-2.2.4.jar
90
91
  - classpath/hadoop-annotations-2.6.0.jar
91
92
  - classpath/hadoop-auth-2.6.0.jar