embulk-output-hdfs 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/build.gradle +1 -1
- data/example/config_avoid_create_0byte_file.yml +55 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +9 -11
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90bae20ab751bea6d3807b44b252062d137a710d
|
4
|
+
data.tar.gz: 482d46a137ba2fad65988bd98ec88613a884e590
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b23e3d09a38d4dd493e965bd3229e87ff89d43828ce48cd71c2b0ae996575d6a8a6fd9e404b6b075c3bb7719961d45af856dc6a21e778516d3783e06b4c92cd9
|
7
|
+
data.tar.gz: c5ad1e1f16d5c632a5352c25dc3e065945a40fcdac135aeb8cc8150332d4812cc262c2869fe9308337b246ffdf0bf08448d1e8f47f27c08eb542be529491515b
|
data/CHANGELOG.md
CHANGED
data/build.gradle
CHANGED
data/example/config_avoid_create_0byte_file.yml
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
hdfs_example: &hdfs_example
|
2
|
+
config_files:
|
3
|
+
- /etc/hadoop/conf/core-site.xml
|
4
|
+
- /etc/hadoop/conf/hdfs-site.xml
|
5
|
+
config:
|
6
|
+
fs.defaultFS: 'hdfs://hadoop-nn1:8020'
|
7
|
+
fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
|
8
|
+
fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
|
9
|
+
|
10
|
+
local_fs_example: &local_fs_example
|
11
|
+
config:
|
12
|
+
fs.defaultFS: 'file:///'
|
13
|
+
fs.hdfs.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
|
14
|
+
fs.file.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
|
15
|
+
io.compression.codecs: 'org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec'
|
16
|
+
|
17
|
+
exec:
|
18
|
+
min_output_tasks: 10
|
19
|
+
|
20
|
+
in:
|
21
|
+
type: file
|
22
|
+
path_prefix: example/data
|
23
|
+
parser:
|
24
|
+
charset: UTF-8
|
25
|
+
newline: CRLF
|
26
|
+
type: csv
|
27
|
+
delimiter: ','
|
28
|
+
quote: '"'
|
29
|
+
header_line: true
|
30
|
+
stop_on_invalid_record: true
|
31
|
+
columns:
|
32
|
+
- {name: id, type: long}
|
33
|
+
- {name: account, type: long}
|
34
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
35
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
36
|
+
- {name: comment, type: string}
|
37
|
+
|
38
|
+
|
39
|
+
out:
|
40
|
+
type: hdfs
|
41
|
+
<<: *local_fs_example
|
42
|
+
path_prefix: /tmp/embulk-output-hdfs_example/file_
|
43
|
+
file_ext: csv
|
44
|
+
delete_in_advance: FILE_ONLY
|
45
|
+
formatter:
|
46
|
+
type: csv
|
47
|
+
newline: CRLF
|
48
|
+
newline_in_field: LF
|
49
|
+
header_line: false
|
50
|
+
charset: UTF-8
|
51
|
+
quote_policy: NONE
|
52
|
+
quote: '"'
|
53
|
+
escape: '\'
|
54
|
+
null_string: ''
|
55
|
+
default_timezone: UTC
|
data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java
CHANGED
@@ -122,23 +122,14 @@ public class HdfsFileOutputPlugin
|
|
122
122
|
{
|
123
123
|
private final List<String> hdfsFileNames = new ArrayList<>();
|
124
124
|
private int fileIndex = 0;
|
125
|
+
private Path currentPath = null;
|
125
126
|
private OutputStream output = null;
|
126
127
|
|
127
128
|
@Override
|
128
129
|
public void nextFile()
|
129
130
|
{
|
130
131
|
closeCurrentStream();
|
131
|
-
|
132
|
-
try {
|
133
|
-
FileSystem fs = getFs(task);
|
134
|
-
output = fs.create(path, task.getOverwrite());
|
135
|
-
logger.info("Uploading '{}'", path);
|
136
|
-
}
|
137
|
-
catch (IOException e) {
|
138
|
-
logger.error(e.getMessage());
|
139
|
-
throw new RuntimeException(e);
|
140
|
-
}
|
141
|
-
hdfsFileNames.add(path.toString());
|
132
|
+
currentPath = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
|
142
133
|
fileIndex++;
|
143
134
|
}
|
144
135
|
|
@@ -146,6 +137,13 @@ public class HdfsFileOutputPlugin
|
|
146
137
|
public void add(Buffer buffer)
|
147
138
|
{
|
148
139
|
try {
|
140
|
+
// this implementation is for creating file when there is data.
|
141
|
+
if (output == null) {
|
142
|
+
FileSystem fs = getFs(task);
|
143
|
+
output = fs.create(currentPath, task.getOverwrite());
|
144
|
+
logger.info("Uploading '{}'", currentPath);
|
145
|
+
hdfsFileNames.add(currentPath.toString());
|
146
|
+
}
|
149
147
|
output.write(buffer.array(), buffer.offset(), buffer.limit());
|
150
148
|
}
|
151
149
|
catch (IOException e) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-hdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- config/checkstyle/checkstyle.xml
|
55
55
|
- config/checkstyle/default.xml
|
56
56
|
- example/config.yml
|
57
|
+
- example/config_avoid_create_0byte_file.yml
|
57
58
|
- example/data.csv
|
58
59
|
- gradle/wrapper/gradle-wrapper.jar
|
59
60
|
- gradle/wrapper/gradle-wrapper.properties
|
@@ -85,7 +86,7 @@ files:
|
|
85
86
|
- classpath/curator-client-2.6.0.jar
|
86
87
|
- classpath/curator-framework-2.6.0.jar
|
87
88
|
- classpath/curator-recipes-2.6.0.jar
|
88
|
-
- classpath/embulk-output-hdfs-0.2.3.jar
|
89
|
+
- classpath/embulk-output-hdfs-0.2.4.jar
|
89
90
|
- classpath/gson-2.2.4.jar
|
90
91
|
- classpath/hadoop-annotations-2.6.0.jar
|
91
92
|
- classpath/hadoop-auth-2.6.0.jar
|