embulk-output-hdfs 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/build.gradle +1 -1
- data/example/config_avoid_create_0byte_file.yml +55 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +9 -11
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90bae20ab751bea6d3807b44b252062d137a710d
|
4
|
+
data.tar.gz: 482d46a137ba2fad65988bd98ec88613a884e590
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b23e3d09a38d4dd493e965bd3229e87ff89d43828ce48cd71c2b0ae996575d6a8a6fd9e404b6b075c3bb7719961d45af856dc6a21e778516d3783e06b4c92cd9
|
7
|
+
data.tar.gz: c5ad1e1f16d5c632a5352c25dc3e065945a40fcdac135aeb8cc8150332d4812cc262c2869fe9308337b246ffdf0bf08448d1e8f47f27c08eb542be529491515b
|
data/CHANGELOG.md
CHANGED
data/build.gradle
CHANGED
data/example/config_avoid_create_0byte_file.yml
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
hdfs_example: &hdfs_example
|
2
|
+
config_files:
|
3
|
+
- /etc/hadoop/conf/core-site.xml
|
4
|
+
- /etc/hadoop/conf/hdfs-site.xml
|
5
|
+
config:
|
6
|
+
fs.defaultFS: 'hdfs://hadoop-nn1:8020'
|
7
|
+
fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
|
8
|
+
fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
|
9
|
+
|
10
|
+
local_fs_example: &local_fs_example
|
11
|
+
config:
|
12
|
+
fs.defaultFS: 'file:///'
|
13
|
+
fs.hdfs.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
|
14
|
+
fs.file.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
|
15
|
+
io.compression.codecs: 'org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec'
|
16
|
+
|
17
|
+
exec:
|
18
|
+
min_output_tasks: 10
|
19
|
+
|
20
|
+
in:
|
21
|
+
type: file
|
22
|
+
path_prefix: example/data
|
23
|
+
parser:
|
24
|
+
charset: UTF-8
|
25
|
+
newline: CRLF
|
26
|
+
type: csv
|
27
|
+
delimiter: ','
|
28
|
+
quote: '"'
|
29
|
+
header_line: true
|
30
|
+
stop_on_invalid_record: true
|
31
|
+
columns:
|
32
|
+
- {name: id, type: long}
|
33
|
+
- {name: account, type: long}
|
34
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
35
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
36
|
+
- {name: comment, type: string}
|
37
|
+
|
38
|
+
|
39
|
+
out:
|
40
|
+
type: hdfs
|
41
|
+
<<: *local_fs_example
|
42
|
+
path_prefix: /tmp/embulk-output-hdfs_example/file_
|
43
|
+
file_ext: csv
|
44
|
+
delete_in_advance: FILE_ONLY
|
45
|
+
formatter:
|
46
|
+
type: csv
|
47
|
+
newline: CRLF
|
48
|
+
newline_in_field: LF
|
49
|
+
header_line: false
|
50
|
+
charset: UTF-8
|
51
|
+
quote_policy: NONE
|
52
|
+
quote: '"'
|
53
|
+
escape: '\'
|
54
|
+
null_string: ''
|
55
|
+
default_timezone: UTC
|
data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java
CHANGED
@@ -122,23 +122,14 @@ public class HdfsFileOutputPlugin
|
|
122
122
|
{
|
123
123
|
private final List<String> hdfsFileNames = new ArrayList<>();
|
124
124
|
private int fileIndex = 0;
|
125
|
+
private Path currentPath = null;
|
125
126
|
private OutputStream output = null;
|
126
127
|
|
127
128
|
@Override
|
128
129
|
public void nextFile()
|
129
130
|
{
|
130
131
|
closeCurrentStream();
|
131
|
-
|
132
|
-
try {
|
133
|
-
FileSystem fs = getFs(task);
|
134
|
-
output = fs.create(path, task.getOverwrite());
|
135
|
-
logger.info("Uploading '{}'", path);
|
136
|
-
}
|
137
|
-
catch (IOException e) {
|
138
|
-
logger.error(e.getMessage());
|
139
|
-
throw new RuntimeException(e);
|
140
|
-
}
|
141
|
-
hdfsFileNames.add(path.toString());
|
132
|
+
currentPath = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
|
142
133
|
fileIndex++;
|
143
134
|
}
|
144
135
|
|
@@ -146,6 +137,13 @@ public class HdfsFileOutputPlugin
|
|
146
137
|
public void add(Buffer buffer)
|
147
138
|
{
|
148
139
|
try {
|
140
|
+
// this implementation is for creating file when there is data.
|
141
|
+
if (output == null) {
|
142
|
+
FileSystem fs = getFs(task);
|
143
|
+
output = fs.create(currentPath, task.getOverwrite());
|
144
|
+
logger.info("Uploading '{}'", currentPath);
|
145
|
+
hdfsFileNames.add(currentPath.toString());
|
146
|
+
}
|
149
147
|
output.write(buffer.array(), buffer.offset(), buffer.limit());
|
150
148
|
}
|
151
149
|
catch (IOException e) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-hdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- config/checkstyle/checkstyle.xml
|
55
55
|
- config/checkstyle/default.xml
|
56
56
|
- example/config.yml
|
57
|
+
- example/config_avoid_create_0byte_file.yml
|
57
58
|
- example/data.csv
|
58
59
|
- gradle/wrapper/gradle-wrapper.jar
|
59
60
|
- gradle/wrapper/gradle-wrapper.properties
|
@@ -85,7 +86,7 @@ files:
|
|
85
86
|
- classpath/curator-client-2.6.0.jar
|
86
87
|
- classpath/curator-framework-2.6.0.jar
|
87
88
|
- classpath/curator-recipes-2.6.0.jar
|
88
|
-
- classpath/embulk-output-hdfs-0.2.3.jar
|
89
|
+
- classpath/embulk-output-hdfs-0.2.4.jar
|
89
90
|
- classpath/gson-2.2.4.jar
|
90
91
|
- classpath/hadoop-annotations-2.6.0.jar
|
91
92
|
- classpath/hadoop-auth-2.6.0.jar
|