embulk-input-filesplit 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/classpath/{embulk-input-filesplit-0.1.1.jar → embulk-input-filesplit-0.1.2.jar} +0 -0
- data/src/main/java/org/embulk/input/filesplit/LocalFileSplitInputPlugin.java +37 -37
- data/src/main/java/org/embulk/input/filesplit/PartialFile.java +49 -52
- data/src/main/java/org/embulk/input/filesplit/PartialFileInputStream.java +154 -154
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db346109428797d7798e50be35a6d8b24b016e84
|
4
|
+
data.tar.gz: e3c3206c03686b1ba05d3ddcd955768451f871c4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b4df6195347a2eae091b9e783e1c554244885d408f7e89706a27129f7afc18ad76a031bb1155b17818d81b8399c2286cc02d6772ff137c59df09bc30d3331d81
|
7
|
+
data.tar.gz: 49a1b4306e12a38532ef48dbe3c51094a970b9869e2d9510ab79c8fb8fdbdc35fd1b88df963b0a929a49dea546ce78f0a9313e78da2d894041b77fb8491264d4
|
data/build.gradle
CHANGED
Binary file
|
@@ -31,7 +31,7 @@ import com.google.common.base.Optional;
|
|
31
31
|
public class LocalFileSplitInputPlugin
|
32
32
|
implements FileInputPlugin
|
33
33
|
{
|
34
|
-
|
34
|
+
public interface PluginTask
|
35
35
|
extends Task
|
36
36
|
{
|
37
37
|
@Config("path")
|
@@ -59,22 +59,22 @@ public class LocalFileSplitInputPlugin
|
|
59
59
|
|
60
60
|
int tasks;
|
61
61
|
if (task.getTasks().isPresent()) {
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
62
|
+
tasks = task.getTasks().get();
|
63
|
+
if (tasks <= 0) {
|
64
|
+
throw new IllegalArgumentException(String.format("'tasks' is %d but must be greater than 0", tasks));
|
65
|
+
}
|
66
66
|
} else {
|
67
|
-
|
67
|
+
tasks = Runtime.getRuntime().availableProcessors() * 2;
|
68
68
|
}
|
69
69
|
|
70
70
|
long size = new File(task.getPath()).length();
|
71
71
|
List<PartialFile> files = new ArrayList<PartialFile>();
|
72
72
|
for (int i = 0; i < tasks; i++) {
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
73
|
+
long start = size * i / tasks;
|
74
|
+
long end = size * (i + 1) / tasks;
|
75
|
+
if (start < end) {
|
76
|
+
files.add(new PartialFile(task.getPath(), start, end));
|
77
|
+
}
|
78
78
|
}
|
79
79
|
|
80
80
|
task.setFiles(files);
|
@@ -132,7 +132,7 @@ public class LocalFileSplitInputPlugin
|
|
132
132
|
|
133
133
|
InputStream in = new PartialFileInputStream(new FileInputStream(file.getPath()), file.getStart(), file.getEnd());
|
134
134
|
if (file.getStart() > 0 && hasHeader) {
|
135
|
-
|
135
|
+
in = new SequenceInputStream(openHeader(file.getPath()), in);
|
136
136
|
}
|
137
137
|
return in;
|
138
138
|
}
|
@@ -142,31 +142,31 @@ public class LocalFileSplitInputPlugin
|
|
142
142
|
|
143
143
|
private InputStream openHeader(String path) throws IOException
|
144
144
|
{
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
145
|
+
ByteArrayOutputStream header = new ByteArrayOutputStream();
|
146
|
+
try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(path))) {
|
147
|
+
while (true) {
|
148
|
+
int c = in.read();
|
149
|
+
if (c < 0) {
|
150
|
+
break;
|
151
|
+
}
|
152
|
+
|
153
|
+
header.write(c);
|
154
|
+
|
155
|
+
if (c == '\n') {
|
156
|
+
break;
|
157
|
+
}
|
158
|
+
|
159
|
+
if (c == '\r') {
|
160
|
+
int c2 = in.read();
|
161
|
+
if (c2 == '\n') {
|
162
|
+
header.write(c2);
|
163
|
+
}
|
164
|
+
break;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
header.close();
|
169
|
+
return new ByteArrayInputStream(header.toByteArray());
|
170
170
|
}
|
171
171
|
}
|
172
172
|
|
@@ -1,53 +1,50 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
{
|
51
|
-
this.end = end;
|
52
|
-
}
|
1
|
+
package org.embulk.input.filesplit;
|
2
|
+
|
3
|
+
public class PartialFile
|
4
|
+
{
|
5
|
+
private String path;
|
6
|
+
private long start;
|
7
|
+
private long end;
|
8
|
+
|
9
|
+
|
10
|
+
public PartialFile(String path, long start, long end)
|
11
|
+
{
|
12
|
+
this.path = path;
|
13
|
+
this.start = start;
|
14
|
+
this.end = end;
|
15
|
+
}
|
16
|
+
|
17
|
+
public PartialFile() {
|
18
|
+
}
|
19
|
+
|
20
|
+
public String getPath()
|
21
|
+
{
|
22
|
+
return path;
|
23
|
+
}
|
24
|
+
|
25
|
+
public void setPath(String path)
|
26
|
+
{
|
27
|
+
this.path = path;
|
28
|
+
}
|
29
|
+
|
30
|
+
|
31
|
+
public long getStart()
|
32
|
+
{
|
33
|
+
return start;
|
34
|
+
}
|
35
|
+
|
36
|
+
public void setStart(long start)
|
37
|
+
{
|
38
|
+
this.start = start;
|
39
|
+
}
|
40
|
+
|
41
|
+
public long getEnd()
|
42
|
+
{
|
43
|
+
return end;
|
44
|
+
}
|
45
|
+
|
46
|
+
public void setEnd(long end)
|
47
|
+
{
|
48
|
+
this.end = end;
|
49
|
+
}
|
53
50
|
}
|
@@ -1,154 +1,154 @@
|
|
1
|
-
package org.embulk.input.filesplit;
|
2
|
-
|
3
|
-
import java.io.BufferedInputStream;
|
4
|
-
import java.io.IOException;
|
5
|
-
import java.io.InputStream;
|
6
|
-
import java.io.PushbackInputStream;
|
7
|
-
|
8
|
-
|
9
|
-
public class PartialFileInputStream extends InputStream
|
10
|
-
{
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
}
|
1
|
+
package org.embulk.input.filesplit;
|
2
|
+
|
3
|
+
import java.io.BufferedInputStream;
|
4
|
+
import java.io.IOException;
|
5
|
+
import java.io.InputStream;
|
6
|
+
import java.io.PushbackInputStream;
|
7
|
+
|
8
|
+
|
9
|
+
public class PartialFileInputStream extends InputStream
|
10
|
+
{
|
11
|
+
private final PushbackInputStream original;
|
12
|
+
private long start;
|
13
|
+
private long end;
|
14
|
+
private long current;
|
15
|
+
private boolean eof;
|
16
|
+
|
17
|
+
public PartialFileInputStream(InputStream original, long start, long end)
|
18
|
+
{
|
19
|
+
this.original = new PushbackInputStream(new BufferedInputStream(original));
|
20
|
+
this.start = start;
|
21
|
+
this.end = end;
|
22
|
+
current = -1;
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public int read(byte[] b) throws IOException
|
27
|
+
{
|
28
|
+
return read(b, 0, b.length);
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public int read(byte[] b, int off, int len) throws IOException
|
33
|
+
{
|
34
|
+
initializeIfNeeded();
|
35
|
+
|
36
|
+
if (eof) {
|
37
|
+
return -1;
|
38
|
+
}
|
39
|
+
|
40
|
+
int read = original.read(b, off, len);
|
41
|
+
if (read < 0) {
|
42
|
+
eof = true;
|
43
|
+
return -1;
|
44
|
+
}
|
45
|
+
|
46
|
+
current += read;
|
47
|
+
if (current >= end) {
|
48
|
+
for (int i = Math.max((int)(end - 1 - current + read), 0); i < read; i++) {
|
49
|
+
if (b[off + i] == '\n') {
|
50
|
+
eof = true;
|
51
|
+
return i + 1;
|
52
|
+
}
|
53
|
+
|
54
|
+
if (b[off + i] == '\r') {
|
55
|
+
int next = (i < read ? b[off + i + 1] : prefetch());
|
56
|
+
if (next != '\n') {
|
57
|
+
eof = true;
|
58
|
+
return i + 1;
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
64
|
+
return read;
|
65
|
+
}
|
66
|
+
|
67
|
+
@Override
|
68
|
+
public int read() throws IOException
|
69
|
+
{
|
70
|
+
initializeIfNeeded();
|
71
|
+
|
72
|
+
if (eof) {
|
73
|
+
return -1;
|
74
|
+
}
|
75
|
+
|
76
|
+
int read = original.read();
|
77
|
+
current++;
|
78
|
+
|
79
|
+
if (read < 0) {
|
80
|
+
eof = true;
|
81
|
+
return -1;
|
82
|
+
}
|
83
|
+
|
84
|
+
if (current >= end) {
|
85
|
+
if (read == '\n' || read == '\r' && prefetch() != '\n') {
|
86
|
+
eof = true;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
return read;
|
91
|
+
}
|
92
|
+
|
93
|
+
@Override
|
94
|
+
public long skip(long n) throws IOException
|
95
|
+
{
|
96
|
+
throw new IOException("Skip not supported.");
|
97
|
+
/*
|
98
|
+
long skip = original.skip(n);
|
99
|
+
current += skip;
|
100
|
+
return skip;
|
101
|
+
*/
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public int available() throws IOException
|
106
|
+
{
|
107
|
+
return 0;
|
108
|
+
}
|
109
|
+
|
110
|
+
@Override
|
111
|
+
public void close() throws IOException
|
112
|
+
{
|
113
|
+
original.close();
|
114
|
+
}
|
115
|
+
|
116
|
+
private void initializeIfNeeded() throws IOException
|
117
|
+
{
|
118
|
+
if (current >= start) {
|
119
|
+
return;
|
120
|
+
|
121
|
+
}
|
122
|
+
if (start == 0) {
|
123
|
+
current = 0;
|
124
|
+
} else {
|
125
|
+
current = original.skip(--start);
|
126
|
+
if (current != start) {
|
127
|
+
throw new IOException("Cannot skip.");
|
128
|
+
}
|
129
|
+
|
130
|
+
int c;
|
131
|
+
while ((c = original.read()) >= 0) {
|
132
|
+
start++;
|
133
|
+
current++;
|
134
|
+
|
135
|
+
if (c == '\n' || c == '\r' && prefetch() != '\n') {
|
136
|
+
break;
|
137
|
+
}
|
138
|
+
}
|
139
|
+
}
|
140
|
+
|
141
|
+
if (start >= end) {
|
142
|
+
eof = true;
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
private int prefetch() throws IOException
|
147
|
+
{
|
148
|
+
int c = original.read();
|
149
|
+
if (c >= 0) {
|
150
|
+
original.unread(c);
|
151
|
+
}
|
152
|
+
return c;
|
153
|
+
}
|
154
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-filesplit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hitoshi Tanaka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -36,7 +36,7 @@ files:
|
|
36
36
|
- src/test/resources/yml/test-only-header.yml
|
37
37
|
- src/test/resources/yml/test-tasks.yml
|
38
38
|
- src/test/resources/yml/test.yml
|
39
|
-
- classpath/embulk-input-filesplit-0.1.
|
39
|
+
- classpath/embulk-input-filesplit-0.1.2.jar
|
40
40
|
homepage: https://github.com/hito4t/embulk-input-filesplit
|
41
41
|
licenses:
|
42
42
|
- Apache 2.0
|