embulk 0.6.14 → 0.6.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +16 -3
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +21 -8
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +12 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +9 -9
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +2 -3
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +2 -4
- data/embulk-docs/src/built-in.rst +9 -9
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.15.rst +26 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +1 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +0 -7
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +1 -1
- data/lib/embulk/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48e00af06c823ff60ec39a9d020896c6b16442a5
|
4
|
+
data.tar.gz: 711922cebca874837dce9d100cbee5d52e5c833f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 714466b1fbd8794ace91fcc3b074dac84b2c72d5504e9751da9f04045f86861c653b0cd9088e443b3b53fda56b400ba7bb8e1d1a75ca8e0c2e2ba2dcbd66e745
|
7
|
+
data.tar.gz: 7ffa66ba254cd40e2cb4427d356648eec8e61e194c00d94b2f93817f22bb70d182154fe88ce0d38c7088507b8a4c0c445e45f6ab8da37627a8158113824921a5
|
data/build.gradle
CHANGED
@@ -91,7 +91,7 @@ public class PreviewExecutor
|
|
91
91
|
|
92
92
|
try {
|
93
93
|
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
94
|
-
public List<CommitReport> run(final TaskSource inputTask, Schema inputSchema, int taskCount)
|
94
|
+
public List<CommitReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount)
|
95
95
|
{
|
96
96
|
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
97
97
|
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
@@ -103,8 +103,16 @@ public class PreviewExecutor
|
|
103
103
|
|
104
104
|
PageOutput out = new SamplingPageOutput(task.getSampleRows(), outputSchema);
|
105
105
|
try {
|
106
|
-
|
107
|
-
|
106
|
+
for (int taskIndex=0; taskIndex < taskCount; taskIndex++) {
|
107
|
+
try {
|
108
|
+
out = Filters.open(filterPlugins, filterTasks, filterSchemas, out);
|
109
|
+
input.run(inputTask, inputSchema, taskIndex, out);
|
110
|
+
} catch (NoSampleException ex) {
|
111
|
+
if (taskIndex == taskCount - 1) {
|
112
|
+
throw ex;
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
108
116
|
} finally {
|
109
117
|
out.close();
|
110
118
|
}
|
@@ -135,6 +143,11 @@ public class PreviewExecutor
|
|
135
143
|
this.pages = new ArrayList<Page>();
|
136
144
|
}
|
137
145
|
|
146
|
+
public int getRecordCount()
|
147
|
+
{
|
148
|
+
return recordCount;
|
149
|
+
}
|
150
|
+
|
138
151
|
@Override
|
139
152
|
public void add(Page page)
|
140
153
|
{
|
@@ -3,6 +3,7 @@ package org.embulk.spi;
|
|
3
3
|
import java.util.Objects;
|
4
4
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
5
5
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
6
7
|
import org.embulk.config.ConfigSource;
|
7
8
|
import org.embulk.spi.type.Type;
|
8
9
|
import org.embulk.spi.type.TimestampType;
|
@@ -25,35 +26,47 @@ public class ColumnConfig
|
|
25
26
|
}
|
26
27
|
}
|
27
28
|
|
28
|
-
|
29
|
-
public ColumnConfig(
|
30
|
-
@JsonProperty("name") String name,
|
31
|
-
@JsonProperty("type") Type type,
|
32
|
-
@JsonProperty("option") ConfigSource option)
|
29
|
+
public ColumnConfig(String name, Type type, ConfigSource option)
|
33
30
|
{
|
34
31
|
this.name = name;
|
35
32
|
this.type = type;
|
36
33
|
this.option = option;
|
37
34
|
}
|
38
35
|
|
39
|
-
@
|
36
|
+
@JsonCreator
|
37
|
+
public ColumnConfig(ConfigSource conf)
|
38
|
+
{
|
39
|
+
this.name = conf.get(String.class, "name");
|
40
|
+
this.type = conf.get(Type.class, "type");
|
41
|
+
this.option = conf.deepCopy();
|
42
|
+
option.remove("name");
|
43
|
+
option.remove("type");
|
44
|
+
}
|
45
|
+
|
40
46
|
public String getName()
|
41
47
|
{
|
42
48
|
return name;
|
43
49
|
}
|
44
50
|
|
45
|
-
@JsonProperty("type")
|
46
51
|
public Type getType()
|
47
52
|
{
|
48
53
|
return type;
|
49
54
|
}
|
50
55
|
|
51
|
-
@JsonProperty("option")
|
52
56
|
public ConfigSource getOption()
|
53
57
|
{
|
54
58
|
return option;
|
55
59
|
}
|
56
60
|
|
61
|
+
@JsonValue
|
62
|
+
public ConfigSource getConfigSource()
|
63
|
+
{
|
64
|
+
ConfigSource conf = option.deepCopy();
|
65
|
+
conf.set("name", name);
|
66
|
+
conf.set("type", type);
|
67
|
+
return conf;
|
68
|
+
}
|
69
|
+
|
57
70
|
@Deprecated
|
58
71
|
public String getFormat()
|
59
72
|
{
|
@@ -39,6 +39,7 @@ public class PageBuilder
|
|
39
39
|
this.schema = schema;
|
40
40
|
this.columnOffsets = PageFormat.columnOffsets(schema);
|
41
41
|
this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
|
42
|
+
Arrays.fill(nullBitSet, (byte) -1);
|
42
43
|
this.fixedRecordSize = PageFormat.recordHeaderSize(schema) + PageFormat.totalColumnSize(schema);
|
43
44
|
this.nextVariableLengthDataOffset = fixedRecordSize;
|
44
45
|
newBuffer();
|
@@ -69,6 +70,11 @@ public class PageBuilder
|
|
69
70
|
nullBitSet[columnIndex >>> 3] |= (1 << (columnIndex & 7));
|
70
71
|
}
|
71
72
|
|
73
|
+
private void clearNull(int columnIndex)
|
74
|
+
{
|
75
|
+
nullBitSet[columnIndex >>> 3] &= ~(1 << (columnIndex & 7));
|
76
|
+
}
|
77
|
+
|
72
78
|
public void setBoolean(Column column, boolean value)
|
73
79
|
{
|
74
80
|
// TODO check type?
|
@@ -78,6 +84,7 @@ public class PageBuilder
|
|
78
84
|
public void setBoolean(int columnIndex, boolean value)
|
79
85
|
{
|
80
86
|
bufferSlice.setByte(getOffset(columnIndex), value ? (byte) 1 : (byte) 0);
|
87
|
+
clearNull(columnIndex);
|
81
88
|
}
|
82
89
|
|
83
90
|
public void setLong(Column column, long value)
|
@@ -89,6 +96,7 @@ public class PageBuilder
|
|
89
96
|
public void setLong(int columnIndex, long value)
|
90
97
|
{
|
91
98
|
bufferSlice.setLong(getOffset(columnIndex), value);
|
99
|
+
clearNull(columnIndex);
|
92
100
|
}
|
93
101
|
|
94
102
|
public void setDouble(Column column, double value)
|
@@ -100,6 +108,7 @@ public class PageBuilder
|
|
100
108
|
public void setDouble(int columnIndex, double value)
|
101
109
|
{
|
102
110
|
bufferSlice.setDouble(getOffset(columnIndex), value);
|
111
|
+
clearNull(columnIndex);
|
103
112
|
}
|
104
113
|
|
105
114
|
public void setString(Column column, String value)
|
@@ -119,6 +128,7 @@ public class PageBuilder
|
|
119
128
|
bufferSlice.setInt(getOffset(columnIndex), index);
|
120
129
|
stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
|
121
130
|
}
|
131
|
+
clearNull(columnIndex);
|
122
132
|
}
|
123
133
|
|
124
134
|
public void setTimestamp(Column column, Timestamp value)
|
@@ -132,6 +142,7 @@ public class PageBuilder
|
|
132
142
|
int offset = getOffset(columnIndex);
|
133
143
|
bufferSlice.setLong(offset, value.getEpochSecond());
|
134
144
|
bufferSlice.setInt(offset + 8, value.getNano());
|
145
|
+
clearNull(columnIndex);
|
135
146
|
}
|
136
147
|
|
137
148
|
private int getOffset(int columnIndex)
|
@@ -175,7 +186,7 @@ public class PageBuilder
|
|
175
186
|
|
176
187
|
this.position += nextVariableLengthDataOffset;
|
177
188
|
this.nextVariableLengthDataOffset = fixedRecordSize;
|
178
|
-
Arrays.fill(nullBitSet, (byte)
|
189
|
+
Arrays.fill(nullBitSet, (byte) -1);
|
179
190
|
|
180
191
|
// flush if next record will not fit in this buffer
|
181
192
|
if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) {
|
@@ -27,7 +27,7 @@ public class DynamicPageBuilder
|
|
27
27
|
{
|
28
28
|
private final PageBuilder pageBuilder;
|
29
29
|
private final Schema schema;
|
30
|
-
private final
|
30
|
+
private final DynamicColumnSetter[] setters;
|
31
31
|
private final Map<String, DynamicColumnSetter> columnLookup;
|
32
32
|
|
33
33
|
public static interface BuilderTask
|
@@ -75,7 +75,7 @@ public class DynamicPageBuilder
|
|
75
75
|
setters.add(setter);
|
76
76
|
lookup.put(c.getName(), setter);
|
77
77
|
}
|
78
|
-
this.setters = setters.build();
|
78
|
+
this.setters = setters.build().toArray(new DynamicColumnSetter[0]);
|
79
79
|
this.columnLookup = lookup.build();
|
80
80
|
}
|
81
81
|
|
@@ -86,15 +86,15 @@ public class DynamicPageBuilder
|
|
86
86
|
|
87
87
|
public DynamicColumnSetter column(Column c)
|
88
88
|
{
|
89
|
-
return setters
|
89
|
+
return setters[c.getIndex()];
|
90
90
|
}
|
91
91
|
|
92
92
|
public DynamicColumnSetter column(int index)
|
93
93
|
{
|
94
|
-
if (index < 0 || setters.
|
94
|
+
if (index < 0 || setters.length <= index) {
|
95
95
|
throw new DynamicColumnNotFoundException("Column index '"+index+"' is not exist");
|
96
96
|
}
|
97
|
-
return setters
|
97
|
+
return setters[index];
|
98
98
|
}
|
99
99
|
|
100
100
|
public DynamicColumnSetter lookupColumn(String columnName)
|
@@ -108,10 +108,10 @@ public class DynamicPageBuilder
|
|
108
108
|
|
109
109
|
public DynamicColumnSetter columnOrSkip(int index)
|
110
110
|
{
|
111
|
-
if (index < 0 || setters.
|
111
|
+
if (index < 0 || setters.length <= index) {
|
112
112
|
return SkipColumnSetter.get();
|
113
113
|
}
|
114
|
-
return setters
|
114
|
+
return setters[index];
|
115
115
|
}
|
116
116
|
|
117
117
|
public DynamicColumnSetter columnOrSkip(String columnName)
|
@@ -126,10 +126,10 @@ public class DynamicPageBuilder
|
|
126
126
|
// for jruby
|
127
127
|
protected DynamicColumnSetter columnOrNull(int index)
|
128
128
|
{
|
129
|
-
if (index < 0 || setters.
|
129
|
+
if (index < 0 || setters.length <= index) {
|
130
130
|
return null;
|
131
131
|
}
|
132
|
-
return setters
|
132
|
+
return setters[index];
|
133
133
|
}
|
134
134
|
|
135
135
|
// for jruby
|
@@ -40,9 +40,9 @@ public abstract class AbstractDynamicColumnSetter
|
|
40
40
|
|
41
41
|
public abstract void set(Timestamp value);
|
42
42
|
|
43
|
-
public
|
43
|
+
public void setRubyObject(IRubyObject rubyObject)
|
44
44
|
{
|
45
|
-
if (rubyObject instanceof RubyNil) {
|
45
|
+
if (rubyObject == null || rubyObject instanceof RubyNil) {
|
46
46
|
setNull();
|
47
47
|
} else if (rubyObject instanceof RubyBoolean) {
|
48
48
|
RubyBoolean b = (RubyBoolean) rubyObject;
|
@@ -75,6 +75,5 @@ public abstract class AbstractDynamicColumnSetter
|
|
75
75
|
} else {
|
76
76
|
throw rubyObject.getRuntime().newTypeError("cannot convert instance of " + rubyObject.getMetaClass() + " to nil, true, false, Integer, Float, String, or Time");
|
77
77
|
}
|
78
|
-
return rubyObject.getRuntime().getNil();
|
79
78
|
}
|
80
79
|
}
|
@@ -248,12 +248,12 @@ Options
|
|
248
248
|
+====================+==========+===================================================+============================+
|
249
249
|
| path\_prefix | string | Path prefix of the output files | required |
|
250
250
|
+--------------------+----------+---------------------------------------------------+----------------------------+
|
251
|
-
| sequence\_format | string | Format of the sequence number of the output files |
|
251
|
+
| sequence\_format | string | Format of the sequence number of the output files | ``%03d.%02d.`` by default |
|
252
252
|
+--------------------+----------+---------------------------------------------------+----------------------------+
|
253
|
-
| file\_ext | string | Path suffix of the output files
|
253
|
+
| file\_ext | string | Path suffix of the output files (e.g. ``"csv"``) | required |
|
254
254
|
+--------------------+----------+---------------------------------------------------+----------------------------+
|
255
255
|
|
256
|
-
For example, if you set ``path_prefix: /path/to/output``, ``sequence_format: "
|
256
|
+
For example, if you set ``path_prefix: /path/to/output/sample_``, ``sequence_format: "%03d.%02d."``, and ``file_ext: csv``, the names of the output files will be as follows:
|
257
257
|
|
258
258
|
::
|
259
259
|
|
@@ -261,10 +261,10 @@ For example, if you set ``path_prefix: /path/to/output``, ``sequence_format: ".%
|
|
261
261
|
`-- path
|
262
262
|
`-- to
|
263
263
|
`-- output
|
264
|
-
|--
|
265
|
-
|--
|
266
|
-
|--
|
267
|
-
|--
|
264
|
+
|-- sample_000.00.csv
|
265
|
+
|-- sample_001.00.csv
|
266
|
+
|-- sample_002.00.csv
|
267
|
+
|-- sample_003.00.csv
|
268
268
|
|
269
269
|
``sequence_format`` formats task index and sequence number in a task.
|
270
270
|
|
@@ -275,8 +275,8 @@ Example
|
|
275
275
|
|
276
276
|
out:
|
277
277
|
type: file
|
278
|
-
path_prefix: /path/to/output/
|
279
|
-
file_ext:
|
278
|
+
path_prefix: /path/to/output/sample_
|
279
|
+
file_ext: csv
|
280
280
|
formatter:
|
281
281
|
...
|
282
282
|
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,26 @@
|
|
1
|
+
Release 0.6.15
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Plugin API
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* PageBuilder sets a column to null if no setXxx method is called for that column.
|
8
|
+
|
9
|
+
|
10
|
+
Built-in plugins
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* **IMPORTANT**: Because of the default value of the ``sequence_format`` option, ``formatter-csv`` adds ``.`` (dot) before ``file_ext`` rather than after ``path_prefix``. If you're using ``out: type: file``, see the documentation of the file output plugin at :doc:`../built-in`.
|
14
|
+
|
15
|
+
|
16
|
+
General Changes
|
17
|
+
------------------
|
18
|
+
|
19
|
+
* ``preview`` reads the next file (task) when the first file (task) is empty.
|
20
|
+
* Fixed a serialization/deserialization bug in ColumnConfig.
|
21
|
+
* Fixed NullPointerException caused when ``PageBuilder#setRubyObject(IRubyObject)`` is called with ``nil``.
|
22
|
+
|
23
|
+
|
24
|
+
Release Date
|
25
|
+
------------------
|
26
|
+
2015-06-29
|
@@ -88,13 +88,6 @@ public class CsvParserPlugin
|
|
88
88
|
@Config("allow_extra_columns")
|
89
89
|
@ConfigDefault("false")
|
90
90
|
public boolean getAllowExtraColumns();
|
91
|
-
|
92
|
-
@Config("error")
|
93
|
-
@ConfigDefault("{\"type\": \"warning\"}")
|
94
|
-
public ConfigSource getErrorConfig();
|
95
|
-
|
96
|
-
public TaskSource getErorrTaskSource();
|
97
|
-
public void setErrorTaskSource(TaskSource errorTask);
|
98
91
|
}
|
99
92
|
|
100
93
|
public interface TimestampColumnOption
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
@@ -318,6 +318,7 @@ files:
|
|
318
318
|
- embulk-docs/src/release/release-0.6.12.rst
|
319
319
|
- embulk-docs/src/release/release-0.6.13.rst
|
320
320
|
- embulk-docs/src/release/release-0.6.14.rst
|
321
|
+
- embulk-docs/src/release/release-0.6.15.rst
|
321
322
|
- embulk-docs/src/release/release-0.6.2.rst
|
322
323
|
- embulk-docs/src/release/release-0.6.3.rst
|
323
324
|
- embulk-docs/src/release/release-0.6.4.rst
|
@@ -434,8 +435,8 @@ files:
|
|
434
435
|
- classpath/bval-jsr303-0.5.jar
|
435
436
|
- classpath/commons-beanutils-core-1.8.3.jar
|
436
437
|
- classpath/commons-lang3-3.1.jar
|
437
|
-
- classpath/embulk-core-0.6.
|
438
|
-
- classpath/embulk-standards-0.6.
|
438
|
+
- classpath/embulk-core-0.6.15.jar
|
439
|
+
- classpath/embulk-standards-0.6.15.jar
|
439
440
|
- classpath/guava-18.0.jar
|
440
441
|
- classpath/guice-4.0.jar
|
441
442
|
- classpath/guice-multibindings-4.0.jar
|