embulk 0.6.14 → 0.6.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +16 -3
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +21 -8
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +12 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +9 -9
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +2 -3
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +2 -4
- data/embulk-docs/src/built-in.rst +9 -9
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.15.rst +26 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +1 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +0 -7
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +1 -1
- data/lib/embulk/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48e00af06c823ff60ec39a9d020896c6b16442a5
|
4
|
+
data.tar.gz: 711922cebca874837dce9d100cbee5d52e5c833f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 714466b1fbd8794ace91fcc3b074dac84b2c72d5504e9751da9f04045f86861c653b0cd9088e443b3b53fda56b400ba7bb8e1d1a75ca8e0c2e2ba2dcbd66e745
|
7
|
+
data.tar.gz: 7ffa66ba254cd40e2cb4427d356648eec8e61e194c00d94b2f93817f22bb70d182154fe88ce0d38c7088507b8a4c0c445e45f6ab8da37627a8158113824921a5
|
data/build.gradle
CHANGED
@@ -91,7 +91,7 @@ public class PreviewExecutor
|
|
91
91
|
|
92
92
|
try {
|
93
93
|
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
94
|
-
public List<CommitReport> run(final TaskSource inputTask, Schema inputSchema, int taskCount)
|
94
|
+
public List<CommitReport> run(final TaskSource inputTask, Schema inputSchema, final int taskCount)
|
95
95
|
{
|
96
96
|
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
97
97
|
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
@@ -103,8 +103,16 @@ public class PreviewExecutor
|
|
103
103
|
|
104
104
|
PageOutput out = new SamplingPageOutput(task.getSampleRows(), outputSchema);
|
105
105
|
try {
|
106
|
-
|
107
|
-
|
106
|
+
for (int taskIndex=0; taskIndex < taskCount; taskIndex++) {
|
107
|
+
try {
|
108
|
+
out = Filters.open(filterPlugins, filterTasks, filterSchemas, out);
|
109
|
+
input.run(inputTask, inputSchema, taskIndex, out);
|
110
|
+
} catch (NoSampleException ex) {
|
111
|
+
if (taskIndex == taskCount - 1) {
|
112
|
+
throw ex;
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
108
116
|
} finally {
|
109
117
|
out.close();
|
110
118
|
}
|
@@ -135,6 +143,11 @@ public class PreviewExecutor
|
|
135
143
|
this.pages = new ArrayList<Page>();
|
136
144
|
}
|
137
145
|
|
146
|
+
public int getRecordCount()
|
147
|
+
{
|
148
|
+
return recordCount;
|
149
|
+
}
|
150
|
+
|
138
151
|
@Override
|
139
152
|
public void add(Page page)
|
140
153
|
{
|
@@ -3,6 +3,7 @@ package org.embulk.spi;
|
|
3
3
|
import java.util.Objects;
|
4
4
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
5
5
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
6
7
|
import org.embulk.config.ConfigSource;
|
7
8
|
import org.embulk.spi.type.Type;
|
8
9
|
import org.embulk.spi.type.TimestampType;
|
@@ -25,35 +26,47 @@ public class ColumnConfig
|
|
25
26
|
}
|
26
27
|
}
|
27
28
|
|
28
|
-
|
29
|
-
public ColumnConfig(
|
30
|
-
@JsonProperty("name") String name,
|
31
|
-
@JsonProperty("type") Type type,
|
32
|
-
@JsonProperty("option") ConfigSource option)
|
29
|
+
public ColumnConfig(String name, Type type, ConfigSource option)
|
33
30
|
{
|
34
31
|
this.name = name;
|
35
32
|
this.type = type;
|
36
33
|
this.option = option;
|
37
34
|
}
|
38
35
|
|
39
|
-
@
|
36
|
+
@JsonCreator
|
37
|
+
public ColumnConfig(ConfigSource conf)
|
38
|
+
{
|
39
|
+
this.name = conf.get(String.class, "name");
|
40
|
+
this.type = conf.get(Type.class, "type");
|
41
|
+
this.option = conf.deepCopy();
|
42
|
+
option.remove("name");
|
43
|
+
option.remove("type");
|
44
|
+
}
|
45
|
+
|
40
46
|
public String getName()
|
41
47
|
{
|
42
48
|
return name;
|
43
49
|
}
|
44
50
|
|
45
|
-
@JsonProperty("type")
|
46
51
|
public Type getType()
|
47
52
|
{
|
48
53
|
return type;
|
49
54
|
}
|
50
55
|
|
51
|
-
@JsonProperty("option")
|
52
56
|
public ConfigSource getOption()
|
53
57
|
{
|
54
58
|
return option;
|
55
59
|
}
|
56
60
|
|
61
|
+
@JsonValue
|
62
|
+
public ConfigSource getConfigSource()
|
63
|
+
{
|
64
|
+
ConfigSource conf = option.deepCopy();
|
65
|
+
conf.set("name", name);
|
66
|
+
conf.set("type", type);
|
67
|
+
return conf;
|
68
|
+
}
|
69
|
+
|
57
70
|
@Deprecated
|
58
71
|
public String getFormat()
|
59
72
|
{
|
@@ -39,6 +39,7 @@ public class PageBuilder
|
|
39
39
|
this.schema = schema;
|
40
40
|
this.columnOffsets = PageFormat.columnOffsets(schema);
|
41
41
|
this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
|
42
|
+
Arrays.fill(nullBitSet, (byte) -1);
|
42
43
|
this.fixedRecordSize = PageFormat.recordHeaderSize(schema) + PageFormat.totalColumnSize(schema);
|
43
44
|
this.nextVariableLengthDataOffset = fixedRecordSize;
|
44
45
|
newBuffer();
|
@@ -69,6 +70,11 @@ public class PageBuilder
|
|
69
70
|
nullBitSet[columnIndex >>> 3] |= (1 << (columnIndex & 7));
|
70
71
|
}
|
71
72
|
|
73
|
+
private void clearNull(int columnIndex)
|
74
|
+
{
|
75
|
+
nullBitSet[columnIndex >>> 3] &= ~(1 << (columnIndex & 7));
|
76
|
+
}
|
77
|
+
|
72
78
|
public void setBoolean(Column column, boolean value)
|
73
79
|
{
|
74
80
|
// TODO check type?
|
@@ -78,6 +84,7 @@ public class PageBuilder
|
|
78
84
|
public void setBoolean(int columnIndex, boolean value)
|
79
85
|
{
|
80
86
|
bufferSlice.setByte(getOffset(columnIndex), value ? (byte) 1 : (byte) 0);
|
87
|
+
clearNull(columnIndex);
|
81
88
|
}
|
82
89
|
|
83
90
|
public void setLong(Column column, long value)
|
@@ -89,6 +96,7 @@ public class PageBuilder
|
|
89
96
|
public void setLong(int columnIndex, long value)
|
90
97
|
{
|
91
98
|
bufferSlice.setLong(getOffset(columnIndex), value);
|
99
|
+
clearNull(columnIndex);
|
92
100
|
}
|
93
101
|
|
94
102
|
public void setDouble(Column column, double value)
|
@@ -100,6 +108,7 @@ public class PageBuilder
|
|
100
108
|
public void setDouble(int columnIndex, double value)
|
101
109
|
{
|
102
110
|
bufferSlice.setDouble(getOffset(columnIndex), value);
|
111
|
+
clearNull(columnIndex);
|
103
112
|
}
|
104
113
|
|
105
114
|
public void setString(Column column, String value)
|
@@ -119,6 +128,7 @@ public class PageBuilder
|
|
119
128
|
bufferSlice.setInt(getOffset(columnIndex), index);
|
120
129
|
stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
|
121
130
|
}
|
131
|
+
clearNull(columnIndex);
|
122
132
|
}
|
123
133
|
|
124
134
|
public void setTimestamp(Column column, Timestamp value)
|
@@ -132,6 +142,7 @@ public class PageBuilder
|
|
132
142
|
int offset = getOffset(columnIndex);
|
133
143
|
bufferSlice.setLong(offset, value.getEpochSecond());
|
134
144
|
bufferSlice.setInt(offset + 8, value.getNano());
|
145
|
+
clearNull(columnIndex);
|
135
146
|
}
|
136
147
|
|
137
148
|
private int getOffset(int columnIndex)
|
@@ -175,7 +186,7 @@ public class PageBuilder
|
|
175
186
|
|
176
187
|
this.position += nextVariableLengthDataOffset;
|
177
188
|
this.nextVariableLengthDataOffset = fixedRecordSize;
|
178
|
-
Arrays.fill(nullBitSet, (byte)
|
189
|
+
Arrays.fill(nullBitSet, (byte) -1);
|
179
190
|
|
180
191
|
// flush if next record will not fit in this buffer
|
181
192
|
if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) {
|
@@ -27,7 +27,7 @@ public class DynamicPageBuilder
|
|
27
27
|
{
|
28
28
|
private final PageBuilder pageBuilder;
|
29
29
|
private final Schema schema;
|
30
|
-
private final
|
30
|
+
private final DynamicColumnSetter[] setters;
|
31
31
|
private final Map<String, DynamicColumnSetter> columnLookup;
|
32
32
|
|
33
33
|
public static interface BuilderTask
|
@@ -75,7 +75,7 @@ public class DynamicPageBuilder
|
|
75
75
|
setters.add(setter);
|
76
76
|
lookup.put(c.getName(), setter);
|
77
77
|
}
|
78
|
-
this.setters = setters.build();
|
78
|
+
this.setters = setters.build().toArray(new DynamicColumnSetter[0]);
|
79
79
|
this.columnLookup = lookup.build();
|
80
80
|
}
|
81
81
|
|
@@ -86,15 +86,15 @@ public class DynamicPageBuilder
|
|
86
86
|
|
87
87
|
public DynamicColumnSetter column(Column c)
|
88
88
|
{
|
89
|
-
return setters
|
89
|
+
return setters[c.getIndex()];
|
90
90
|
}
|
91
91
|
|
92
92
|
public DynamicColumnSetter column(int index)
|
93
93
|
{
|
94
|
-
if (index < 0 || setters.
|
94
|
+
if (index < 0 || setters.length <= index) {
|
95
95
|
throw new DynamicColumnNotFoundException("Column index '"+index+"' is not exist");
|
96
96
|
}
|
97
|
-
return setters
|
97
|
+
return setters[index];
|
98
98
|
}
|
99
99
|
|
100
100
|
public DynamicColumnSetter lookupColumn(String columnName)
|
@@ -108,10 +108,10 @@ public class DynamicPageBuilder
|
|
108
108
|
|
109
109
|
public DynamicColumnSetter columnOrSkip(int index)
|
110
110
|
{
|
111
|
-
if (index < 0 || setters.
|
111
|
+
if (index < 0 || setters.length <= index) {
|
112
112
|
return SkipColumnSetter.get();
|
113
113
|
}
|
114
|
-
return setters
|
114
|
+
return setters[index];
|
115
115
|
}
|
116
116
|
|
117
117
|
public DynamicColumnSetter columnOrSkip(String columnName)
|
@@ -126,10 +126,10 @@ public class DynamicPageBuilder
|
|
126
126
|
// for jruby
|
127
127
|
protected DynamicColumnSetter columnOrNull(int index)
|
128
128
|
{
|
129
|
-
if (index < 0 || setters.
|
129
|
+
if (index < 0 || setters.length <= index) {
|
130
130
|
return null;
|
131
131
|
}
|
132
|
-
return setters
|
132
|
+
return setters[index];
|
133
133
|
}
|
134
134
|
|
135
135
|
// for jruby
|
@@ -40,9 +40,9 @@ public abstract class AbstractDynamicColumnSetter
|
|
40
40
|
|
41
41
|
public abstract void set(Timestamp value);
|
42
42
|
|
43
|
-
public
|
43
|
+
public void setRubyObject(IRubyObject rubyObject)
|
44
44
|
{
|
45
|
-
if (rubyObject instanceof RubyNil) {
|
45
|
+
if (rubyObject == null || rubyObject instanceof RubyNil) {
|
46
46
|
setNull();
|
47
47
|
} else if (rubyObject instanceof RubyBoolean) {
|
48
48
|
RubyBoolean b = (RubyBoolean) rubyObject;
|
@@ -75,6 +75,5 @@ public abstract class AbstractDynamicColumnSetter
|
|
75
75
|
} else {
|
76
76
|
throw rubyObject.getRuntime().newTypeError("cannot convert instance of " + rubyObject.getMetaClass() + " to nil, true, false, Integer, Float, String, or Time");
|
77
77
|
}
|
78
|
-
return rubyObject.getRuntime().getNil();
|
79
78
|
}
|
80
79
|
}
|
@@ -248,12 +248,12 @@ Options
|
|
248
248
|
+====================+==========+===================================================+============================+
|
249
249
|
| path\_prefix | string | Path prefix of the output files | required |
|
250
250
|
+--------------------+----------+---------------------------------------------------+----------------------------+
|
251
|
-
| sequence\_format | string | Format of the sequence number of the output files |
|
251
|
+
| sequence\_format | string | Format of the sequence number of the output files | ``%03d.%02d.`` by default |
|
252
252
|
+--------------------+----------+---------------------------------------------------+----------------------------+
|
253
|
-
| file\_ext | string | Path suffix of the output files
|
253
|
+
| file\_ext | string | Path suffix of the output files (e.g. ``"csv"``) | required |
|
254
254
|
+--------------------+----------+---------------------------------------------------+----------------------------+
|
255
255
|
|
256
|
-
For example, if you set ``path_prefix: /path/to/output``, ``sequence_format: "
|
256
|
+
For example, if you set ``path_prefix: /path/to/output/sample_``, ``sequence_format: "%03d.%02d."``, and ``file_ext: csv``, name of the output files will be as following:
|
257
257
|
|
258
258
|
::
|
259
259
|
|
@@ -261,10 +261,10 @@ For example, if you set ``path_prefix: /path/to/output``, ``sequence_format: ".%
|
|
261
261
|
`-- path
|
262
262
|
`-- to
|
263
263
|
`-- output
|
264
|
-
|--
|
265
|
-
|--
|
266
|
-
|--
|
267
|
-
|--
|
264
|
+
|-- sample_01.000.csv
|
265
|
+
|-- sample_02.000.csv
|
266
|
+
|-- sample_03.000.csv
|
267
|
+
|-- sample_04.000.csv
|
268
268
|
|
269
269
|
``sequence_format`` formats task index and sequence number in a task.
|
270
270
|
|
@@ -275,8 +275,8 @@ Example
|
|
275
275
|
|
276
276
|
out:
|
277
277
|
type: file
|
278
|
-
path_prefix: /path/to/output/
|
279
|
-
file_ext:
|
278
|
+
path_prefix: /path/to/output/sample_
|
279
|
+
file_ext: csv
|
280
280
|
formatter:
|
281
281
|
...
|
282
282
|
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,26 @@
|
|
1
|
+
Release 0.6.15
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Plugin API
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* PageBuilder sets null to a column if setXxx method is not called to the column.
|
8
|
+
|
9
|
+
|
10
|
+
Built-in plugins
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* **IMPORTANT**: Because of default value of ``sequence_format`` option, ``formatter-csv`` adds ``.`` (dot) before ``file_ext`` rather than after ``path_prefix``. If you're using ``out: type: file``, see the document of file output plugin at :doc:`../built-in`.
|
14
|
+
|
15
|
+
|
16
|
+
General Changes
|
17
|
+
------------------
|
18
|
+
|
19
|
+
* ``preview`` reads next file (task) when the first file (task) is empty.
|
20
|
+
* Fixed serialize/deserialization bug of ColumnConfig.
|
21
|
+
* Fixed NullPointerException caused when ``PageBuilder#setRubyObject(IRubyObject)`` is called with ``nil``.
|
22
|
+
|
23
|
+
|
24
|
+
Release Date
|
25
|
+
------------------
|
26
|
+
2015-06-29
|
@@ -88,13 +88,6 @@ public class CsvParserPlugin
|
|
88
88
|
@Config("allow_extra_columns")
|
89
89
|
@ConfigDefault("false")
|
90
90
|
public boolean getAllowExtraColumns();
|
91
|
-
|
92
|
-
@Config("error")
|
93
|
-
@ConfigDefault("{\"type\": \"warning\"}")
|
94
|
-
public ConfigSource getErrorConfig();
|
95
|
-
|
96
|
-
public TaskSource getErorrTaskSource();
|
97
|
-
public void setErrorTaskSource(TaskSource errorTask);
|
98
91
|
}
|
99
92
|
|
100
93
|
public interface TimestampColumnOption
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
@@ -318,6 +318,7 @@ files:
|
|
318
318
|
- embulk-docs/src/release/release-0.6.12.rst
|
319
319
|
- embulk-docs/src/release/release-0.6.13.rst
|
320
320
|
- embulk-docs/src/release/release-0.6.14.rst
|
321
|
+
- embulk-docs/src/release/release-0.6.15.rst
|
321
322
|
- embulk-docs/src/release/release-0.6.2.rst
|
322
323
|
- embulk-docs/src/release/release-0.6.3.rst
|
323
324
|
- embulk-docs/src/release/release-0.6.4.rst
|
@@ -434,8 +435,8 @@ files:
|
|
434
435
|
- classpath/bval-jsr303-0.5.jar
|
435
436
|
- classpath/commons-beanutils-core-1.8.3.jar
|
436
437
|
- classpath/commons-lang3-3.1.jar
|
437
|
-
- classpath/embulk-core-0.6.
|
438
|
-
- classpath/embulk-standards-0.6.
|
438
|
+
- classpath/embulk-core-0.6.15.jar
|
439
|
+
- classpath/embulk-standards-0.6.15.jar
|
439
440
|
- classpath/guava-18.0.jar
|
440
441
|
- classpath/guice-4.0.jar
|
441
442
|
- classpath/guice-multibindings-4.0.jar
|