embulk 0.6.12 → 0.6.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +0 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +1 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnNotFoundException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetter.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +161 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +80 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +64 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +61 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +69 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +34 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +54 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +56 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +64 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.13.rst +23 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +7 -1
- data/lib/embulk/command/embulk.rb +5 -1
- data/lib/embulk/command/embulk_run.rb +13 -1
- data/lib/embulk/guess/csv.rb +52 -22
- data/lib/embulk/java/imports.rb +2 -0
- data/lib/embulk/page_builder.rb +56 -2
- data/lib/embulk/version.rb +1 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98a09da0f4c6425b69f3094d7c5b105eea0e7d36
|
4
|
+
data.tar.gz: 26383243a036baa447c9bf3ab28d554e2a5fc9e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acc2663c2fc200aeb2aeffc5e3156cdffbfbd8321a87bfbac1fa125e46f5c2006bdc9cdf63484b0157c622c32cf8be81bec336a029bd6c655e866537bc689578
|
7
|
+
data.tar.gz: 41696a15d2d1cd12efcd1d35574b19ddeeaa931f386f8c7093ab123328d460d5b1d82a94880f38c679b8f690edd43c7d5fcf679cee08b9ba019236b36460a9c7
|
data/Gemfile.lock
CHANGED
data/build.gradle
CHANGED
@@ -35,8 +35,7 @@ public class TimestampParser
|
|
35
35
|
return defaultTimeZone;
|
36
36
|
}
|
37
37
|
|
38
|
-
|
39
|
-
private TimestampParser(ScriptingContainer jruby, String format, DateTimeZone defaultTimeZone)
|
38
|
+
public TimestampParser(ScriptingContainer jruby, String format, DateTimeZone defaultTimeZone)
|
40
39
|
{
|
41
40
|
JRubyTimeParserHelperFactory helperFactory = (JRubyTimeParserHelperFactory) jruby.runScriptlet("Embulk::Java::TimeParserHelper::Factory.new");
|
42
41
|
// TODO get default current time from ExecTask.getExecTimestamp
|
@@ -0,0 +1,18 @@
|
|
1
|
+
package org.embulk.spi.util;
|
2
|
+
|
3
|
+
import org.embulk.spi.time.Timestamp;
|
4
|
+
|
5
|
+
public interface DynamicColumnSetter
|
6
|
+
{
|
7
|
+
public void setNull();
|
8
|
+
|
9
|
+
public void set(boolean value);
|
10
|
+
|
11
|
+
public void set(long value);
|
12
|
+
|
13
|
+
public void set(double value);
|
14
|
+
|
15
|
+
public void set(String value);
|
16
|
+
|
17
|
+
public void set(Timestamp value);
|
18
|
+
}
|
@@ -0,0 +1,94 @@
|
|
1
|
+
package org.embulk.spi.util;
|
2
|
+
|
3
|
+
import org.joda.time.DateTimeZone;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.embulk.spi.type.Type;
|
6
|
+
import org.embulk.spi.type.BooleanType;
|
7
|
+
import org.embulk.spi.type.LongType;
|
8
|
+
import org.embulk.spi.type.DoubleType;
|
9
|
+
import org.embulk.spi.type.StringType;
|
10
|
+
import org.embulk.spi.type.TimestampType;
|
11
|
+
import org.embulk.spi.util.dynamic.BooleanColumnSetter;
|
12
|
+
import org.embulk.spi.util.dynamic.LongColumnSetter;
|
13
|
+
import org.embulk.spi.util.dynamic.DoubleColumnSetter;
|
14
|
+
import org.embulk.spi.util.dynamic.StringColumnSetter;
|
15
|
+
import org.embulk.spi.util.dynamic.TimestampColumnSetter;
|
16
|
+
import org.embulk.spi.util.dynamic.DefaultValueSetter;
|
17
|
+
import org.embulk.spi.util.dynamic.NullDefaultValueSetter;
|
18
|
+
import org.embulk.spi.time.TimestampFormatter;
|
19
|
+
import org.embulk.spi.time.TimestampParser;
|
20
|
+
import org.embulk.spi.time.TimestampFormat;
|
21
|
+
import org.embulk.spi.Column;
|
22
|
+
import org.embulk.spi.PageBuilder;
|
23
|
+
import org.embulk.config.ConfigException;
|
24
|
+
|
25
|
+
public class DynamicColumnSetterFactory
|
26
|
+
{
|
27
|
+
private final DefaultValueSetter defaultValue;
|
28
|
+
private final DynamicPageBuilder.BuilderTask task;
|
29
|
+
|
30
|
+
DynamicColumnSetterFactory(
|
31
|
+
DynamicPageBuilder.BuilderTask task,
|
32
|
+
DefaultValueSetter defaultValue)
|
33
|
+
{
|
34
|
+
this.defaultValue = defaultValue;
|
35
|
+
this.task = task;
|
36
|
+
}
|
37
|
+
|
38
|
+
public static DefaultValueSetter nullDefaultValue()
|
39
|
+
{
|
40
|
+
return new NullDefaultValueSetter();
|
41
|
+
}
|
42
|
+
|
43
|
+
public DynamicColumnSetter newColumnSetter(PageBuilder pageBuilder, Column column)
|
44
|
+
{
|
45
|
+
Type type = column.getType();
|
46
|
+
if (type instanceof BooleanType) {
|
47
|
+
return new BooleanColumnSetter(pageBuilder, column, defaultValue);
|
48
|
+
} else if (type instanceof LongType) {
|
49
|
+
return new LongColumnSetter(pageBuilder, column, defaultValue);
|
50
|
+
} else if (type instanceof DoubleType) {
|
51
|
+
return new DoubleColumnSetter(pageBuilder, column, defaultValue);
|
52
|
+
} else if (type instanceof StringType) {
|
53
|
+
TimestampFormatter formatter = new TimestampFormatter(task.getJRuby(),
|
54
|
+
getTimestampFormat(column).getFormat(), getTimeZone(column));
|
55
|
+
return new StringColumnSetter(pageBuilder, column, defaultValue, formatter);
|
56
|
+
} else if (type instanceof TimestampType) {
|
57
|
+
// TODO use flexible time format like Ruby's Time.parse
|
58
|
+
TimestampParser parser = new TimestampParser(task.getJRuby(),
|
59
|
+
getTimestampFormat(column).getFormat(), getTimeZone(column));
|
60
|
+
return new TimestampColumnSetter(pageBuilder, column, defaultValue, parser);
|
61
|
+
}
|
62
|
+
throw new ConfigException("Unknown column type: "+type);
|
63
|
+
}
|
64
|
+
|
65
|
+
private TimestampFormat getTimestampFormat(Column column)
|
66
|
+
{
|
67
|
+
DynamicPageBuilder.ColumnOption option = getColumnOption(column);
|
68
|
+
if (option != null) {
|
69
|
+
return option.getTimestampFormat();
|
70
|
+
} else {
|
71
|
+
return new TimestampFormat("%Y-%m-%d %H:%M:%S.%6N");
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
private DateTimeZone getTimeZone(Column column)
|
76
|
+
{
|
77
|
+
DynamicPageBuilder.ColumnOption option = getColumnOption(column);
|
78
|
+
if (option != null) {
|
79
|
+
return option.getTimeZone().or(task.getDefaultTimeZone());
|
80
|
+
} else {
|
81
|
+
return task.getDefaultTimeZone();
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
private DynamicPageBuilder.ColumnOption getColumnOption(Column column)
|
86
|
+
{
|
87
|
+
ConfigSource option = task.getColumnOptions().get(column.getName());
|
88
|
+
if (option != null) {
|
89
|
+
return option.loadConfig(DynamicPageBuilder.ColumnOption.class);
|
90
|
+
} else {
|
91
|
+
return null;
|
92
|
+
}
|
93
|
+
}
|
94
|
+
}
|
@@ -0,0 +1,161 @@
|
|
1
|
+
package org.embulk.spi.util;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.Map;
|
5
|
+
import com.google.common.base.Optional;
|
6
|
+
import org.joda.time.DateTimeZone;
|
7
|
+
import org.jruby.embed.ScriptingContainer;
|
8
|
+
import com.google.common.collect.ImmutableList;
|
9
|
+
import com.google.common.collect.ImmutableMap;
|
10
|
+
import org.embulk.config.Config;
|
11
|
+
import org.embulk.config.ConfigDefault;
|
12
|
+
import org.embulk.config.ConfigInject;
|
13
|
+
import org.embulk.config.ConfigSource;
|
14
|
+
import org.embulk.config.Task;
|
15
|
+
import org.embulk.spi.Schema;
|
16
|
+
import org.embulk.spi.Column;
|
17
|
+
import org.embulk.spi.BufferAllocator;
|
18
|
+
import org.embulk.spi.PageBuilder;
|
19
|
+
import org.embulk.spi.PageOutput;
|
20
|
+
import org.embulk.spi.time.TimestampFormatter;
|
21
|
+
import org.embulk.spi.time.TimestampParser;
|
22
|
+
import org.embulk.spi.time.TimestampFormat;
|
23
|
+
import org.embulk.spi.util.dynamic.SkipColumnSetter;
|
24
|
+
|
25
|
+
public class DynamicPageBuilder
|
26
|
+
implements AutoCloseable
|
27
|
+
{
|
28
|
+
private final PageBuilder pageBuilder;
|
29
|
+
private final Schema schema;
|
30
|
+
private final List<DynamicColumnSetter> setters;
|
31
|
+
private final Map<String, DynamicColumnSetter> columnLookup;
|
32
|
+
|
33
|
+
public static interface BuilderTask
|
34
|
+
extends Task
|
35
|
+
{
|
36
|
+
@Config("default_timezone")
|
37
|
+
@ConfigDefault("\"UTC\"")
|
38
|
+
public DateTimeZone getDefaultTimeZone();
|
39
|
+
|
40
|
+
@Config("column_options")
|
41
|
+
@ConfigDefault("{}")
|
42
|
+
public Map<String, ConfigSource> getColumnOptions();
|
43
|
+
|
44
|
+
// required by TimestampFormatter
|
45
|
+
@ConfigInject
|
46
|
+
public ScriptingContainer getJRuby();
|
47
|
+
}
|
48
|
+
|
49
|
+
public static interface ColumnOption
|
50
|
+
extends Task
|
51
|
+
{
|
52
|
+
@Config("timestamp_format")
|
53
|
+
@ConfigDefault("\"%Y-%m-%d %H:%M:%S.%6N\"")
|
54
|
+
public TimestampFormat getTimestampFormat();
|
55
|
+
|
56
|
+
@Config("timezone")
|
57
|
+
@ConfigDefault("null")
|
58
|
+
public Optional<DateTimeZone> getTimeZone();
|
59
|
+
}
|
60
|
+
|
61
|
+
public DynamicPageBuilder(BuilderTask task,
|
62
|
+
BufferAllocator allocator, Schema schema, PageOutput output)
|
63
|
+
{
|
64
|
+
this.pageBuilder = new PageBuilder(allocator, schema, output);
|
65
|
+
this.schema = schema;
|
66
|
+
|
67
|
+
// TODO configurable default value
|
68
|
+
DynamicColumnSetterFactory factory = new DynamicColumnSetterFactory(task,
|
69
|
+
DynamicColumnSetterFactory.nullDefaultValue());
|
70
|
+
|
71
|
+
ImmutableList.Builder<DynamicColumnSetter> setters = ImmutableList.builder();
|
72
|
+
ImmutableMap.Builder<String, DynamicColumnSetter> lookup = ImmutableMap.builder();
|
73
|
+
for (Column c : schema.getColumns()) {
|
74
|
+
DynamicColumnSetter setter = factory.newColumnSetter(pageBuilder, c);
|
75
|
+
setters.add(setter);
|
76
|
+
lookup.put(c.getName(), setter);
|
77
|
+
}
|
78
|
+
this.setters = setters.build();
|
79
|
+
this.columnLookup = lookup.build();
|
80
|
+
}
|
81
|
+
|
82
|
+
public List<Column> getColumns()
|
83
|
+
{
|
84
|
+
return schema.getColumns();
|
85
|
+
}
|
86
|
+
|
87
|
+
public DynamicColumnSetter column(Column c)
|
88
|
+
{
|
89
|
+
return setters.get(c.getIndex());
|
90
|
+
}
|
91
|
+
|
92
|
+
public DynamicColumnSetter column(int index)
|
93
|
+
{
|
94
|
+
if (index < 0 || setters.size() <= index) {
|
95
|
+
throw new DynamicColumnNotFoundException("Column index '"+index+"' is not exist");
|
96
|
+
}
|
97
|
+
return setters.get(index);
|
98
|
+
}
|
99
|
+
|
100
|
+
public DynamicColumnSetter lookupColumn(String columnName)
|
101
|
+
{
|
102
|
+
DynamicColumnSetter setter = columnLookup.get(columnName);
|
103
|
+
if (setter == null) {
|
104
|
+
throw new DynamicColumnNotFoundException("Column '"+columnName+"' is not exist");
|
105
|
+
}
|
106
|
+
return setter;
|
107
|
+
}
|
108
|
+
|
109
|
+
public DynamicColumnSetter columnOrSkip(int index)
|
110
|
+
{
|
111
|
+
if (index < 0 || setters.size() <= index) {
|
112
|
+
return SkipColumnSetter.get();
|
113
|
+
}
|
114
|
+
return setters.get(index);
|
115
|
+
}
|
116
|
+
|
117
|
+
public DynamicColumnSetter columnOrSkip(String columnName)
|
118
|
+
{
|
119
|
+
DynamicColumnSetter setter = columnLookup.get(columnName);
|
120
|
+
if (setter == null) {
|
121
|
+
return SkipColumnSetter.get();
|
122
|
+
}
|
123
|
+
return setter;
|
124
|
+
}
|
125
|
+
|
126
|
+
// for jruby
|
127
|
+
protected DynamicColumnSetter columnOrNull(int index)
|
128
|
+
{
|
129
|
+
if (index < 0 || setters.size() <= index) {
|
130
|
+
return null;
|
131
|
+
}
|
132
|
+
return setters.get(index);
|
133
|
+
}
|
134
|
+
|
135
|
+
// for jruby
|
136
|
+
protected DynamicColumnSetter columnOrNull(String columnName)
|
137
|
+
{
|
138
|
+
return columnLookup.get(columnName);
|
139
|
+
}
|
140
|
+
|
141
|
+
public void addRecord()
|
142
|
+
{
|
143
|
+
pageBuilder.addRecord();
|
144
|
+
}
|
145
|
+
|
146
|
+
public void flush()
|
147
|
+
{
|
148
|
+
pageBuilder.flush();
|
149
|
+
}
|
150
|
+
|
151
|
+
public void finish()
|
152
|
+
{
|
153
|
+
pageBuilder.finish();
|
154
|
+
}
|
155
|
+
|
156
|
+
@Override
|
157
|
+
public void close()
|
158
|
+
{
|
159
|
+
pageBuilder.close();
|
160
|
+
}
|
161
|
+
}
|
@@ -0,0 +1,80 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
4
|
+
import org.jruby.RubyNil;
|
5
|
+
import org.jruby.RubyBoolean;
|
6
|
+
import org.jruby.RubyInteger;
|
7
|
+
import org.jruby.RubyFloat;
|
8
|
+
import org.jruby.RubyString;
|
9
|
+
import org.jruby.RubyTime;
|
10
|
+
import org.jruby.exceptions.RaiseException;
|
11
|
+
import org.embulk.spi.PageBuilder;
|
12
|
+
import org.embulk.spi.Column;
|
13
|
+
import org.embulk.spi.util.DynamicColumnSetter;
|
14
|
+
import org.embulk.spi.time.Timestamp;
|
15
|
+
|
16
|
+
public abstract class AbstractDynamicColumnSetter
|
17
|
+
implements DynamicColumnSetter
|
18
|
+
{
|
19
|
+
protected final PageBuilder pageBuilder;
|
20
|
+
protected final Column column;
|
21
|
+
protected final DefaultValueSetter defaultValue;
|
22
|
+
|
23
|
+
protected AbstractDynamicColumnSetter(PageBuilder pageBuilder, Column column,
|
24
|
+
DefaultValueSetter defaultValue)
|
25
|
+
{
|
26
|
+
this.pageBuilder = pageBuilder;
|
27
|
+
this.column = column;
|
28
|
+
this.defaultValue = defaultValue;
|
29
|
+
}
|
30
|
+
|
31
|
+
public abstract void setNull();
|
32
|
+
|
33
|
+
public abstract void set(boolean value);
|
34
|
+
|
35
|
+
public abstract void set(long value);
|
36
|
+
|
37
|
+
public abstract void set(double value);
|
38
|
+
|
39
|
+
public abstract void set(String value);
|
40
|
+
|
41
|
+
public abstract void set(Timestamp value);
|
42
|
+
|
43
|
+
public IRubyObject setRubyObject(IRubyObject rubyObject)
|
44
|
+
{
|
45
|
+
if (rubyObject instanceof RubyNil) {
|
46
|
+
setNull();
|
47
|
+
} else if (rubyObject instanceof RubyBoolean) {
|
48
|
+
RubyBoolean b = (RubyBoolean) rubyObject;
|
49
|
+
set(b.isTrue());
|
50
|
+
} else if (rubyObject instanceof RubyInteger) {
|
51
|
+
RubyInteger i = (RubyInteger) rubyObject;
|
52
|
+
try {
|
53
|
+
set(i.getLongValue());
|
54
|
+
} catch (RaiseException ex) {
|
55
|
+
if ("RangeError".equals(ex.getException().getMetaClass().getBaseName())) {
|
56
|
+
// integer is too large
|
57
|
+
throw ex; //TODO setDefaultValue();
|
58
|
+
} else {
|
59
|
+
throw ex;
|
60
|
+
}
|
61
|
+
}
|
62
|
+
} else if (rubyObject instanceof RubyFloat) {
|
63
|
+
RubyFloat f = (RubyFloat) rubyObject;
|
64
|
+
set(f.getDoubleValue());
|
65
|
+
} else if (rubyObject instanceof RubyString) {
|
66
|
+
RubyString s = (RubyString) rubyObject;
|
67
|
+
set(s.asJavaString());
|
68
|
+
} else if (rubyObject instanceof RubyTime) {
|
69
|
+
RubyTime time = (RubyTime) rubyObject;
|
70
|
+
long msec = time.getDateTime().getMillis();
|
71
|
+
long nsec = time.getNSec();
|
72
|
+
long sec = msec / 1000 + nsec / 1000000000;
|
73
|
+
int nano = (int) ((msec % 1000) * 1000000 + nsec % 1000000000);
|
74
|
+
set(Timestamp.ofEpochSecond(sec, nano));
|
75
|
+
} else {
|
76
|
+
throw rubyObject.getRuntime().newTypeError("cannot convert instance of " + rubyObject.getMetaClass() + " to nil, true, false, Integer, Float, String, or Time");
|
77
|
+
}
|
78
|
+
return rubyObject.getRuntime().getNil();
|
79
|
+
}
|
80
|
+
}
|
@@ -0,0 +1,64 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableSet;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.PageBuilder;
|
6
|
+
import org.embulk.spi.time.Timestamp;
|
7
|
+
|
8
|
+
public class BooleanColumnSetter
|
9
|
+
extends AbstractDynamicColumnSetter
|
10
|
+
{
|
11
|
+
private static final ImmutableSet<String> TRUE_STRINGS =
|
12
|
+
ImmutableSet.of(
|
13
|
+
"true", "True", "TRUE",
|
14
|
+
"yes", "Yes", "YES",
|
15
|
+
"t", "T", "y", "Y",
|
16
|
+
"on", "On", "ON",
|
17
|
+
"1");
|
18
|
+
|
19
|
+
public BooleanColumnSetter(PageBuilder pageBuilder, Column column,
|
20
|
+
DefaultValueSetter defaultValue)
|
21
|
+
{
|
22
|
+
super(pageBuilder, column, defaultValue);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public void setNull()
|
27
|
+
{
|
28
|
+
pageBuilder.setNull(column);
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public void set(boolean v)
|
33
|
+
{
|
34
|
+
pageBuilder.setBoolean(column, v);
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public void set(long v)
|
39
|
+
{
|
40
|
+
pageBuilder.setBoolean(column, v > 0);
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public void set(double v)
|
45
|
+
{
|
46
|
+
pageBuilder.setBoolean(column, v > 0.0);
|
47
|
+
}
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public void set(String v)
|
51
|
+
{
|
52
|
+
if (TRUE_STRINGS.contains(v)) {
|
53
|
+
pageBuilder.setBoolean(column, true);
|
54
|
+
} else {
|
55
|
+
defaultValue.setDouble(pageBuilder, column);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
public void set(Timestamp v)
|
61
|
+
{
|
62
|
+
defaultValue.setBoolean(pageBuilder, column);
|
63
|
+
}
|
64
|
+
}
|
@@ -0,0 +1,18 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import org.embulk.spi.PageBuilder;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
|
7
|
+
public interface DefaultValueSetter
|
8
|
+
{
|
9
|
+
public void setBoolean(PageBuilder pageBuilder, Column c);
|
10
|
+
|
11
|
+
public void setLong(PageBuilder pageBuilder, Column c);
|
12
|
+
|
13
|
+
public void setDouble(PageBuilder pageBuilder, Column c);
|
14
|
+
|
15
|
+
public void setString(PageBuilder pageBuilder, Column c);
|
16
|
+
|
17
|
+
public void setTimestamp(PageBuilder pageBuilder, Column c);
|
18
|
+
}
|
@@ -0,0 +1,61 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import java.math.RoundingMode;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.PageBuilder;
|
6
|
+
import org.embulk.spi.time.Timestamp;
|
7
|
+
|
8
|
+
public class DoubleColumnSetter
|
9
|
+
extends AbstractDynamicColumnSetter
|
10
|
+
{
|
11
|
+
public DoubleColumnSetter(PageBuilder pageBuilder, Column column,
|
12
|
+
DefaultValueSetter defaultValue)
|
13
|
+
{
|
14
|
+
super(pageBuilder, column, defaultValue);
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public void setNull()
|
19
|
+
{
|
20
|
+
pageBuilder.setNull(column);
|
21
|
+
}
|
22
|
+
|
23
|
+
@Override
|
24
|
+
public void set(boolean v)
|
25
|
+
{
|
26
|
+
pageBuilder.setDouble(column, v ? 1.0 : 0.0);
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public void set(long v)
|
31
|
+
{
|
32
|
+
pageBuilder.setDouble(column, (double) v);
|
33
|
+
}
|
34
|
+
|
35
|
+
@Override
|
36
|
+
public void set(double v)
|
37
|
+
{
|
38
|
+
pageBuilder.setDouble(column, v);
|
39
|
+
}
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public void set(String v)
|
43
|
+
{
|
44
|
+
double dv;
|
45
|
+
try {
|
46
|
+
dv = Double.parseDouble(v);
|
47
|
+
} catch (NumberFormatException e) {
|
48
|
+
defaultValue.setDouble(pageBuilder, column);
|
49
|
+
return;
|
50
|
+
}
|
51
|
+
pageBuilder.setDouble(column, dv);
|
52
|
+
}
|
53
|
+
|
54
|
+
@Override
|
55
|
+
public void set(Timestamp v)
|
56
|
+
{
|
57
|
+
double sec = (double) v.getEpochSecond();
|
58
|
+
double frac = v.getNano() / 1000000000.0;
|
59
|
+
pageBuilder.setDouble(column, sec + frac);
|
60
|
+
}
|
61
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import java.math.RoundingMode;
|
4
|
+
import com.google.common.math.DoubleMath;
|
5
|
+
import org.embulk.spi.Column;
|
6
|
+
import org.embulk.spi.PageBuilder;
|
7
|
+
import org.embulk.spi.time.Timestamp;
|
8
|
+
|
9
|
+
public class LongColumnSetter
|
10
|
+
extends AbstractDynamicColumnSetter
|
11
|
+
{
|
12
|
+
public LongColumnSetter(PageBuilder pageBuilder, Column column,
|
13
|
+
DefaultValueSetter defaultValue)
|
14
|
+
{
|
15
|
+
super(pageBuilder, column, defaultValue);
|
16
|
+
}
|
17
|
+
|
18
|
+
@Override
|
19
|
+
public void setNull()
|
20
|
+
{
|
21
|
+
pageBuilder.setNull(column);
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public void set(boolean v)
|
26
|
+
{
|
27
|
+
pageBuilder.setLong(column, v ? 1L : 0L);
|
28
|
+
}
|
29
|
+
|
30
|
+
@Override
|
31
|
+
public void set(long v)
|
32
|
+
{
|
33
|
+
pageBuilder.setLong(column, v);
|
34
|
+
}
|
35
|
+
|
36
|
+
@Override
|
37
|
+
public void set(double v)
|
38
|
+
{
|
39
|
+
long lv;
|
40
|
+
try {
|
41
|
+
// TODO configurable rounding mode
|
42
|
+
lv = DoubleMath.roundToLong(v, RoundingMode.HALF_UP);
|
43
|
+
} catch (ArithmeticException ex) {
|
44
|
+
// NaN / Infinite / -Infinite
|
45
|
+
defaultValue.setLong(pageBuilder, column);
|
46
|
+
return;
|
47
|
+
}
|
48
|
+
pageBuilder.setLong(column, lv);
|
49
|
+
}
|
50
|
+
|
51
|
+
@Override
|
52
|
+
public void set(String v)
|
53
|
+
{
|
54
|
+
long lv;
|
55
|
+
try {
|
56
|
+
lv = Long.parseLong(v);
|
57
|
+
} catch (NumberFormatException e) {
|
58
|
+
defaultValue.setLong(pageBuilder, column);
|
59
|
+
return;
|
60
|
+
}
|
61
|
+
pageBuilder.setLong(column, lv);
|
62
|
+
}
|
63
|
+
|
64
|
+
@Override
|
65
|
+
public void set(Timestamp v)
|
66
|
+
{
|
67
|
+
pageBuilder.setDouble(column, v.getEpochSecond());
|
68
|
+
}
|
69
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import org.embulk.spi.PageBuilder;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
|
7
|
+
public class NullDefaultValueSetter
|
8
|
+
implements DefaultValueSetter
|
9
|
+
{
|
10
|
+
public void setBoolean(PageBuilder pageBuilder, Column c)
|
11
|
+
{
|
12
|
+
pageBuilder.setNull(c);
|
13
|
+
}
|
14
|
+
|
15
|
+
public void setLong(PageBuilder pageBuilder, Column c)
|
16
|
+
{
|
17
|
+
pageBuilder.setNull(c);
|
18
|
+
}
|
19
|
+
|
20
|
+
public void setDouble(PageBuilder pageBuilder, Column c)
|
21
|
+
{
|
22
|
+
pageBuilder.setNull(c);
|
23
|
+
}
|
24
|
+
|
25
|
+
public void setString(PageBuilder pageBuilder, Column c)
|
26
|
+
{
|
27
|
+
pageBuilder.setNull(c);
|
28
|
+
}
|
29
|
+
|
30
|
+
public void setTimestamp(PageBuilder pageBuilder, Column c)
|
31
|
+
{
|
32
|
+
pageBuilder.setNull(c);
|
33
|
+
}
|
34
|
+
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
4
|
+
import org.embulk.spi.PageBuilder;
|
5
|
+
import org.embulk.spi.Column;
|
6
|
+
import org.embulk.spi.time.Timestamp;
|
7
|
+
import org.embulk.spi.time.TimestampParser;
|
8
|
+
import org.embulk.spi.time.TimestampParseException;
|
9
|
+
|
10
|
+
public class SkipColumnSetter
|
11
|
+
extends AbstractDynamicColumnSetter
|
12
|
+
{
|
13
|
+
private static final SkipColumnSetter instance = new SkipColumnSetter();
|
14
|
+
|
15
|
+
public static SkipColumnSetter get()
|
16
|
+
{
|
17
|
+
return instance;
|
18
|
+
}
|
19
|
+
|
20
|
+
private SkipColumnSetter()
|
21
|
+
{
|
22
|
+
super(null, null, null);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public void setNull()
|
27
|
+
{ }
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public void set(boolean v)
|
31
|
+
{ }
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public void set(long v)
|
35
|
+
{ }
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public void set(double v)
|
39
|
+
{ }
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public void set(String v)
|
43
|
+
{ }
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public void set(Timestamp v)
|
47
|
+
{ }
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public IRubyObject setRubyObject(IRubyObject rubyObject)
|
51
|
+
{
|
52
|
+
return rubyObject.getRuntime().getNil();
|
53
|
+
}
|
54
|
+
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import org.embulk.spi.PageBuilder;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.embulk.spi.time.TimestampFormatter;
|
7
|
+
|
8
|
+
public class StringColumnSetter
|
9
|
+
extends AbstractDynamicColumnSetter
|
10
|
+
{
|
11
|
+
private final TimestampFormatter timestampFormatter;
|
12
|
+
|
13
|
+
public StringColumnSetter(PageBuilder pageBuilder, Column column,
|
14
|
+
DefaultValueSetter defaultValue,
|
15
|
+
TimestampFormatter timestampFormatter)
|
16
|
+
{
|
17
|
+
super(pageBuilder, column, defaultValue);
|
18
|
+
this.timestampFormatter = timestampFormatter;
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public void setNull()
|
23
|
+
{
|
24
|
+
pageBuilder.setNull(column);
|
25
|
+
}
|
26
|
+
|
27
|
+
@Override
|
28
|
+
public void set(boolean v)
|
29
|
+
{
|
30
|
+
pageBuilder.setString(column, Boolean.toString(v));
|
31
|
+
}
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public void set(long v)
|
35
|
+
{
|
36
|
+
pageBuilder.setString(column, Long.toString(v));
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public void set(double v)
|
41
|
+
{
|
42
|
+
pageBuilder.setString(column, Double.toString(v));
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public void set(String v)
|
47
|
+
{
|
48
|
+
pageBuilder.setString(column, v);
|
49
|
+
}
|
50
|
+
|
51
|
+
@Override
|
52
|
+
public void set(Timestamp v)
|
53
|
+
{
|
54
|
+
pageBuilder.setString(column, timestampFormatter.format(v));
|
55
|
+
}
|
56
|
+
}
|
@@ -0,0 +1,64 @@
|
|
1
|
+
package org.embulk.spi.util.dynamic;
|
2
|
+
|
3
|
+
import org.embulk.spi.PageBuilder;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.time.Timestamp;
|
6
|
+
import org.embulk.spi.time.TimestampParser;
|
7
|
+
import org.embulk.spi.time.TimestampParseException;
|
8
|
+
|
9
|
+
public class TimestampColumnSetter
|
10
|
+
extends AbstractDynamicColumnSetter
|
11
|
+
{
|
12
|
+
private final TimestampParser timestampParser;
|
13
|
+
|
14
|
+
public TimestampColumnSetter(PageBuilder pageBuilder, Column column,
|
15
|
+
DefaultValueSetter defaultValue,
|
16
|
+
TimestampParser timestampParser)
|
17
|
+
{
|
18
|
+
super(pageBuilder, column, defaultValue);
|
19
|
+
this.timestampParser = timestampParser;
|
20
|
+
}
|
21
|
+
|
22
|
+
@Override
|
23
|
+
public void setNull()
|
24
|
+
{
|
25
|
+
pageBuilder.setNull(column);
|
26
|
+
}
|
27
|
+
|
28
|
+
@Override
|
29
|
+
public void set(boolean v)
|
30
|
+
{
|
31
|
+
defaultValue.setTimestamp(pageBuilder, column);
|
32
|
+
}
|
33
|
+
|
34
|
+
@Override
|
35
|
+
public void set(long v)
|
36
|
+
{
|
37
|
+
pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(v));
|
38
|
+
}
|
39
|
+
|
40
|
+
@Override
|
41
|
+
public void set(double v)
|
42
|
+
{
|
43
|
+
long sec = (long) v;
|
44
|
+
int nsec = (int) ((v - (double) sec) * 1000000000);
|
45
|
+
pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(sec, nsec));
|
46
|
+
defaultValue.setTimestamp(pageBuilder, column);
|
47
|
+
}
|
48
|
+
|
49
|
+
@Override
|
50
|
+
public void set(String v)
|
51
|
+
{
|
52
|
+
try {
|
53
|
+
pageBuilder.setTimestamp(column, timestampParser.parse(v));
|
54
|
+
} catch (TimestampParseException e) {
|
55
|
+
defaultValue.setTimestamp(pageBuilder, column);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
@Override
|
60
|
+
public void set(Timestamp v)
|
61
|
+
{
|
62
|
+
pageBuilder.setTimestamp(column, v);
|
63
|
+
}
|
64
|
+
}
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,23 @@
|
|
1
|
+
Release 0.6.13
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Plugin API
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Added spi.util.DynamicPageBuilder utility class. This is a wrapper of PageBuilder which converts types automatically.
|
8
|
+
* ``page_builder`` used by Ruby plugins is now backed by DynamicPageBuilder. This means that Ruby plugins don't need type conversion code such as to_i or to_s any more.
|
9
|
+
|
10
|
+
Built-in plugins
|
11
|
+
------------------
|
12
|
+
|
13
|
+
* ``guess-csv`` plugin guesses ``trim_if_not_quoted`` option. This works well if a CSV file includes integers with extra spaces (such as CSV files exported by Excel).
|
14
|
+
* Fixed a problem where ``guess-csv`` plugin guessed type of a column as "string" if it includes non-empty null_string.
|
15
|
+
|
16
|
+
General Changes
|
17
|
+
------------------
|
18
|
+
|
19
|
+
* "-b" option installs bundler automatically if it is not installed rather than showing a "no such file to load -- bundler" error message.
|
20
|
+
|
21
|
+
Release Date
|
22
|
+
------------------
|
23
|
+
2015-06-25
|
@@ -80,14 +80,20 @@ public class CsvTokenizer
|
|
80
80
|
return input.nextFile();
|
81
81
|
}
|
82
82
|
|
83
|
+
// used by guess-csv
|
83
84
|
public boolean nextRecord()
|
85
|
+
{
|
86
|
+
return nextRecord(true);
|
87
|
+
}
|
88
|
+
|
89
|
+
public boolean nextRecord(boolean skipEmptyLine)
|
84
90
|
{
|
85
91
|
// If at the end of record, read the next line and initialize the state
|
86
92
|
if (recordState != RecordState.END) {
|
87
93
|
throw new TooManyColumnsException("Too many columns");
|
88
94
|
}
|
89
95
|
|
90
|
-
boolean hasNext = nextLine(
|
96
|
+
boolean hasNext = nextLine(skipEmptyLine);
|
91
97
|
if (hasNext) {
|
92
98
|
recordState = RecordState.NOT_END;
|
93
99
|
return true;
|
@@ -18,7 +18,11 @@ if bundle_path
|
|
18
18
|
Gem.clear_paths # force rubygems to reload GEM_HOME
|
19
19
|
|
20
20
|
ENV['BUNDLE_GEMFILE'] = File.expand_path File.join(bundle_path, "Gemfile")
|
21
|
-
|
21
|
+
begin
|
22
|
+
require 'bundler'
|
23
|
+
rescue LoadError => e
|
24
|
+
raise "#{e}\nBundler is not installed. Did you run \`$ embulk bundle #{bundle_path}\` ?"
|
25
|
+
end
|
22
26
|
Bundler.load.setup_environment
|
23
27
|
require 'bundler/setup'
|
24
28
|
# since here, `require` may load files of different (newer) embulk versions
|
@@ -261,7 +261,19 @@ examples:
|
|
261
261
|
|
262
262
|
ENV['BUNDLE_GEMFILE'] = File.expand_path File.join(path, "Gemfile")
|
263
263
|
Dir.chdir(path) do
|
264
|
-
|
264
|
+
begin
|
265
|
+
require 'bundler'
|
266
|
+
rescue LoadError
|
267
|
+
require 'rubygems/gem_runner'
|
268
|
+
begin
|
269
|
+
puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S.%3N %z")}: installing bundler..."
|
270
|
+
Gem::GemRunner.new.run %w[install bundler]
|
271
|
+
rescue Gem::SystemExitException => e
|
272
|
+
raise e if e.exit_code != 0
|
273
|
+
end
|
274
|
+
Gem.clear_paths
|
275
|
+
require 'bundler'
|
276
|
+
end
|
265
277
|
require 'bundler/friendly_errors'
|
266
278
|
require 'bundler/cli'
|
267
279
|
Bundler.with_friendly_errors do
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -60,12 +60,24 @@ module Embulk
|
|
60
60
|
# don't even set null_string to avoid confusion of null and 'null' in YAML format
|
61
61
|
end
|
62
62
|
|
63
|
-
|
64
|
-
|
63
|
+
# guessing skip_header_lines should be before guessing guess_comment_line_marker
|
64
|
+
# because lines supplied to CsvTokenizer already don't include skipped header lines.
|
65
|
+
# skipping empty lines is also disabled here because skipping header lines is done by
|
66
|
+
# CsvParser which doesn't skip empty lines automatically
|
67
|
+
sample_records = split_lines(parser_guessed, false, sample_lines, delim, {})
|
65
68
|
skip_header_lines = guess_skip_header_lines(sample_records)
|
69
|
+
sample_lines = sample_lines[skip_header_lines..-1]
|
66
70
|
sample_records = sample_records[skip_header_lines..-1]
|
67
71
|
|
68
|
-
|
72
|
+
unless parser_guessed.has_key?("comment_line_marker")
|
73
|
+
comment_line_marker, sample_lines =
|
74
|
+
guess_comment_line_marker(sample_lines, delim, parser_guessed["quote"], parser_guessed["null_string"])
|
75
|
+
if comment_line_marker
|
76
|
+
parser_guessed["comment_line_marker"] = comment_line_marker
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
sample_records = split_lines(parser_guessed, true, sample_lines, delim, {})
|
69
81
|
|
70
82
|
first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
|
71
83
|
other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
|
@@ -75,6 +87,17 @@ module Embulk
|
|
75
87
|
return {}
|
76
88
|
end
|
77
89
|
|
90
|
+
unless parser_guessed.has_key?("trim_if_not_quoted")
|
91
|
+
sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
|
92
|
+
other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
|
93
|
+
if other_types != other_types_trimmed
|
94
|
+
parser_guessed["trim_if_not_quoted"] = true
|
95
|
+
other_types = other_types_trimmed
|
96
|
+
else
|
97
|
+
parser_guessed["trim_if_not_quoted"] = false
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
78
101
|
header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) })
|
79
102
|
|
80
103
|
if header_line
|
@@ -83,10 +106,8 @@ module Embulk
|
|
83
106
|
parser_guessed["skip_header_lines"] = skip_header_lines
|
84
107
|
end
|
85
108
|
|
86
|
-
parser_guessed["
|
87
|
-
|
88
|
-
parser_guessed["allow_extra_columns"] = false
|
89
|
-
parser_guessed["allow_optional_columns"] = false
|
109
|
+
parser_guessed["allow_extra_columns"] = false unless parser_guessed.has_key?("allow_extra_columns")
|
110
|
+
parser_guessed["allow_optional_columns"] = false unless parser_guessed.has_key?("allow_optional_columns")
|
90
111
|
|
91
112
|
if header_line
|
92
113
|
column_names = sample_records.first
|
@@ -110,22 +131,26 @@ module Embulk
|
|
110
131
|
|
111
132
|
private
|
112
133
|
|
113
|
-
def split_lines(parser_config, sample_lines, delim)
|
114
|
-
|
115
|
-
|
134
|
+
def split_lines(parser_config, skip_empty_lines, sample_lines, delim, extra_config)
|
135
|
+
null_string = parser_config["null_string"]
|
136
|
+
config = parser_config.merge(extra_config).merge({"charset" => "UTF-8", "columns" => []})
|
137
|
+
parser_task = config.load_config(org.embulk.standards.CsvParserPlugin::PluginTask)
|
138
|
+
data = sample_lines.map {|line| line.force_encoding('UTF-8') }.join(parser_task.getNewline.getString.encode('UTF-8'))
|
116
139
|
sample = Buffer.from_ruby_string(data)
|
117
140
|
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
|
118
141
|
tokenizer = org.embulk.standards.CsvTokenizer.new(decoder, parser_task)
|
119
142
|
rows = []
|
120
143
|
while tokenizer.nextFile
|
121
|
-
while tokenizer.nextRecord
|
144
|
+
while tokenizer.nextRecord(skip_empty_lines)
|
122
145
|
begin
|
123
146
|
columns = []
|
124
147
|
while true
|
125
148
|
begin
|
126
149
|
column = tokenizer.nextColumn
|
127
150
|
quoted = tokenizer.wasQuotedColumn
|
128
|
-
|
151
|
+
if null_string && !quoted && column == null_string
|
152
|
+
column = nil
|
153
|
+
end
|
129
154
|
columns << column
|
130
155
|
rescue org.embulk.standards.CsvTokenizer::TooFewColumnsException
|
131
156
|
rows << columns
|
@@ -228,20 +253,25 @@ module Embulk
|
|
228
253
|
return 0
|
229
254
|
end
|
230
255
|
|
231
|
-
def guess_comment_line_marker(
|
256
|
+
def guess_comment_line_marker(sample_lines, delim, quote, null_string)
|
257
|
+
exclude = []
|
258
|
+
exclude << /^#{Regexp.escape(quote)}/ if quote && !quote.empty?
|
259
|
+
exclude << /^#{Regexp.escape(null_string)}(?:#{Regexp.escape(delim)}|$)/ if null_string
|
260
|
+
|
232
261
|
guessed = COMMENT_LINE_MARKER_CANDIDATES.map do |str|
|
233
262
|
regexp = /^#{Regexp.quote(str)}/
|
234
|
-
|
235
|
-
|
263
|
+
unmatch_lines = sample_lines.reject do |line|
|
264
|
+
exclude.all? {|ex| line !~ ex } && line =~ regexp
|
236
265
|
end
|
237
|
-
|
238
|
-
[str,
|
239
|
-
end.select {|str,
|
240
|
-
|
241
|
-
|
242
|
-
|
266
|
+
match_count = sample_lines.size - unmatch_lines.size
|
267
|
+
[str, match_count, unmatch_lines]
|
268
|
+
end.select {|str,match_count,unmatch_lines| match_count > 0 }.sort_by {|str,match_count,unmatch_lines| -match_count }
|
269
|
+
|
270
|
+
str, match_count, unmatch_lines = guessed.first
|
271
|
+
if str
|
272
|
+
return str, unmatch_lines
|
243
273
|
else
|
244
|
-
return nil,
|
274
|
+
return nil, sample_lines
|
245
275
|
end
|
246
276
|
end
|
247
277
|
|
data/lib/embulk/java/imports.rb
CHANGED
@@ -21,6 +21,7 @@ module Embulk::Java
|
|
21
21
|
java_import 'org.embulk.spi.TransactionalPageOutput'
|
22
22
|
java_import 'org.embulk.spi.PageReader'
|
23
23
|
java_import 'org.embulk.spi.PageBuilder'
|
24
|
+
java_import 'org.embulk.spi.util.DynamicPageBuilder'
|
24
25
|
java_import 'org.embulk.spi.util.LineDecoder'
|
25
26
|
java_import 'org.embulk.spi.util.ListFileInput'
|
26
27
|
java_import 'org.embulk.spi.Schema'
|
@@ -30,6 +31,7 @@ module Embulk::Java
|
|
30
31
|
java_import 'org.embulk.spi.PluginClassLoader'
|
31
32
|
java_import 'org.embulk.spi.FileInputRunner'
|
32
33
|
java_import 'org.embulk.spi.FileOutputRunner'
|
34
|
+
java_import 'org.embulk.spi.Exec'
|
33
35
|
|
34
36
|
# TODO
|
35
37
|
end
|
data/lib/embulk/page_builder.rb
CHANGED
@@ -1,13 +1,67 @@
|
|
1
1
|
module Embulk
|
2
2
|
|
3
|
+
org.embulk.spi.util.dynamic.AbstractDynamicColumnSetter.module_eval do
|
4
|
+
alias_method(:set, :setRubyObject)
|
5
|
+
end
|
6
|
+
|
3
7
|
class PageBuilder
|
4
8
|
def initialize(schema, java_page_output)
|
5
|
-
|
9
|
+
# TODO get task as an argument
|
10
|
+
task = Java::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
|
11
|
+
@page_builder = Java::DynamicPageBuilder.new(task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
|
6
12
|
@schema = schema
|
7
13
|
end
|
8
14
|
|
9
15
|
def add(record)
|
10
|
-
|
16
|
+
i = 0
|
17
|
+
m = record.size
|
18
|
+
while i < m
|
19
|
+
@page_builder.column(i).set(record[i])
|
20
|
+
i += 1
|
21
|
+
end
|
22
|
+
@page_builder.addRecord
|
23
|
+
nil
|
24
|
+
end
|
25
|
+
|
26
|
+
def [](index_or_column)
|
27
|
+
case index_or_column
|
28
|
+
when Integer
|
29
|
+
@page_builder.column_or_null(index_or_column)
|
30
|
+
when Column
|
31
|
+
@page_builder.column_or_null(index_or_column.index)
|
32
|
+
else
|
33
|
+
@page_builder.column_or_null(index_or_column)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def column(index_or_column)
|
38
|
+
case index_or_column
|
39
|
+
when Integer
|
40
|
+
@page_builder.column(index_or_column)
|
41
|
+
when Column
|
42
|
+
@page_builder.column(index_or_column.index)
|
43
|
+
else
|
44
|
+
@page_builder.lookupColumn(index_or_column)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def column_or_skip(index_or_column)
|
49
|
+
case index_or_column
|
50
|
+
when Integer
|
51
|
+
@page_builder.column_or_skip(index_or_column)
|
52
|
+
when Column
|
53
|
+
@page_builder.column_or_skip(index_or_column.index)
|
54
|
+
else
|
55
|
+
@page_builder.column_or_skip(index_or_column)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def add!
|
60
|
+
@page_builder.add_record
|
61
|
+
end
|
62
|
+
|
63
|
+
def flush
|
64
|
+
@page_builder.flush
|
11
65
|
end
|
12
66
|
|
13
67
|
def finish
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.12
|
4
|
+
version: 0.6.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -216,6 +216,10 @@ files:
|
|
216
216
|
- embulk-core/src/main/java/org/embulk/spi/unit/ByteSize.java
|
217
217
|
- embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java
|
218
218
|
- embulk-core/src/main/java/org/embulk/spi/util/Decoders.java
|
219
|
+
- embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnNotFoundException.java
|
220
|
+
- embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetter.java
|
221
|
+
- embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java
|
222
|
+
- embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java
|
219
223
|
- embulk-core/src/main/java/org/embulk/spi/util/Encoders.java
|
220
224
|
- embulk-core/src/main/java/org/embulk/spi/util/Executors.java
|
221
225
|
- embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java
|
@@ -232,6 +236,15 @@ files:
|
|
232
236
|
- embulk-core/src/main/java/org/embulk/spi/util/Pages.java
|
233
237
|
- embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java
|
234
238
|
- embulk-core/src/main/java/org/embulk/spi/util/RetryExecutor.java
|
239
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java
|
240
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java
|
241
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java
|
242
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java
|
243
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java
|
244
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java
|
245
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java
|
246
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java
|
247
|
+
- embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java
|
235
248
|
- embulk-core/src/main/resources/embulk/logback-color.xml
|
236
249
|
- embulk-core/src/main/resources/embulk/logback-console.xml
|
237
250
|
- embulk-core/src/main/resources/embulk/logback-file.xml
|
@@ -301,6 +314,7 @@ files:
|
|
301
314
|
- embulk-docs/src/release/release-0.6.10.rst
|
302
315
|
- embulk-docs/src/release/release-0.6.11.rst
|
303
316
|
- embulk-docs/src/release/release-0.6.12.rst
|
317
|
+
- embulk-docs/src/release/release-0.6.13.rst
|
304
318
|
- embulk-docs/src/release/release-0.6.2.rst
|
305
319
|
- embulk-docs/src/release/release-0.6.3.rst
|
306
320
|
- embulk-docs/src/release/release-0.6.4.rst
|
@@ -417,8 +431,8 @@ files:
|
|
417
431
|
- classpath/bval-jsr303-0.5.jar
|
418
432
|
- classpath/commons-beanutils-core-1.8.3.jar
|
419
433
|
- classpath/commons-lang3-3.1.jar
|
420
|
-
- classpath/embulk-core-0.6.12.jar
|
421
|
-
- classpath/embulk-standards-0.6.12.jar
|
434
|
+
- classpath/embulk-core-0.6.13.jar
|
435
|
+
- classpath/embulk-standards-0.6.13.jar
|
422
436
|
- classpath/guava-18.0.jar
|
423
437
|
- classpath/guice-4.0.jar
|
424
438
|
- classpath/guice-multibindings-4.0.jar
|