embulk 0.6.11 → 0.6.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +1 -1
- data/build.gradle +1 -1
- data/embulk-core/build.gradle +2 -2
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +3 -2
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +26 -12
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +73 -57
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +4 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +1 -1
- data/embulk-core/src/main/resources/embulk/logback-color.xml +72 -0
- data/embulk-core/src/main/resources/embulk/logback-console.xml +14 -0
- data/embulk-core/src/main/resources/embulk/logback-file.xml +25 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.12.rst +31 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +2 -1
- data/lib/embulk.rb +2 -1
- data/lib/embulk/command/embulk_new_plugin.rb +1 -1
- data/lib/embulk/command/embulk_run.rb +4 -4
- data/lib/embulk/data_source.rb +134 -0
- data/lib/embulk/guess/csv.rb +1 -1
- data/lib/embulk/java/time_helper.rb +1 -1
- data/lib/embulk/logger.rb +152 -0
- data/lib/embulk/plugin_registry.rb +2 -2
- data/lib/embulk/version.rb +1 -1
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c73174238cf469f8b78f7e7225d35e2ae9c5d0c0
|
4
|
+
data.tar.gz: c3cb6c769f05c1ed0d636b84232018e97a0a955b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c17333b98eb2d249a909d4b7603718090a8d0cd4fc8bb087b7c4c3ad712cf05c7e2db4c51bd0947004fc40d0f0052e3eeac33bb569116e16031d5a0aa256809
|
7
|
+
data.tar.gz: c439b2981678bcb6698173e535f63baa72411f78e1099b89888a25bd96c758f8d9947622b78cc2506b9369d1086da9598a59a462fa5d15ce6c5569fd06602dca
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -126,7 +126,7 @@ If the transaction fails, embulk stores state some states to the yaml file. You
|
|
126
126
|
embulk run config.yml -r resume-state.yml
|
127
127
|
```
|
128
128
|
|
129
|
-
If you
|
129
|
+
If you give up on resuming the transaction, you can use `embulk cleanup` subcommand to delete intermediate data:
|
130
130
|
|
131
131
|
```
|
132
132
|
embulk cleanup config.yml -r resume-state.yml
|
data/build.gradle
CHANGED
data/embulk-core/build.gradle
CHANGED
@@ -21,9 +21,8 @@ dependencies {
|
|
21
21
|
compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.3'
|
22
22
|
compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.3'
|
23
23
|
compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.3'
|
24
|
-
compile '
|
24
|
+
compile 'ch.qos.logback:logback-classic:1.1.3'
|
25
25
|
compile 'org.slf4j:slf4j-api:1.7.10'
|
26
|
-
compile 'org.slf4j:slf4j-log4j12:1.7.10'
|
27
26
|
compile 'org.jruby:jruby-complete:' + project.jrubyVersion
|
28
27
|
compile 'com.google.code.findbugs:annotations:3.0.0'
|
29
28
|
compile 'org.yaml:snakeyaml:1.14'
|
@@ -32,6 +31,7 @@ dependencies {
|
|
32
31
|
compile 'io.airlift:slice:0.9'
|
33
32
|
compile 'joda-time:joda-time:2.7'
|
34
33
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
34
|
+
compile 'org.fusesource.jansi:jansi:1.11'
|
35
35
|
|
36
36
|
// for embulk/guess/charset.rb
|
37
37
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
@@ -139,7 +139,7 @@ public class GuessExecutor
|
|
139
139
|
ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
|
140
140
|
ConfigSource guessInputConfig = originalConfig.deepCopy();
|
141
141
|
guessInputConfig.getNestedOrSetEmpty("parser")
|
142
|
-
.set("type", "system_guess") // override in.parser.type so that
|
142
|
+
.set("type", "system_guess") // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
|
143
143
|
.set("guess_plugins", guessPlugins)
|
144
144
|
.set("orig_config", originalConfig);
|
145
145
|
|
@@ -153,7 +153,6 @@ public class GuessExecutor
|
|
153
153
|
if (taskCount == 0) {
|
154
154
|
throw new NoSampleException("No input files to guess");
|
155
155
|
}
|
156
|
-
// TODO repeat runwith taskIndex++ if NoSampleException happens
|
157
156
|
input.run(inputTaskSource, null, 0, new PageOutput() {
|
158
157
|
@Override
|
159
158
|
public void add(Page page)
|
@@ -170,6 +169,7 @@ public class GuessExecutor
|
|
170
169
|
throw new AssertionError("Guess executor must throw GuessedNoticeError");
|
171
170
|
}
|
172
171
|
});
|
172
|
+
|
173
173
|
throw new AssertionError("Guess executor must throw GuessedNoticeError");
|
174
174
|
|
175
175
|
} catch (GuessedNoticeError error) {
|
@@ -319,6 +319,7 @@ public class GuessExecutor
|
|
319
319
|
|
320
320
|
private static Buffer getFirstBuffer(FileInput input)
|
321
321
|
{
|
322
|
+
// The first buffer is created by SamplingParserPlugin. See FileInputRunner.guess.
|
322
323
|
RuntimeException decodeException = null;
|
323
324
|
try {
|
324
325
|
while (input.nextFile()) {
|
@@ -3,7 +3,11 @@ package org.embulk.exec;
|
|
3
3
|
import java.util.Properties;
|
4
4
|
import org.slf4j.ILoggerFactory;
|
5
5
|
import org.slf4j.LoggerFactory;
|
6
|
-
import
|
6
|
+
import ch.qos.logback.classic.Level;
|
7
|
+
import ch.qos.logback.classic.Logger;
|
8
|
+
import ch.qos.logback.classic.LoggerContext;
|
9
|
+
import ch.qos.logback.classic.joran.JoranConfigurator;
|
10
|
+
import ch.qos.logback.core.joran.spi.JoranException;
|
7
11
|
import com.google.inject.Inject;
|
8
12
|
import com.google.inject.Provider;
|
9
13
|
import org.embulk.config.ConfigSource;
|
@@ -14,13 +18,9 @@ public class LoggerProvider
|
|
14
18
|
@Inject
|
15
19
|
public LoggerProvider(@ForSystemConfig ConfigSource systemConfig)
|
16
20
|
{
|
17
|
-
// TODO system config
|
18
|
-
Properties prop = new Properties();
|
19
|
-
|
20
21
|
final String level;
|
21
22
|
String logLevel = systemConfig.get(String.class, "log_level", "info"); // here can't use loadConfig because ModelManager uses LoggerProvider
|
22
23
|
switch (logLevel) {
|
23
|
-
case "fatal": level = "FATAL"; break;
|
24
24
|
case "error": level = "ERROR"; break;
|
25
25
|
case "warn": level = "WARN"; break;
|
26
26
|
case "info": level = "INFO"; break;
|
@@ -28,16 +28,30 @@ public class LoggerProvider
|
|
28
28
|
case "trace": level = "TRACE"; break;
|
29
29
|
default:
|
30
30
|
throw new IllegalArgumentException(String.format(
|
31
|
-
"System property embulk.logLevel=%s is invalid. Available levels are
|
31
|
+
"System property embulk.logLevel=%s is invalid. Available levels are error, warn, info, debug and trace.", logLevel));
|
32
32
|
}
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
34
|
+
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
|
35
|
+
JoranConfigurator configurator = new JoranConfigurator();
|
36
|
+
configurator.setContext(context);
|
37
|
+
context.reset();
|
38
38
|
|
39
|
-
|
40
|
-
|
39
|
+
String name;
|
40
|
+
if (System.console() != null) {
|
41
|
+
name = "/embulk/logback-color.xml";
|
42
|
+
} else {
|
43
|
+
name = "/embulk/logback-console.xml";
|
44
|
+
}
|
45
|
+
try {
|
46
|
+
configurator.doConfigure(getClass().getResource(name));
|
47
|
+
} catch (JoranException ex) {
|
48
|
+
throw new RuntimeException(ex);
|
49
|
+
}
|
50
|
+
|
51
|
+
org.slf4j.Logger logger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME);
|
52
|
+
if (logger instanceof Logger) {
|
53
|
+
((Logger) logger).setLevel(Level.toLevel(level.toUpperCase(), Level.DEBUG));
|
54
|
+
}
|
41
55
|
}
|
42
56
|
|
43
57
|
public ILoggerFactory get()
|
@@ -2,6 +2,7 @@ package org.embulk.exec;
|
|
2
2
|
|
3
3
|
import java.util.List;
|
4
4
|
import com.google.inject.Inject;
|
5
|
+
import com.google.common.base.Preconditions;
|
5
6
|
import org.embulk.config.TaskSource;
|
6
7
|
import org.embulk.config.ConfigSource;
|
7
8
|
import org.embulk.config.CommitReport;
|
@@ -24,31 +25,9 @@ import static org.embulk.spi.util.Inputs.each;
|
|
24
25
|
public class SamplingParserPlugin
|
25
26
|
implements ParserPlugin
|
26
27
|
{
|
27
|
-
private final int maxSampleSize;
|
28
|
-
|
29
|
-
@Inject
|
30
|
-
public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
|
31
|
-
{
|
32
|
-
this.maxSampleSize = 32*1024; // TODO get sample syze from system config
|
33
|
-
}
|
34
|
-
|
35
|
-
@Override
|
36
|
-
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
37
|
-
{
|
38
|
-
control.run(Exec.newTaskSource(), null);
|
39
|
-
}
|
40
|
-
|
41
|
-
@Override
|
42
|
-
public void run(TaskSource taskSource, Schema schema,
|
43
|
-
FileInput input, PageOutput output)
|
44
|
-
{
|
45
|
-
Buffer buffer = getSample(input, maxSampleSize);
|
46
|
-
throw new SampledNoticeError(buffer);
|
47
|
-
}
|
48
|
-
|
49
28
|
public static Buffer runFileInputSampling(final FileInputRunner runner, ConfigSource inputConfig)
|
50
29
|
{
|
51
|
-
// override in.parser.type so that FileInputRunner creates
|
30
|
+
// override in.parser.type so that FileInputRunner creates SamplingParserPlugin
|
52
31
|
ConfigSource samplingInputConfig = inputConfig.deepCopy();
|
53
32
|
samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
|
54
33
|
samplingInputConfig.set("decoders", null);
|
@@ -60,19 +39,24 @@ public class SamplingParserPlugin
|
|
60
39
|
if (taskCount == 0) {
|
61
40
|
throw new NoSampleException("No input files to read sample data");
|
62
41
|
}
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
42
|
+
for (int taskIndex=0; taskIndex < taskCount; taskIndex++) {
|
43
|
+
try {
|
44
|
+
runner.run(taskSource, schema, taskIndex, new PageOutput() {
|
45
|
+
@Override
|
46
|
+
public void add(Page page)
|
47
|
+
{
|
48
|
+
throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
|
49
|
+
}
|
70
50
|
|
71
|
-
|
51
|
+
public void finish() { }
|
72
52
|
|
73
|
-
|
74
|
-
|
75
|
-
|
53
|
+
public void close() { }
|
54
|
+
});
|
55
|
+
} catch (NotEnoughSampleError ex) {
|
56
|
+
continue;
|
57
|
+
}
|
58
|
+
}
|
59
|
+
throw new NoSampleException("All input files are smaller than minimum sampling size"); // TODO include minSampleSize in message
|
76
60
|
}
|
77
61
|
});
|
78
62
|
throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
|
@@ -81,29 +65,6 @@ public class SamplingParserPlugin
|
|
81
65
|
}
|
82
66
|
}
|
83
67
|
|
84
|
-
private static Buffer getSample(FileInput fileInput, int maxSampleSize)
|
85
|
-
{
|
86
|
-
if (!fileInput.nextFile()) {
|
87
|
-
// no input files
|
88
|
-
return Buffer.EMPTY;
|
89
|
-
}
|
90
|
-
|
91
|
-
Buffer sample = Buffer.allocate(maxSampleSize);
|
92
|
-
int sampleSize = 0;
|
93
|
-
|
94
|
-
for (Buffer buffer : each(fileInput)) {
|
95
|
-
int size = Math.min(buffer.limit(), sample.capacity() - sampleSize);
|
96
|
-
sample.setBytes(sampleSize, buffer, 0, size);
|
97
|
-
sampleSize += size;
|
98
|
-
buffer.release();
|
99
|
-
if (sampleSize >= maxSampleSize) {
|
100
|
-
break;
|
101
|
-
}
|
102
|
-
}
|
103
|
-
sample.limit(sampleSize);
|
104
|
-
return sample;
|
105
|
-
}
|
106
|
-
|
107
68
|
public static class SampledNoticeError
|
108
69
|
extends Error
|
109
70
|
{
|
@@ -119,4 +80,59 @@ public class SamplingParserPlugin
|
|
119
80
|
return sample;
|
120
81
|
}
|
121
82
|
}
|
83
|
+
|
84
|
+
public static class NotEnoughSampleError
|
85
|
+
extends Error
|
86
|
+
{ }
|
87
|
+
|
88
|
+
private final int minSampleSize;
|
89
|
+
private final int sampleSize;
|
90
|
+
|
91
|
+
@Inject
|
92
|
+
public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
|
93
|
+
{
|
94
|
+
this.minSampleSize = 40; // empty gzip file is 33 bytes. // TODO get sample size from system config
|
95
|
+
this.sampleSize = 32*1024; // TODO get sample size from system config
|
96
|
+
Preconditions.checkArgument(minSampleSize < sampleSize, "minSampleSize must be smaller than sampleSize");
|
97
|
+
}
|
98
|
+
|
99
|
+
@Override
|
100
|
+
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
101
|
+
{
|
102
|
+
control.run(Exec.newTaskSource(), null);
|
103
|
+
}
|
104
|
+
|
105
|
+
@Override
|
106
|
+
public void run(TaskSource taskSource, Schema schema,
|
107
|
+
FileInput input, PageOutput output)
|
108
|
+
{
|
109
|
+
Buffer buffer = readSample(input, sampleSize);
|
110
|
+
if (buffer.limit() < minSampleSize) {
|
111
|
+
throw new NotEnoughSampleError();
|
112
|
+
}
|
113
|
+
throw new SampledNoticeError(buffer);
|
114
|
+
}
|
115
|
+
|
116
|
+
private static Buffer readSample(FileInput fileInput, int sampleSize)
|
117
|
+
{
|
118
|
+
if (!fileInput.nextFile()) {
|
119
|
+
// no input files
|
120
|
+
return Buffer.EMPTY;
|
121
|
+
}
|
122
|
+
|
123
|
+
Buffer sample = Buffer.allocate(sampleSize);
|
124
|
+
int offset = 0;
|
125
|
+
|
126
|
+
for (Buffer buffer : each(fileInput)) {
|
127
|
+
int size = Math.min(buffer.limit(), sample.capacity() - offset);
|
128
|
+
sample.setBytes(offset, buffer, 0, size);
|
129
|
+
offset += size;
|
130
|
+
buffer.release();
|
131
|
+
if (offset >= sampleSize) {
|
132
|
+
break;
|
133
|
+
}
|
134
|
+
}
|
135
|
+
sample.limit(offset);
|
136
|
+
return sample;
|
137
|
+
}
|
122
138
|
}
|
@@ -4,6 +4,7 @@ import java.util.List;
|
|
4
4
|
import java.util.ArrayList;
|
5
5
|
import java.util.Set;
|
6
6
|
import java.io.File;
|
7
|
+
import org.slf4j.ILoggerFactory;
|
7
8
|
import com.google.common.collect.ImmutableSet;
|
8
9
|
import com.google.inject.Module;
|
9
10
|
import com.google.inject.Binder;
|
@@ -98,6 +99,14 @@ public class JRubyScriptingModule
|
|
98
99
|
// load embulk.rb
|
99
100
|
jruby.runScriptlet("require 'embulk'");
|
100
101
|
|
102
|
+
// initialize logger
|
103
|
+
jruby.callMethod(
|
104
|
+
jruby.runScriptlet("Embulk"),
|
105
|
+
"logger=",
|
106
|
+
jruby.callMethod(
|
107
|
+
jruby.runScriptlet("Embulk::Logger"),
|
108
|
+
"new", injector.getInstance(ILoggerFactory.class).getLogger("ruby")));
|
109
|
+
|
101
110
|
return jruby;
|
102
111
|
}
|
103
112
|
|
@@ -0,0 +1,72 @@
|
|
1
|
+
<configuration>
|
2
|
+
<appender name="console-error" class="ch.qos.logback.core.ConsoleAppender">
|
3
|
+
<filter class="ch.qos.logback.classic.filter.LevelFilter">
|
4
|
+
<level>ERROR</level>
|
5
|
+
<onMatch>ACCEPT</onMatch>
|
6
|
+
<onMismatch>DENY</onMismatch>
|
7
|
+
</filter>
|
8
|
+
<withJansi>true</withJansi>
|
9
|
+
<encoder>
|
10
|
+
<pattern>%magenta(%d{yyyy-MM-dd HH:mm:ss.SSS Z} [%level] (%thread\): %m%n)</pattern>
|
11
|
+
</encoder>
|
12
|
+
</appender>
|
13
|
+
|
14
|
+
<appender name="console-warn" class="ch.qos.logback.core.ConsoleAppender">
|
15
|
+
<filter class="ch.qos.logback.classic.filter.LevelFilter">
|
16
|
+
<level>WARN</level>
|
17
|
+
<onMatch>ACCEPT</onMatch>
|
18
|
+
<onMismatch>DENY</onMismatch>
|
19
|
+
</filter>
|
20
|
+
<withJansi>true</withJansi>
|
21
|
+
<encoder>
|
22
|
+
<pattern>%yellow(%d{yyyy-MM-dd HH:mm:ss.SSS Z} [%level] (%thread\): %m%n)</pattern>
|
23
|
+
</encoder>
|
24
|
+
</appender>
|
25
|
+
|
26
|
+
<appender name="console-info" class="ch.qos.logback.core.ConsoleAppender">
|
27
|
+
<filter class="ch.qos.logback.classic.filter.LevelFilter">
|
28
|
+
<level>INFO</level>
|
29
|
+
<onMatch>ACCEPT</onMatch>
|
30
|
+
<onMismatch>DENY</onMismatch>
|
31
|
+
</filter>
|
32
|
+
<withJansi>true</withJansi>
|
33
|
+
<encoder>
|
34
|
+
<pattern>%green(%d{yyyy-MM-dd HH:mm:ss.SSS Z} [%level] (%thread\): %m%n)</pattern>
|
35
|
+
</encoder>
|
36
|
+
</appender>
|
37
|
+
|
38
|
+
<appender name="console-debug" class="ch.qos.logback.core.ConsoleAppender">
|
39
|
+
<filter class="ch.qos.logback.classic.filter.LevelFilter">
|
40
|
+
<level>DEBUG</level>
|
41
|
+
<onMatch>ACCEPT</onMatch>
|
42
|
+
<onMismatch>DENY</onMismatch>
|
43
|
+
</filter>
|
44
|
+
<withJansi>true</withJansi>
|
45
|
+
<encoder>
|
46
|
+
<pattern>%white(%d{yyyy-MM-dd HH:mm:ss.SSS Z} [%level] (%thread\): %m%n)</pattern>
|
47
|
+
</encoder>
|
48
|
+
</appender>
|
49
|
+
|
50
|
+
<appender name="console-trace" class="ch.qos.logback.core.ConsoleAppender">
|
51
|
+
<filter class="ch.qos.logback.classic.filter.LevelFilter">
|
52
|
+
<level>TRACE</level>
|
53
|
+
<onMatch>ACCEPT</onMatch>
|
54
|
+
<onMismatch>DENY</onMismatch>
|
55
|
+
</filter>
|
56
|
+
<withJansi>true</withJansi>
|
57
|
+
<encoder>
|
58
|
+
<pattern>%blue(%d{yyyy-MM-dd HH:mm:ss.SSS Z} [%level] (%thread\): %m%n)</pattern>
|
59
|
+
</encoder>
|
60
|
+
</appender>
|
61
|
+
|
62
|
+
<logger name="io.netty.util" level="INFO"/>
|
63
|
+
<logger name="io.netty.buffer" level="INFO"/>
|
64
|
+
|
65
|
+
<root>
|
66
|
+
<appender-ref ref="console-error"/>
|
67
|
+
<appender-ref ref="console-warn"/>
|
68
|
+
<appender-ref ref="console-info"/>
|
69
|
+
<appender-ref ref="console-debug"/>
|
70
|
+
<appender-ref ref="console-trace"/>
|
71
|
+
</root>
|
72
|
+
</configuration>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<configuration>
|
2
|
+
<appender name="console" class="ch.qos.logback.core.ConsoleAppender">
|
3
|
+
<encoder>
|
4
|
+
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS Z} [%level] (%thread\): %m%n</pattern>
|
5
|
+
</encoder>
|
6
|
+
</appender>
|
7
|
+
|
8
|
+
<logger name="io.netty.util" level="INFO"/>
|
9
|
+
<logger name="io.netty.buffer" level="INFO"/>
|
10
|
+
|
11
|
+
<root>
|
12
|
+
<appender-ref ref="console"/>
|
13
|
+
</root>
|
14
|
+
</configuration>
|
@@ -0,0 +1,25 @@
|
|
1
|
+
<configuration>
|
2
|
+
<property name="embulk.logFile" value="embulk.log" />
|
3
|
+
|
4
|
+
<appender name="file" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
5
|
+
<file>${embulk.logFile}</file>
|
6
|
+
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
|
7
|
+
<maxFileSize>5kB</maxFileSize>
|
8
|
+
</triggeringPolicy>
|
9
|
+
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
|
10
|
+
<fileNamePattern>${embulk.logFile}.%i</fileNamePattern>
|
11
|
+
<minIndex>1</minIndex>
|
12
|
+
<maxIndex>5</maxIndex>
|
13
|
+
</rollingPolicy>
|
14
|
+
<encoder>
|
15
|
+
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS Z} [%level] (%thread\): %m%n</pattern>
|
16
|
+
</encoder>
|
17
|
+
</appender>
|
18
|
+
|
19
|
+
<logger name="io.netty.util" level="INFO"/>
|
20
|
+
<logger name="io.netty.buffer" level="INFO"/>
|
21
|
+
|
22
|
+
<root>
|
23
|
+
<appender-ref ref="file"/>
|
24
|
+
</root>
|
25
|
+
</configuration>
|
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,31 @@
|
|
1
|
+
Release 0.6.12
|
2
|
+
==================================
|
3
|
+
|
4
|
+
Plugin API
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Plugins can use both ``config[:key]`` (``task[:key]``) and ``config['key']`` (``task['key']``).
|
8
|
+
* Added ``Embulk.logger`` for Ruby plugins
|
9
|
+
|
10
|
+
|
11
|
+
Built-in plugins
|
12
|
+
------------------
|
13
|
+
|
14
|
+
* ``guess-csv`` plugin does not raise exceptions when a file has only 1 line
|
15
|
+
* ``parser-csv`` shows number of lines using 1-origin indexing rather than 0-origin
|
16
|
+
|
17
|
+
|
18
|
+
General Changes
|
19
|
+
------------------
|
20
|
+
|
21
|
+
* Guessing skips files smaller than 40 bytes so that guessing works when there is an empty file at the beginning.
|
22
|
+
* Uses ANSI color for log messages
|
23
|
+
* Uses logback instead of log4j for logging backend
|
24
|
+
* Added ``spi.Exec.isPreview()`` as an alias of ``spi.Exec.session().isPreview()``
|
25
|
+
* Plugin template for Ruby uses Xxx instead of XxxInputPlugin for the class name so that class name matches with file name.
|
26
|
+
* When parsing timestamp fails, exception message includes the original text (@yyamano++)
|
27
|
+
|
28
|
+
|
29
|
+
Release Date
|
30
|
+
------------------
|
31
|
+
2015-06-22
|
data/lib/embulk.rb
CHANGED
@@ -26,7 +26,7 @@ module Embulk
|
|
26
26
|
email = `git config user.email`.strip
|
27
27
|
email = "YOUR_NAME" if email.empty?
|
28
28
|
|
29
|
-
ruby_class_name = name.split('-').map {|a| a.capitalize }.join
|
29
|
+
ruby_class_name = name.split('-').map {|a| a.capitalize }.join
|
30
30
|
java_iface = category.to_s.split('_').map {|a| a.capitalize }.join
|
31
31
|
java_class_name = name.split('-').map {|a| a.capitalize }.join + java_iface + "Plugin"
|
32
32
|
display_name = name.split('-').map {|a| a.capitalize }.join(' ')
|
@@ -68,7 +68,7 @@ module Embulk
|
|
68
68
|
|
69
69
|
when :run
|
70
70
|
op.banner = "Usage: run <config.yml>"
|
71
|
-
op.on('-l', '--log-level LEVEL', 'Log level (
|
71
|
+
op.on('-l', '--log-level LEVEL', 'Log level (error, warn, info, debug or trace)') do |level|
|
72
72
|
options[:logLevel] = level
|
73
73
|
end
|
74
74
|
op.on('-L', '--load PATH', 'Add a local plugin path') do |plugin_path|
|
@@ -90,7 +90,7 @@ module Embulk
|
|
90
90
|
|
91
91
|
when :cleanup
|
92
92
|
op.banner = "Usage: cleanup <config.yml>"
|
93
|
-
op.on('-l', '--log-level LEVEL', 'Log level (
|
93
|
+
op.on('-l', '--log-level LEVEL', 'Log level (error, warn, info, debug or trace)') do |level|
|
94
94
|
options[:logLevel] = level
|
95
95
|
end
|
96
96
|
op.on('-L', '--load PATH', 'Add a local plugin path') do |plugin_path|
|
@@ -109,7 +109,7 @@ module Embulk
|
|
109
109
|
|
110
110
|
when :preview
|
111
111
|
op.banner = "Usage: preview <config.yml>"
|
112
|
-
op.on('-l', '--log-level LEVEL', 'Log level (
|
112
|
+
op.on('-l', '--log-level LEVEL', 'Log level (error, warn, info, debug or trace)') do |level|
|
113
113
|
options[:logLevel] = level
|
114
114
|
end
|
115
115
|
op.on('-L', '--load PATH', 'Add a local plugin path') do |plugin_path|
|
@@ -128,7 +128,7 @@ module Embulk
|
|
128
128
|
|
129
129
|
when :guess
|
130
130
|
op.banner = "Usage: guess <partial-config.yml>"
|
131
|
-
op.on('-l', '--log-level LEVEL', 'Log level (
|
131
|
+
op.on('-l', '--log-level LEVEL', 'Log level (error, warn, info, debug or trace)') do |level|
|
132
132
|
options[:logLevel] = level
|
133
133
|
end
|
134
134
|
op.on('-o', '--output PATH', 'Path to a file to write the guessed configuration') do |path|
|
data/lib/embulk/data_source.rb
CHANGED
@@ -1,7 +1,141 @@
|
|
1
1
|
module Embulk
|
2
2
|
require 'json'
|
3
3
|
|
4
|
+
module Impl
|
5
|
+
# copied from https://github.com/intridea/hashie/blob/da232547c29673a0d7a79c7bf2670f1ea76813ed/lib/hashie/extensions/indifferent_access.rb
|
6
|
+
module IndifferentAccess
|
7
|
+
def self.included(base)
|
8
|
+
#Hashie::Extensions::Dash::IndifferentAccess::ClassMethods.tap do |extension|
|
9
|
+
# base.extend(extension) if base <= Hashie::Dash && !base.singleton_class.included_modules.include?(extension)
|
10
|
+
#end
|
11
|
+
|
12
|
+
base.class_eval do
|
13
|
+
alias_method :regular_writer, :[]= unless method_defined?(:regular_writer)
|
14
|
+
alias_method :[]=, :indifferent_writer
|
15
|
+
alias_method :store, :indifferent_writer
|
16
|
+
%w(default update replace fetch delete key? values_at).each do |m|
|
17
|
+
alias_method "regular_#{m}", m unless method_defined?("regular_#{m}")
|
18
|
+
alias_method m, "indifferent_#{m}"
|
19
|
+
end
|
20
|
+
|
21
|
+
%w(include? member? has_key?).each do |key_alias|
|
22
|
+
alias_method key_alias, :indifferent_key?
|
23
|
+
end
|
24
|
+
|
25
|
+
class << self
|
26
|
+
def [](*)
|
27
|
+
super.convert!
|
28
|
+
end
|
29
|
+
|
30
|
+
def try_convert(*)
|
31
|
+
(hash = super) && self[hash]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.inject!(hash)
|
38
|
+
(class << hash; self; end).send :include, IndifferentAccess
|
39
|
+
hash.convert!
|
40
|
+
end
|
41
|
+
|
42
|
+
# Injects indifferent access into a duplicate of the hash
|
43
|
+
# provided. See #inject!
|
44
|
+
def self.inject(hash)
|
45
|
+
inject!(hash.dup)
|
46
|
+
end
|
47
|
+
|
48
|
+
def convert_key(key)
|
49
|
+
key.to_s
|
50
|
+
end
|
51
|
+
|
52
|
+
def convert!
|
53
|
+
keys.each do |k|
|
54
|
+
regular_writer convert_key(k), indifferent_value(regular_delete(k))
|
55
|
+
end
|
56
|
+
self
|
57
|
+
end
|
58
|
+
|
59
|
+
def indifferent_value(value)
|
60
|
+
if hash_lacking_indifference?(value)
|
61
|
+
IndifferentAccess.inject!(value)
|
62
|
+
elsif value.is_a?(::Array)
|
63
|
+
value.replace(value.map { |e| indifferent_value(e) })
|
64
|
+
else
|
65
|
+
value
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def indifferent_default(key = nil)
|
70
|
+
return self[convert_key(key)] if key?(key)
|
71
|
+
regular_default(key)
|
72
|
+
end
|
73
|
+
|
74
|
+
def indifferent_update(other_hash)
|
75
|
+
return regular_update(other_hash) if hash_with_indifference?(other_hash)
|
76
|
+
other_hash.each_pair do |k, v|
|
77
|
+
self[k] = v
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def indifferent_writer(key, value)
|
82
|
+
regular_writer convert_key(key), indifferent_value(value)
|
83
|
+
end
|
84
|
+
|
85
|
+
def indifferent_fetch(key, *args, &block)
|
86
|
+
regular_fetch convert_key(key), *args, &block
|
87
|
+
end
|
88
|
+
|
89
|
+
def indifferent_delete(key)
|
90
|
+
regular_delete convert_key(key)
|
91
|
+
end
|
92
|
+
|
93
|
+
def indifferent_key?(key)
|
94
|
+
regular_key? convert_key(key)
|
95
|
+
end
|
96
|
+
|
97
|
+
def indifferent_values_at(*indices)
|
98
|
+
indices.map { |i| self[i] }
|
99
|
+
end
|
100
|
+
|
101
|
+
def indifferent_access?
|
102
|
+
true
|
103
|
+
end
|
104
|
+
|
105
|
+
def indifferent_replace(other_hash)
|
106
|
+
(keys - other_hash.keys).each { |key| delete(key) }
|
107
|
+
other_hash.each { |key, value| self[key] = value }
|
108
|
+
self
|
109
|
+
end
|
110
|
+
|
111
|
+
protected
|
112
|
+
|
113
|
+
def hash_lacking_indifference?(other)
|
114
|
+
other.is_a?(::Hash) &&
|
115
|
+
!(other.respond_to?(:indifferent_access?) &&
|
116
|
+
other.indifferent_access?)
|
117
|
+
end
|
118
|
+
|
119
|
+
def hash_with_indifference?(other)
|
120
|
+
other.is_a?(::Hash) &&
|
121
|
+
other.respond_to?(:indifferent_access?) &&
|
122
|
+
other.indifferent_access?
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
4
127
|
class DataSource < Hash
|
128
|
+
include Impl::IndifferentAccess
|
129
|
+
|
130
|
+
def initialize(hash={}, default=nil, &block)
|
131
|
+
if default.nil?
|
132
|
+
super(&block)
|
133
|
+
else
|
134
|
+
super(default)
|
135
|
+
end
|
136
|
+
hash.each {|key,value| self[key] = value }
|
137
|
+
end
|
138
|
+
|
5
139
|
def param(key, type, options={})
|
6
140
|
if self.has_key?(key)
|
7
141
|
v = self[key]
|
data/lib/embulk/guess/csv.rb
CHANGED
@@ -68,7 +68,7 @@ module Embulk
|
|
68
68
|
comment_line_marker, sample_records = guess_comment_line_marker(sample_records)
|
69
69
|
|
70
70
|
first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
|
71
|
-
other_types = SchemaGuess.types_from_array_records(sample_records[1..-1])
|
71
|
+
other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
|
72
72
|
|
73
73
|
if first_types.size <= 1 || other_types.size <= 1
|
74
74
|
# guess failed
|
@@ -26,7 +26,7 @@ module Embulk
|
|
26
26
|
def strptimeUsec(text)
|
27
27
|
hash = Date._strptime(text, @format_string)
|
28
28
|
unless hash
|
29
|
-
raise Java::TimestampParseException.new
|
29
|
+
raise Java::TimestampParseException.new("Failed to parse '" + text + "'")
|
30
30
|
end
|
31
31
|
|
32
32
|
if seconds = hash[:seconds]
|
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
module Embulk
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
class Logger
|
6
|
+
def initialize(*args)
|
7
|
+
if args.length == 1
|
8
|
+
a = args[0]
|
9
|
+
if a.is_a?(Adapter)
|
10
|
+
@logger = a
|
11
|
+
elsif a.is_a?(::Logger)
|
12
|
+
@logger = StandardLoggerAdapter.new(a)
|
13
|
+
elsif Embulk.java? && (org.slf4j.Logger rescue nil) && a.is_a?(org.slf4j.Logger)
|
14
|
+
@logger = Slf4jAdapter.new(a)
|
15
|
+
else
|
16
|
+
@logger = StandardLoggerAdapter.new(*args)
|
17
|
+
end
|
18
|
+
else
|
19
|
+
@logger = StandardLoggerAdapter.new(*args)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
module Adapter
|
24
|
+
end
|
25
|
+
|
26
|
+
def error(message=nil, &block) @logger.error(message, &block) end
|
27
|
+
def warn(message=nil, &block) @logger.warn(message, &block) end
|
28
|
+
def info(message=nil, &block) @logger.info(message, &block) end
|
29
|
+
def debug(message=nil, &block) @logger.debug(message, &block) end
|
30
|
+
def trace(message=nil, &block) @logger.trace(message, &block) end
|
31
|
+
|
32
|
+
def error?() @logger.error? end
|
33
|
+
def warn?() @logger.warn? end
|
34
|
+
def info?() @logger.info? end
|
35
|
+
def debug?() @logger.debug? end
|
36
|
+
def trace?() @logger.trace? end
|
37
|
+
end
|
38
|
+
|
39
|
+
class StandardLoggerAdapter < ::Logger
|
40
|
+
include Logger::Adapter
|
41
|
+
|
42
|
+
def initialize(*args)
|
43
|
+
super
|
44
|
+
if Embulk.java?
|
45
|
+
self.formatter = lambda do |severity,datetime,progname,message|
|
46
|
+
"#{datetime.strftime("%Y-%m-%d %H:%M:%S.%3N %z")} [#{severity}] (#{java.lang.Thread.currentThread.name}): #{message}\n"
|
47
|
+
end
|
48
|
+
else
|
49
|
+
self.formatter = lambda do |severity,datetime,progname,message|
|
50
|
+
"#{datetime.strftime("%Y-%m-%d %H:%M:%S.%3N %z")} [#{severity}]: #{message}\n"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def trace(message, &block)
|
56
|
+
debug(message, &block)
|
57
|
+
end
|
58
|
+
|
59
|
+
def trace?
|
60
|
+
debug?
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class Slf4jAdapter
|
65
|
+
include Logger::Adapter
|
66
|
+
|
67
|
+
def initialize(logger)
|
68
|
+
@logger = logger
|
69
|
+
end
|
70
|
+
|
71
|
+
def error(message, &block)
|
72
|
+
if block
|
73
|
+
if @logger.isErrorEnabled
|
74
|
+
@logger.error(block.call)
|
75
|
+
end
|
76
|
+
else
|
77
|
+
@logger.error(message)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def warn(message, &block)
|
82
|
+
if block
|
83
|
+
if @logger.isWarnEnabled
|
84
|
+
@logger.warn(block.call)
|
85
|
+
end
|
86
|
+
else
|
87
|
+
@logger.warn(message)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def info(message, &block)
|
92
|
+
if block
|
93
|
+
if @logger.isInfoEnabled
|
94
|
+
@logger.info(block.call)
|
95
|
+
end
|
96
|
+
else
|
97
|
+
@logger.info(message)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def debug(message, &block)
|
102
|
+
if block
|
103
|
+
if @logger.isDebugEnabled
|
104
|
+
@logger.debug(block.call)
|
105
|
+
end
|
106
|
+
else
|
107
|
+
@logger.debug(message)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def trace(message, &block)
|
112
|
+
if block
|
113
|
+
if @logger.isTraceEnabled
|
114
|
+
@logger.trace(block.call)
|
115
|
+
end
|
116
|
+
else
|
117
|
+
@logger.trace(message)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def fatal?
|
122
|
+
@logger.isErrorEnabled()
|
123
|
+
end
|
124
|
+
|
125
|
+
def error?
|
126
|
+
@logger.isErrorEnabled()
|
127
|
+
end
|
128
|
+
|
129
|
+
def warn?
|
130
|
+
@logger.isWarnEnabled()
|
131
|
+
end
|
132
|
+
|
133
|
+
def debug?
|
134
|
+
@logger.isDebugEnabled()
|
135
|
+
end
|
136
|
+
|
137
|
+
def trace?
|
138
|
+
@logger.isTraceEnabled()
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def self.logger
|
143
|
+
@@logger
|
144
|
+
end
|
145
|
+
|
146
|
+
def self.logger=(logger)
|
147
|
+
@@logger = logger
|
148
|
+
end
|
149
|
+
|
150
|
+
# default logger
|
151
|
+
@@logger = Logger.new(STDOUT)
|
152
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
|
2
2
|
module Embulk
|
3
3
|
require 'embulk/error'
|
4
|
+
require 'embulk/logger'
|
4
5
|
|
5
6
|
class PluginRegistry
|
6
7
|
def initialize(category, search_prefix)
|
@@ -80,10 +81,9 @@ module Embulk
|
|
80
81
|
end
|
81
82
|
|
82
83
|
def show_loaded_gems
|
83
|
-
# TODO use logger
|
84
84
|
Gem.loaded_specs.each do |name,spec|
|
85
85
|
if !@loaded_gems[name] && name =~ /^embulk/
|
86
|
-
|
86
|
+
Embulk.logger.info "Loaded plugin #{name} (#{spec.version})"
|
87
87
|
@loaded_gems[name] = true
|
88
88
|
end
|
89
89
|
end
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -232,6 +232,9 @@ files:
|
|
232
232
|
- embulk-core/src/main/java/org/embulk/spi/util/Pages.java
|
233
233
|
- embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java
|
234
234
|
- embulk-core/src/main/java/org/embulk/spi/util/RetryExecutor.java
|
235
|
+
- embulk-core/src/main/resources/embulk/logback-color.xml
|
236
|
+
- embulk-core/src/main/resources/embulk/logback-console.xml
|
237
|
+
- embulk-core/src/main/resources/embulk/logback-file.xml
|
235
238
|
- embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java
|
236
239
|
- embulk-core/src/test/java/org/embulk/GuiceBinder.java
|
237
240
|
- embulk-core/src/test/java/org/embulk/RandomManager.java
|
@@ -297,6 +300,7 @@ files:
|
|
297
300
|
- embulk-docs/src/release/release-0.6.1.rst
|
298
301
|
- embulk-docs/src/release/release-0.6.10.rst
|
299
302
|
- embulk-docs/src/release/release-0.6.11.rst
|
303
|
+
- embulk-docs/src/release/release-0.6.12.rst
|
300
304
|
- embulk-docs/src/release/release-0.6.2.rst
|
301
305
|
- embulk-docs/src/release/release-0.6.3.rst
|
302
306
|
- embulk-docs/src/release/release-0.6.4.rst
|
@@ -394,6 +398,7 @@ files:
|
|
394
398
|
- lib/embulk/java/imports.rb
|
395
399
|
- lib/embulk/java/time_helper.rb
|
396
400
|
- lib/embulk/java_plugin.rb
|
401
|
+
- lib/embulk/logger.rb
|
397
402
|
- lib/embulk/output_plugin.rb
|
398
403
|
- lib/embulk/page.rb
|
399
404
|
- lib/embulk/page_builder.rb
|
@@ -412,8 +417,8 @@ files:
|
|
412
417
|
- classpath/bval-jsr303-0.5.jar
|
413
418
|
- classpath/commons-beanutils-core-1.8.3.jar
|
414
419
|
- classpath/commons-lang3-3.1.jar
|
415
|
-
- classpath/embulk-core-0.6.
|
416
|
-
- classpath/embulk-standards-0.6.
|
420
|
+
- classpath/embulk-core-0.6.12.jar
|
421
|
+
- classpath/embulk-standards-0.6.12.jar
|
417
422
|
- classpath/guava-18.0.jar
|
418
423
|
- classpath/guice-4.0.jar
|
419
424
|
- classpath/guice-multibindings-4.0.jar
|
@@ -424,14 +429,15 @@ files:
|
|
424
429
|
- classpath/jackson-datatype-guava-2.5.3.jar
|
425
430
|
- classpath/jackson-datatype-joda-2.5.3.jar
|
426
431
|
- classpath/jackson-module-guice-2.5.3.jar
|
432
|
+
- classpath/jansi-1.11.jar
|
427
433
|
- classpath/javax.inject-1.jar
|
428
434
|
- classpath/joda-time-2.7.jar
|
429
435
|
- classpath/jruby-complete-1.7.19.jar
|
430
|
-
- classpath/
|
436
|
+
- classpath/logback-classic-1.1.3.jar
|
437
|
+
- classpath/logback-core-1.1.3.jar
|
431
438
|
- classpath/netty-buffer-5.0.0.Alpha1.jar
|
432
439
|
- classpath/netty-common-5.0.0.Alpha1.jar
|
433
440
|
- classpath/slf4j-api-1.7.10.jar
|
434
|
-
- classpath/slf4j-log4j12-1.7.10.jar
|
435
441
|
- classpath/slice-0.9.jar
|
436
442
|
- classpath/snakeyaml-1.14.jar
|
437
443
|
- classpath/validation-api-1.1.0.Final.jar
|