embulk 0.8.23-java → 0.8.24-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/embulk +27 -1
- data/build.gradle +1 -1
- data/embulk-cli/src/main/bat/selfrun.bat +58 -0
- data/embulk-cli/src/main/sh/selfrun.sh +40 -1
- data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +18 -2
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +5 -0
- data/embulk-docs/src/built-in.rst +7 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.24.rst +15 -0
- data/embulk-standards/src/main/java/org/embulk/standards/JsonParserPlugin.java +114 -3
- data/embulk-standards/src/test/java/org/embulk/standards/TestJsonParserPlugin.java +182 -0
- data/lib/embulk/command/embulk_bundle.rb +13 -19
- data/lib/embulk/version.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4304649d3c657ab61884394e4b04c32934ecc516
|
4
|
+
data.tar.gz: e99e2b6296c4660d6da94e9465e502dcdd218f22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29a99a36b30105da946f964c8f131c98a5f3dedb2fa16edf6e50b2463f9b08939ee4602e0cf6353bde581ae6b7ee471dbb5da8a0ca6a90d6872308609c15e6ad
|
7
|
+
data.tar.gz: 0496df6ebb63f5bbef27d3dbf0fed1b6a2e8d6609b17651009f2c9498d97df289ca8a9e0f2f1868031b0aa273be28c36609552cc88d47bec9d4592f7cea8c9a8
|
data/bin/embulk
CHANGED
@@ -6,6 +6,33 @@ if RUBY_PLATFORM =~ /java/i
|
|
6
6
|
ENV.delete('EMBULK_BIN_ENABLE_BUNDLE')
|
7
7
|
# include -cp CLASSPATH to LOAD_PATH so that embulk_bundle.rb can load bundler included in embulk-core.jar
|
8
8
|
$LOAD_PATH << "uri:classloader:/"
|
9
|
+
|
10
|
+
# Handle environment variables before entering embulk_bundle.rb.
|
11
|
+
# See: https://github.com/embulk/embulk/pull/604
|
12
|
+
|
13
|
+
bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
|
14
|
+
bundle_path = nil if bundle_path.empty?
|
15
|
+
|
16
|
+
# Search for -b or --bundle, and remove it.
|
17
|
+
if ARGV.find_index {|arg| arg == '-b' || arg == '--bundle' }
|
18
|
+
ARGV.slice!(bundle_path_index, 2)[1]
|
19
|
+
end
|
20
|
+
if bundle_path
|
21
|
+
ENV['EMBULK_BUNDLE_PATH'] = bundle_path
|
22
|
+
ENV['BUNDLE_GEMFILE'] = File.expand_path File.join(bundle_path, "Gemfile")
|
23
|
+
ENV.delete('GEM_HOME')
|
24
|
+
ENV.delete('GEM_PATH')
|
25
|
+
else
|
26
|
+
ENV.delete('EMBULK_BUNDLE_PATH')
|
27
|
+
user_home = java.lang.System.properties["user.home"] || ENV['HOME']
|
28
|
+
unless user_home
|
29
|
+
raise "HOME environment variable is not set."
|
30
|
+
end
|
31
|
+
ENV['GEM_HOME'] = File.expand_path File.join(user_home, '.embulk', Gem.ruby_engine, RbConfig::CONFIG['ruby_version'])
|
32
|
+
ENV['GEM_PATH'] = ''
|
33
|
+
ENV.delete('BUNDLE_GEMFILE')
|
34
|
+
end
|
35
|
+
|
9
36
|
require_relative '../lib/embulk/command/embulk_bundle'
|
10
37
|
else
|
11
38
|
# bin/embulk is run by JRuby (embulk gem for JRuby is installed). disable embulk_bundle not to bother the JRuby's bundler
|
@@ -107,4 +134,3 @@ cmdline << __FILE__
|
|
107
134
|
cmdline.concat ARGV
|
108
135
|
exec env, *cmdline
|
109
136
|
exit 127
|
110
|
-
|
data/build.gradle
CHANGED
data/embulk-cli/src/main/bat/selfrun.bat
CHANGED
@@ -30,6 +30,60 @@ if "%overwrite_optimize%" == "true" (
|
|
30
30
|
)
|
31
31
|
)
|
32
32
|
|
33
|
+
setlocal enabledelayedexpansion
|
34
|
+
|
35
|
+
set found_bundle_option=0
|
36
|
+
|
37
|
+
for %%a in (%*) do (
|
38
|
+
if %%a == -b (
|
39
|
+
set found_bundle_option=1
|
40
|
+
) else if %%a == --bundle (
|
41
|
+
set found_bundle_option=1
|
42
|
+
) else if !found_bundle_option! == 1 (
|
43
|
+
set embulk_bundle_path=%%a
|
44
|
+
set found_bundle_option=2
|
45
|
+
)
|
46
|
+
)
|
47
|
+
|
48
|
+
endlocal && set EMBULK_BUNDLE_PATH=%embulk_bundle_path%
|
49
|
+
|
50
|
+
if not defined EMBULK_BUNDLE_PATH (
|
51
|
+
set EMBULK_BUNDLE_PATH=
|
52
|
+
set GEM_PATH=""
|
53
|
+
) else (
|
54
|
+
if not exist "%EMBULK_BUNDLE_PATH%\" (
|
55
|
+
echo Directory not found: "%EMBULK_BUNDLE_PATH%"
|
56
|
+
exit /b 1
|
57
|
+
)
|
58
|
+
set GEM_PATH=
|
59
|
+
)
|
60
|
+
|
61
|
+
setlocal enabledelayedexpansion
|
62
|
+
|
63
|
+
if not defined EMBULK_BUNDLE_PATH (
|
64
|
+
set bundle_gemfile=
|
65
|
+
) else (
|
66
|
+
call :get_absolute_path %EMBULK_BUNDLE_PATH%
|
67
|
+
set bundle_gemfile=!absolute_path!\Gemfile
|
68
|
+
if not exist !bundle_gemfile! (
|
69
|
+
echo Gemfile not found: "!bundle_gemfile!"
|
70
|
+
exit /b 1
|
71
|
+
)
|
72
|
+
)
|
73
|
+
|
74
|
+
endlocal && set BUNDLE_GEMFILE=%bundle_gemfile%
|
75
|
+
|
76
|
+
setlocal enabledelayedexpansion
|
77
|
+
|
78
|
+
if not defined EMBULK_BUNDLE_PATH (
|
79
|
+
for /f "delims=" %%w in ('java -cp %0 org.jruby.Main -e "print RbConfig::CONFIG['ruby_version']"') do set ruby_version=%%w
|
80
|
+
set gem_home=%USERPROFILE%\.embulk\jruby\!ruby_version!
|
81
|
+
) else (
|
82
|
+
set gem_home=
|
83
|
+
)
|
84
|
+
|
85
|
+
endlocal && set GEM_HOME=%gem_home%
|
86
|
+
|
33
87
|
if "%optimize%" == "true" (
|
34
88
|
set java_args=-XX:+AggressiveOpts -XX:+UseConcMarkSweepGC %java_args%
|
35
89
|
) else (
|
@@ -96,3 +150,7 @@ if not exist "%~1" (
|
|
96
150
|
)
|
97
151
|
set status=
|
98
152
|
exit /b
|
153
|
+
|
154
|
+
:get_absolute_path
|
155
|
+
set absolute_path=%~f1
|
156
|
+
exit /b
|
data/embulk-cli/src/main/sh/selfrun.sh
CHANGED
@@ -44,11 +44,50 @@ while true; do
|
|
44
44
|
esac
|
45
45
|
done
|
46
46
|
|
47
|
+
embulk_args="$@"
|
48
|
+
|
49
|
+
while [ $# -gt 0 ] ; do
|
50
|
+
case "$1" in
|
51
|
+
"-b" | "--bundle")
|
52
|
+
shift
|
53
|
+
EMBULK_BUNDLE_PATH="$1"
|
54
|
+
export EMBULK_BUNDLE_PATH
|
55
|
+
shift
|
56
|
+
break
|
57
|
+
;;
|
58
|
+
*)
|
59
|
+
shift
|
60
|
+
;;
|
61
|
+
esac
|
62
|
+
done
|
63
|
+
|
64
|
+
if test -z ${EMBULK_BUNDLE_PATH}; then
|
65
|
+
unset EMBULK_BUNDLE_PATH
|
66
|
+
unset BUNDLE_GEMFILE
|
67
|
+
GEM_HOME="`cd && pwd`/.embulk/jruby/`java -cp $0 org.jruby.Main -e 'print RbConfig::CONFIG["ruby_version"]'`"
|
68
|
+
export GEM_HOME
|
69
|
+
GEM_PATH=""
|
70
|
+
export GEM_PATH
|
71
|
+
else
|
72
|
+
if test ! -d ${EMBULK_BUNDLE_PATH}; then
|
73
|
+
echo "Directory not found: \"${EMBULK_BUNDLE_PATH}\""
|
74
|
+
exit 127
|
75
|
+
fi
|
76
|
+
BUNDLE_GEMFILE="`cd ${EMBULK_BUNDLE_PATH} && pwd`/Gemfile"
|
77
|
+
if test ! -f ${BUNDLE_GEMFILE}; then
|
78
|
+
echo "Gemfile not found: \"${BUNDLE_GEMFILE}\""
|
79
|
+
exit 127
|
80
|
+
fi
|
81
|
+
export BUNDLE_GEMFILE
|
82
|
+
unset GEM_HOME
|
83
|
+
unset GEM_PATH
|
84
|
+
fi
|
85
|
+
|
47
86
|
if test "$overwrite_optimize" = "true" -o "$default_optimize" -a "$overwrite_optimize" != "false"; then
|
48
87
|
java_args="-XX:+AggressiveOpts -XX:+UseConcMarkSweepGC $java_args"
|
49
88
|
else
|
50
89
|
java_args="-XX:+AggressiveOpts -XX:+TieredCompilation -XX:TieredStopAtLevel=1 -Xverify:none $java_args"
|
51
90
|
fi
|
52
91
|
|
53
|
-
exec java $java_args -jar "$0" $jruby_args
|
92
|
+
exec java $java_args -jar "$0" $jruby_args $embulk_args
|
54
93
|
exit 127
|
data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java
CHANGED
@@ -16,6 +16,8 @@ import org.embulk.config.TaskSource;
|
|
16
16
|
import org.embulk.config.ConfigDiff;
|
17
17
|
import org.embulk.config.TaskReport;
|
18
18
|
import org.embulk.plugin.PluginType;
|
19
|
+
import org.embulk.spi.FileInputRunner;
|
20
|
+
import org.embulk.spi.FileOutputRunner;
|
19
21
|
import org.embulk.spi.Schema;
|
20
22
|
import org.embulk.spi.Exec;
|
21
23
|
import org.embulk.spi.ExecSession;
|
@@ -502,10 +504,24 @@ public class BulkLoader
|
|
502
504
|
}
|
503
505
|
}
|
504
506
|
|
505
|
-
|
507
|
+
final TaskSource inputTaskSource;
|
508
|
+
if (plugins.getInputPlugin() instanceof FileInputRunner) {
|
509
|
+
inputTaskSource = FileInputRunner.getFileInputTaskSource(resume.getInputTaskSource());
|
510
|
+
}
|
511
|
+
else {
|
512
|
+
inputTaskSource = resume.getInputTaskSource();
|
513
|
+
}
|
514
|
+
plugins.getInputPlugin().cleanup(inputTaskSource, resume.getInputSchema(),
|
506
515
|
resume.getInputTaskReports().size(), successfulInputTaskReports.build());
|
507
516
|
|
508
|
-
|
517
|
+
final TaskSource outputTaskSource;
|
518
|
+
if (plugins.getOutputPlugin() instanceof FileOutputRunner) {
|
519
|
+
outputTaskSource = FileOutputRunner.getFileOutputTaskSource(resume.getOutputTaskSource());
|
520
|
+
}
|
521
|
+
else {
|
522
|
+
outputTaskSource = resume.getOutputTaskSource();
|
523
|
+
}
|
524
|
+
plugins.getOutputPlugin().cleanup(outputTaskSource, resume.getOutputSchema(),
|
509
525
|
resume.getOutputTaskReports().size(), successfulOutputTaskReports.build());
|
510
526
|
}
|
511
527
|
|
data/embulk-docs/src/built-in.rst
CHANGED
@@ -241,6 +241,9 @@ The ``columns`` option declares the list of columns. This CSV parser plugin igno
|
|
241
241
|
| date | Set date part if the format doesn’t include date part |
|
242
242
|
+----------+--------------------------------------------------------+
|
243
243
|
|
244
|
+
.. note::
|
245
|
+
|
246
|
+
The Timestamp format refers to `Ruby strftime format <https://docs.ruby-lang.org/en/2.4.0/Date.html#method-i-strftime>`_
|
244
247
|
|
245
248
|
List of types:
|
246
249
|
|
@@ -497,6 +500,10 @@ The ``column_options`` option is a map whose keys are name of columns, and value
|
|
497
500
|
| format | string | Timestamp format if type of this column is timestamp. | ``%Y-%m-%d %H:%M:%S.%6N %z`` by default |
|
498
501
|
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
499
502
|
|
503
|
+
.. note::
|
504
|
+
|
505
|
+
The Timestamp format refers to `Ruby strftime format <https://docs.ruby-lang.org/en/2.4.0/Date.html#method-i-strftime>`_
|
506
|
+
|
500
507
|
Example
|
501
508
|
~~~~~~~~
|
502
509
|
|
data/embulk-docs/src/release.rst
CHANGED
data/embulk-docs/src/release/release-0.8.24.rst
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
Release 0.8.24
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Make environment variables immutable in JRuby/Java [#604] [#670]
|
8
|
+
* Add a note on the timestamp format in embulk-doc with a link to Ruby documents [#648]
|
9
|
+
* Add clean illegal characters mode to json parser. [#651]
|
10
|
+
* embulk-core: Fix BulkLoader to pass file input/output plugins' task source to plugins' cleanup [#663]
|
11
|
+
|
12
|
+
|
13
|
+
Release Date
|
14
|
+
------------------
|
15
|
+
2017-06-14
|
data/embulk-standards/src/main/java/org/embulk/standards/JsonParserPlugin.java
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
3
|
import com.google.common.annotations.VisibleForTesting;
|
4
|
+
import com.google.common.base.Function;
|
5
|
+
import com.google.common.collect.Lists;
|
6
|
+
import com.google.common.io.CharSource;
|
7
|
+
import com.google.common.io.CharStreams;
|
4
8
|
import org.embulk.config.Config;
|
5
9
|
import org.embulk.config.ConfigDefault;
|
6
10
|
import org.embulk.config.ConfigSource;
|
@@ -18,20 +22,50 @@ import org.embulk.spi.json.JsonParseException;
|
|
18
22
|
import org.embulk.spi.json.JsonParser;
|
19
23
|
import org.embulk.spi.type.Types;
|
20
24
|
import org.embulk.spi.util.FileInputInputStream;
|
25
|
+
import org.jruby.embed.io.ReaderInputStream;
|
26
|
+
import org.msgpack.core.Preconditions;
|
21
27
|
import org.msgpack.value.Value;
|
22
28
|
import org.slf4j.Logger;
|
23
29
|
|
30
|
+
import javax.annotation.Nullable;
|
31
|
+
import java.io.BufferedReader;
|
24
32
|
import java.io.IOException;
|
33
|
+
import java.io.InputStreamReader;
|
34
|
+
import java.util.regex.Pattern;
|
25
35
|
|
26
36
|
public class JsonParserPlugin
|
27
37
|
implements ParserPlugin
|
28
38
|
{
|
39
|
+
|
40
|
+
public enum InvalidEscapeStringPolicy
|
41
|
+
{
|
42
|
+
PASSTHROUGH("PASSTHROUGH"),
|
43
|
+
SKIP("SKIP"),
|
44
|
+
UNESCAPE("UNESCAPE");
|
45
|
+
|
46
|
+
private final String string;
|
47
|
+
|
48
|
+
private InvalidEscapeStringPolicy(String string)
|
49
|
+
{
|
50
|
+
this.string = string;
|
51
|
+
}
|
52
|
+
|
53
|
+
public String getString()
|
54
|
+
{
|
55
|
+
return string;
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
29
59
|
public interface PluginTask
|
30
60
|
extends Task
|
31
61
|
{
|
32
62
|
@Config("stop_on_invalid_record")
|
33
63
|
@ConfigDefault("false")
|
34
64
|
boolean getStopOnInvalidRecord();
|
65
|
+
|
66
|
+
@Config("invalid_string_escapes")
|
67
|
+
@ConfigDefault("\"PASSTHROUGH\"")
|
68
|
+
InvalidEscapeStringPolicy getInvalidEscapeStringPolicy();
|
35
69
|
}
|
36
70
|
|
37
71
|
private final Logger log;
|
@@ -66,7 +100,7 @@ public class JsonParserPlugin
|
|
66
100
|
FileInputInputStream in = new FileInputInputStream(input)) {
|
67
101
|
while (in.nextFile()) {
|
68
102
|
boolean evenOneJsonParsed = false;
|
69
|
-
try (JsonParser.Stream stream = newJsonStream(in)) {
|
103
|
+
try (JsonParser.Stream stream = newJsonStream(in, task)) {
|
70
104
|
Value value;
|
71
105
|
while ((value = stream.next()) != null) {
|
72
106
|
try {
|
@@ -107,10 +141,87 @@ public class JsonParserPlugin
|
|
107
141
|
return new PageBuilder(Exec.getBufferAllocator(), schema, output);
|
108
142
|
}
|
109
143
|
|
110
|
-
private JsonParser.Stream newJsonStream(FileInputInputStream in)
|
144
|
+
private JsonParser.Stream newJsonStream(FileInputInputStream in, PluginTask task)
|
111
145
|
throws IOException
|
112
146
|
{
|
113
|
-
|
147
|
+
InvalidEscapeStringPolicy policy = task.getInvalidEscapeStringPolicy();
|
148
|
+
switch (policy) {
|
149
|
+
case SKIP:
|
150
|
+
case UNESCAPE:
|
151
|
+
Iterable<CharSource> lines = Lists.transform(CharStreams.readLines(new BufferedReader(new InputStreamReader(in))),
|
152
|
+
invalidEscapeStringFunction(policy));
|
153
|
+
return new JsonParser().open(new ReaderInputStream(CharSource.concat(lines).openStream()));
|
154
|
+
case PASSTHROUGH:
|
155
|
+
default:
|
156
|
+
return new JsonParser().open(in);
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
Function<String, CharSource> invalidEscapeStringFunction(final InvalidEscapeStringPolicy policy)
|
161
|
+
{
|
162
|
+
return new Function<String, CharSource>()
|
163
|
+
{
|
164
|
+
final Pattern digitsPattern = Pattern.compile("\\p{XDigit}+");
|
165
|
+
|
166
|
+
@Override
|
167
|
+
public CharSource apply(@Nullable String input)
|
168
|
+
{
|
169
|
+
Preconditions.checkNotNull(input);
|
170
|
+
if (policy == InvalidEscapeStringPolicy.PASSTHROUGH) {
|
171
|
+
return CharSource.wrap(input);
|
172
|
+
}
|
173
|
+
StringBuilder builder = new StringBuilder();
|
174
|
+
char[] charArray = input.toCharArray();
|
175
|
+
for (int characterIndex = 0; characterIndex < charArray.length; characterIndex++) {
|
176
|
+
char c = charArray[characterIndex];
|
177
|
+
if (c == '\\') {
|
178
|
+
if (charArray.length > characterIndex + 1) {
|
179
|
+
char next = charArray[characterIndex + 1];
|
180
|
+
switch (next) {
|
181
|
+
case 'b':
|
182
|
+
case 'f':
|
183
|
+
case 'n':
|
184
|
+
case 'r':
|
185
|
+
case 't':
|
186
|
+
case '"':
|
187
|
+
case '\\':
|
188
|
+
case '/':
|
189
|
+
builder.append(c);
|
190
|
+
break;
|
191
|
+
case 'u': // hexstring such as \u0001
|
192
|
+
if (charArray.length > characterIndex + 5) {
|
193
|
+
char[] hexChars = {charArray[characterIndex + 2], charArray[characterIndex + 3], charArray[characterIndex + 4],
|
194
|
+
charArray[characterIndex + 5]};
|
195
|
+
String hexString = new String(hexChars);
|
196
|
+
if (digitsPattern.matcher(hexString).matches()) {
|
197
|
+
builder.append(c);
|
198
|
+
} else {
|
199
|
+
if (policy == InvalidEscapeStringPolicy.SKIP) {
|
200
|
+
// remove \\u
|
201
|
+
characterIndex++;
|
202
|
+
}
|
203
|
+
}
|
204
|
+
}
|
205
|
+
break;
|
206
|
+
default:
|
207
|
+
switch (policy) {
|
208
|
+
case SKIP:
|
209
|
+
characterIndex++;
|
210
|
+
break;
|
211
|
+
case UNESCAPE:
|
212
|
+
break;
|
213
|
+
}
|
214
|
+
break;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
}
|
218
|
+
else {
|
219
|
+
builder.append(c);
|
220
|
+
}
|
221
|
+
}
|
222
|
+
return CharSource.wrap(builder.toString());
|
223
|
+
}
|
224
|
+
};
|
114
225
|
}
|
115
226
|
|
116
227
|
static class JsonRecordValidateException
|
data/embulk-standards/src/test/java/org/embulk/standards/TestJsonParserPlugin.java
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.io.CharSource;
|
4
5
|
import org.embulk.EmbulkTestRuntime;
|
5
6
|
import org.embulk.config.ConfigSource;
|
6
7
|
import org.embulk.config.TaskSource;
|
7
8
|
import org.embulk.spi.DataException;
|
9
|
+
import org.embulk.spi.Exec;
|
8
10
|
import org.embulk.spi.FileInput;
|
9
11
|
import org.embulk.spi.ParserPlugin;
|
10
12
|
import org.embulk.spi.Schema;
|
@@ -22,6 +24,9 @@ import java.io.InputStream;
|
|
22
24
|
import java.util.List;
|
23
25
|
import java.util.Map;
|
24
26
|
|
27
|
+
import static org.embulk.standards.JsonParserPlugin.InvalidEscapeStringPolicy.PASSTHROUGH;
|
28
|
+
import static org.embulk.standards.JsonParserPlugin.InvalidEscapeStringPolicy.SKIP;
|
29
|
+
import static org.embulk.standards.JsonParserPlugin.InvalidEscapeStringPolicy.UNESCAPE;
|
25
30
|
import static org.junit.Assert.assertEquals;
|
26
31
|
import static org.junit.Assert.assertTrue;
|
27
32
|
import static org.junit.Assert.fail;
|
@@ -48,6 +53,17 @@ public class TestJsonParserPlugin
|
|
48
53
|
output = new MockPageOutput();
|
49
54
|
}
|
50
55
|
|
56
|
+
@Test
|
57
|
+
public void checkDefaultValues()
|
58
|
+
{
|
59
|
+
ConfigSource config = Exec.newConfigSource();
|
60
|
+
|
61
|
+
JsonParserPlugin.PluginTask task = config.loadConfig(JsonParserPlugin.PluginTask.class);
|
62
|
+
assertEquals(false, task.getStopOnInvalidRecord());
|
63
|
+
assertEquals(JsonParserPlugin.InvalidEscapeStringPolicy.PASSTHROUGH, task.getInvalidEscapeStringPolicy());
|
64
|
+
}
|
65
|
+
|
66
|
+
|
51
67
|
@Test
|
52
68
|
public void readNormalJson()
|
53
69
|
throws Exception
|
@@ -132,6 +148,172 @@ public class TestJsonParserPlugin
|
|
132
148
|
}
|
133
149
|
}
|
134
150
|
|
151
|
+
@Test
|
152
|
+
public void useDefaultInvalidEscapeStringFunction()
|
153
|
+
throws Exception
|
154
|
+
{
|
155
|
+
try {
|
156
|
+
transaction(config, fileInput(
|
157
|
+
"{\"\\a\":\"b\"}\\" // throw DataException
|
158
|
+
));
|
159
|
+
fail();
|
160
|
+
}
|
161
|
+
catch (Throwable t) {
|
162
|
+
assertTrue(t instanceof DataException);
|
163
|
+
}
|
164
|
+
}
|
165
|
+
|
166
|
+
@Test
|
167
|
+
public void usePassthroughInvalidEscapeStringFunction()
|
168
|
+
throws Exception
|
169
|
+
{
|
170
|
+
try {
|
171
|
+
ConfigSource config = this.config.deepCopy().set("invalid_string_escapes", "PASSTHROUGH");
|
172
|
+
transaction(config, fileInput(
|
173
|
+
"{\"\\a\":\"b\"}\\" // throw DataException
|
174
|
+
));
|
175
|
+
fail();
|
176
|
+
}
|
177
|
+
catch (Throwable t) {
|
178
|
+
assertTrue(t instanceof DataException);
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
@Test
|
183
|
+
public void useSkipInvalidEscapeString()
|
184
|
+
throws Exception
|
185
|
+
{
|
186
|
+
ConfigSource config = this.config.deepCopy().set("invalid_string_escapes", "SKIP");
|
187
|
+
transaction(config, fileInput(
|
188
|
+
"{\"\\a\":\"b\"}\\"
|
189
|
+
));
|
190
|
+
|
191
|
+
List<Object[]> records = Pages.toObjects(plugin.newSchema(), output.pages);
|
192
|
+
assertEquals(1, records.size());
|
193
|
+
Object[] record = records.get(0);
|
194
|
+
Map<Value, Value> map = ((Value)record[0]).asMapValue().map();
|
195
|
+
assertEquals(newString("b"), map.get(newString("")));
|
196
|
+
}
|
197
|
+
|
198
|
+
@Test
|
199
|
+
public void useUnEscapeInvalidEscapeString()
|
200
|
+
throws Exception
|
201
|
+
{
|
202
|
+
ConfigSource config = this.config.deepCopy().set("invalid_string_escapes", "UNESCAPE");
|
203
|
+
transaction(config, fileInput(
|
204
|
+
"{\"\\a\":\"b\"}\\"
|
205
|
+
));
|
206
|
+
|
207
|
+
List<Object[]> records = Pages.toObjects(plugin.newSchema(), output.pages);
|
208
|
+
assertEquals(1, records.size());
|
209
|
+
Object[] record = records.get(0);
|
210
|
+
Map<Value, Value> map = ((Value)record[0]).asMapValue().map();
|
211
|
+
assertEquals(newString("b"), map.get(newString("a")));
|
212
|
+
}
|
213
|
+
|
214
|
+
@Test
|
215
|
+
public void checkInvalidEscapeStringFunction()
|
216
|
+
throws Exception
|
217
|
+
{
|
218
|
+
//PASSTHROUGH
|
219
|
+
{
|
220
|
+
String json = "{\\\"_c0\\\":true,\\\"_c1\\\":10,\\\"_c2\\\":\\\"embulk\\\",\\\"_c3\\\":{\\\"k\\\":\\\"v\\\"}}";
|
221
|
+
CharSource actual = plugin.invalidEscapeStringFunction(PASSTHROUGH).apply(json);
|
222
|
+
assertEquals(json , actual.read());
|
223
|
+
}
|
224
|
+
|
225
|
+
{
|
226
|
+
String json = "{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}\\";
|
227
|
+
CharSource actual = plugin.invalidEscapeStringFunction(PASSTHROUGH).apply(json);
|
228
|
+
assertEquals(json , actual.read());
|
229
|
+
}
|
230
|
+
|
231
|
+
{
|
232
|
+
String json = "{\"\\a\":\"b\"}\\";
|
233
|
+
CharSource actual = plugin.invalidEscapeStringFunction(PASSTHROUGH).apply(json);
|
234
|
+
assertEquals(json , actual.read());
|
235
|
+
}
|
236
|
+
|
237
|
+
//SKIP
|
238
|
+
{
|
239
|
+
String json = "{\\\"_c0\\\":true,\\\"_c1\\\":10,\\\"_c2\\\":\\\"embulk\\\",\\\"_c3\\\":{\\\"k\\\":\\\"v\\\"}}";
|
240
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
241
|
+
assertEquals(json , actual.read());
|
242
|
+
}
|
243
|
+
|
244
|
+
{
|
245
|
+
// valid charset u0001
|
246
|
+
String json = "{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}\\";
|
247
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
248
|
+
assertEquals("{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}" , actual.read());
|
249
|
+
}
|
250
|
+
|
251
|
+
|
252
|
+
{
|
253
|
+
// invalid charset \\u12xY remove forwarding backslash and u
|
254
|
+
String json = "{\"\\u12xY\":\"efg\"}\\";
|
255
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
256
|
+
assertEquals("{\"12xY\":\"efg\"}" , actual.read());
|
257
|
+
}
|
258
|
+
|
259
|
+
{
|
260
|
+
String json = "{\"\\a\":\"b\"}\\";
|
261
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
262
|
+
// backslash and `a` will removed.
|
263
|
+
assertEquals("{\"\":\"b\"}" , actual.read());
|
264
|
+
}
|
265
|
+
|
266
|
+
{
|
267
|
+
// end of lines backspash.
|
268
|
+
String json = "{\"\\a\":\"b\"}" +
|
269
|
+
"\n" +
|
270
|
+
"\\";
|
271
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
272
|
+
// backslash and `a` will removed.
|
273
|
+
assertEquals("{\"\":\"b\"}\n" , actual.read());
|
274
|
+
}
|
275
|
+
|
276
|
+
//UNESCAPE
|
277
|
+
{
|
278
|
+
String json = "{\\\"_c0\\\":true,\\\"_c1\\\":10,\\\"_c2\\\":\\\"embulk\\\",\\\"_c3\\\":{\\\"k\\\":\\\"v\\\"}}";
|
279
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
280
|
+
assertEquals(json , actual.read());
|
281
|
+
}
|
282
|
+
|
283
|
+
{
|
284
|
+
String json = "{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}\\";
|
285
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
286
|
+
assertEquals("{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}" , actual.read());
|
287
|
+
}
|
288
|
+
|
289
|
+
{
|
290
|
+
// invalid charset u000x remove forwarding backslash
|
291
|
+
String json = "{\"\\u000x\":\"efg\"}\\";
|
292
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
293
|
+
assertEquals("{\"u000x\":\"efg\"}" , actual.read());
|
294
|
+
}
|
295
|
+
|
296
|
+
|
297
|
+
{
|
298
|
+
String json = "{\"\\a\":\"b\"}\\";
|
299
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
300
|
+
// backslash will removed.
|
301
|
+
assertEquals("{\"a\":\"b\"}" , actual.read());
|
302
|
+
}
|
303
|
+
|
304
|
+
{
|
305
|
+
// end of lines backspash.
|
306
|
+
String json = "{\"\\a\":\"b\"}" +
|
307
|
+
"\n" +
|
308
|
+
"\\";
|
309
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
310
|
+
// backslash and `a` will removed.
|
311
|
+
assertEquals("{\"\":\"b\"}\n" , actual.read());
|
312
|
+
}
|
313
|
+
|
314
|
+
|
315
|
+
}
|
316
|
+
|
135
317
|
private ConfigSource config()
|
136
318
|
{
|
137
319
|
return runtime.getExec().newConfigSource();
|
data/lib/embulk/command/embulk_bundle.rb
CHANGED
@@ -1,20 +1,19 @@
|
|
1
|
-
|
2
1
|
bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
|
3
2
|
bundle_path = nil if bundle_path.empty?
|
4
3
|
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
bundle_path = ARGV.slice!(bundle_path_index, 2)[1]
|
4
|
+
# Search for -b or --bundle, and remove it.
|
5
|
+
if ARGV.find_index {|arg| arg == '-b' || arg == '--bundle' }
|
6
|
+
ARGV.slice!(bundle_path_index, 2)[1]
|
9
7
|
end
|
10
8
|
|
11
9
|
if bundle_path
|
12
|
-
|
13
|
-
ENV['
|
10
|
+
# In the selfrun script:
|
11
|
+
# ENV['EMBULK_BUNDLE_PATH']: set through '-b' | '--bundle', or inherit from the runtime environment
|
12
|
+
# ENV['BUNDLE_GEMFILE']: set for "ENV['EMBULK_BUNDLE_PATH']/Gemfile"
|
13
|
+
# ENV['GEM_HOME']: unset
|
14
|
+
# ENV['GEM_PATH']: unset
|
14
15
|
|
15
16
|
# bundler is included in embulk-core.jar
|
16
|
-
ENV.delete('GEM_HOME')
|
17
|
-
ENV.delete('GEM_PATH')
|
18
17
|
Gem.clear_paths
|
19
18
|
require 'bundler'
|
20
19
|
|
@@ -34,17 +33,12 @@ if bundle_path
|
|
34
33
|
end
|
35
34
|
|
36
35
|
else
|
37
|
-
#
|
38
|
-
#
|
39
|
-
ENV
|
40
|
-
|
41
|
-
|
42
|
-
raise "HOME environment variable is not set."
|
43
|
-
end
|
44
|
-
ENV['GEM_HOME'] = File.expand_path File.join(user_home, '.embulk', Gem.ruby_engine, RbConfig::CONFIG['ruby_version'])
|
45
|
-
ENV['GEM_PATH'] = ''
|
36
|
+
# In the selfrun script:
|
37
|
+
# ENV['EMBULK_BUNDLE_PATH']: unset
|
38
|
+
# ENV['BUNDLE_GEMFILE']: unset
|
39
|
+
# ENV['GEM_HOME']: set for "~/.embulk/jruby/${ruby-version}"
|
40
|
+
# ENV['GEM_PATH']: set for ""
|
46
41
|
|
47
|
-
ENV.delete('BUNDLE_GEMFILE')
|
48
42
|
Gem.clear_paths # force rubygems to reload GEM_HOME
|
49
43
|
|
50
44
|
$LOAD_PATH << File.expand_path('../../', File.dirname(__FILE__))
|
data/lib/embulk/version.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module Embulk
|
4
4
|
@@warned = false
|
5
5
|
|
6
|
-
VERSION_INTERNAL = '0.8.23'
|
6
|
+
VERSION_INTERNAL = '0.8.24'
|
7
7
|
|
8
8
|
DEPRECATED_MESSAGE = 'Embulk::VERSION in (J)Ruby is deprecated. Use org.embulk.EmbulkVersion::VERSION instead. If this message is from a plugin, please tell this to the author of the plugin!'
|
9
9
|
def self.const_missing(name)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.23
|
4
|
+
version: 0.8.24
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,9 +150,9 @@ files:
|
|
150
150
|
- classpath/commons-compress-1.10.jar
|
151
151
|
- classpath/commons-lang-2.4.jar
|
152
152
|
- classpath/commons-lang3-3.4.jar
|
153
|
-
- classpath/embulk-cli-0.8.23.jar
|
154
|
-
- classpath/embulk-core-0.8.23.jar
|
155
|
-
- classpath/embulk-standards-0.8.23.jar
|
153
|
+
- classpath/embulk-cli-0.8.24.jar
|
154
|
+
- classpath/embulk-core-0.8.24.jar
|
155
|
+
- classpath/embulk-standards-0.8.24.jar
|
156
156
|
- classpath/guava-18.0.jar
|
157
157
|
- classpath/guice-4.0.jar
|
158
158
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -502,6 +502,7 @@ files:
|
|
502
502
|
- embulk-docs/src/release/release-0.8.21.rst
|
503
503
|
- embulk-docs/src/release/release-0.8.22.rst
|
504
504
|
- embulk-docs/src/release/release-0.8.23.rst
|
505
|
+
- embulk-docs/src/release/release-0.8.24.rst
|
505
506
|
- embulk-docs/src/release/release-0.8.3.rst
|
506
507
|
- embulk-docs/src/release/release-0.8.4.rst
|
507
508
|
- embulk-docs/src/release/release-0.8.5.rst
|