embulk 0.8.23-java → 0.8.24-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/embulk +27 -1
- data/build.gradle +1 -1
- data/embulk-cli/src/main/bat/selfrun.bat +58 -0
- data/embulk-cli/src/main/sh/selfrun.sh +40 -1
- data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +18 -2
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +5 -0
- data/embulk-docs/src/built-in.rst +7 -0
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.24.rst +15 -0
- data/embulk-standards/src/main/java/org/embulk/standards/JsonParserPlugin.java +114 -3
- data/embulk-standards/src/test/java/org/embulk/standards/TestJsonParserPlugin.java +182 -0
- data/lib/embulk/command/embulk_bundle.rb +13 -19
- data/lib/embulk/version.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4304649d3c657ab61884394e4b04c32934ecc516
|
4
|
+
data.tar.gz: e99e2b6296c4660d6da94e9465e502dcdd218f22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29a99a36b30105da946f964c8f131c98a5f3dedb2fa16edf6e50b2463f9b08939ee4602e0cf6353bde581ae6b7ee471dbb5da8a0ca6a90d6872308609c15e6ad
|
7
|
+
data.tar.gz: 0496df6ebb63f5bbef27d3dbf0fed1b6a2e8d6609b17651009f2c9498d97df289ca8a9e0f2f1868031b0aa273be28c36609552cc88d47bec9d4592f7cea8c9a8
|
data/bin/embulk
CHANGED
@@ -6,6 +6,33 @@ if RUBY_PLATFORM =~ /java/i
|
|
6
6
|
ENV.delete('EMBULK_BIN_ENABLE_BUNDLE')
|
7
7
|
# include -cp CLASSPATH to LOAD_PATH so that embulk_bundle.rb can load bundler included in embulk-core.jar
|
8
8
|
$LOAD_PATH << "uri:classloader:/"
|
9
|
+
|
10
|
+
# Handle environment variables before entering embulk_bundle.rb.
|
11
|
+
# See: https://github.com/embulk/embulk/pull/604
|
12
|
+
|
13
|
+
bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
|
14
|
+
bundle_path = nil if bundle_path.empty?
|
15
|
+
|
16
|
+
# Search for -b or --bundle, and remove it.
|
17
|
+
if ARGV.find_index {|arg| arg == '-b' || arg == '--bundle' }
|
18
|
+
ARGV.slice!(bundle_path_index, 2)[1]
|
19
|
+
end
|
20
|
+
if bundle_path
|
21
|
+
ENV['EMBULK_BUNDLE_PATH'] = bundle_path
|
22
|
+
ENV['BUNDLE_GEMFILE'] = File.expand_path File.join(bundle_path, "Gemfile")
|
23
|
+
ENV.delete('GEM_HOME')
|
24
|
+
ENV.delete('GEM_PATH')
|
25
|
+
else
|
26
|
+
ENV.delete('EMBULK_BUNDLE_PATH')
|
27
|
+
user_home = java.lang.System.properties["user.home"] || ENV['HOME']
|
28
|
+
unless user_home
|
29
|
+
raise "HOME environment variable is not set."
|
30
|
+
end
|
31
|
+
ENV['GEM_HOME'] = File.expand_path File.join(user_home, '.embulk', Gem.ruby_engine, RbConfig::CONFIG['ruby_version'])
|
32
|
+
ENV['GEM_PATH'] = ''
|
33
|
+
ENV.delete('BUNDLE_GEMFILE')
|
34
|
+
end
|
35
|
+
|
9
36
|
require_relative '../lib/embulk/command/embulk_bundle'
|
10
37
|
else
|
11
38
|
# bin/embulk is run by JRuby (embulk gem for JRuby is installed). disable embulk_bundle not to bother the JRuby's bundler
|
@@ -107,4 +134,3 @@ cmdline << __FILE__
|
|
107
134
|
cmdline.concat ARGV
|
108
135
|
exec env, *cmdline
|
109
136
|
exit 127
|
110
|
-
|
data/build.gradle
CHANGED
data/embulk-cli/src/main/bat/selfrun.bat
CHANGED
@@ -30,6 +30,60 @@ if "%overwrite_optimize%" == "true" (
|
|
30
30
|
)
|
31
31
|
)
|
32
32
|
|
33
|
+
setlocal enabledelayedexpansion
|
34
|
+
|
35
|
+
set found_bundle_option=0
|
36
|
+
|
37
|
+
for %%a in (%*) do (
|
38
|
+
if %%a == -b (
|
39
|
+
set found_bundle_option=1
|
40
|
+
) else if %%a == --bundle (
|
41
|
+
set found_bundle_option=1
|
42
|
+
) else if !found_bundle_option! == 1 (
|
43
|
+
set embulk_bundle_path=%%a
|
44
|
+
set found_bundle_option=2
|
45
|
+
)
|
46
|
+
)
|
47
|
+
|
48
|
+
endlocal && set EMBULK_BUNDLE_PATH=%embulk_bundle_path%
|
49
|
+
|
50
|
+
if not defined EMBULK_BUNDLE_PATH (
|
51
|
+
set EMBULK_BUNDLE_PATH=
|
52
|
+
set GEM_PATH=""
|
53
|
+
) else (
|
54
|
+
if not exist "%EMBULK_BUNDLE_PATH%\" (
|
55
|
+
echo Directory not found: "%EMBULK_BUNDLE_PATH%"
|
56
|
+
exit /b 1
|
57
|
+
)
|
58
|
+
set GEM_PATH=
|
59
|
+
)
|
60
|
+
|
61
|
+
setlocal enabledelayedexpansion
|
62
|
+
|
63
|
+
if not defined EMBULK_BUNDLE_PATH (
|
64
|
+
set bundle_gemfile=
|
65
|
+
) else (
|
66
|
+
call :get_absolute_path %EMBULK_BUNDLE_PATH%
|
67
|
+
set bundle_gemfile=!absolute_path!\Gemfile
|
68
|
+
if not exist !bundle_gemfile! (
|
69
|
+
echo Gemfile not found: "!bundle_gemfile!"
|
70
|
+
exit /b 1
|
71
|
+
)
|
72
|
+
)
|
73
|
+
|
74
|
+
endlocal && set BUNDLE_GEMFILE=%bundle_gemfile%
|
75
|
+
|
76
|
+
setlocal enabledelayedexpansion
|
77
|
+
|
78
|
+
if not defined EMBULK_BUNDLE_PATH (
|
79
|
+
for /f "delims=" %%w in ('java -cp %0 org.jruby.Main -e "print RbConfig::CONFIG['ruby_version']"') do set ruby_version=%%w
|
80
|
+
set gem_home=%USERPROFILE%\.embulk\jruby\!ruby_version!
|
81
|
+
) else (
|
82
|
+
set gem_home=
|
83
|
+
)
|
84
|
+
|
85
|
+
endlocal && set GEM_HOME=%gem_home%
|
86
|
+
|
33
87
|
if "%optimize%" == "true" (
|
34
88
|
set java_args=-XX:+AggressiveOpts -XX:+UseConcMarkSweepGC %java_args%
|
35
89
|
) else (
|
@@ -96,3 +150,7 @@ if not exist "%~1" (
|
|
96
150
|
)
|
97
151
|
set status=
|
98
152
|
exit /b
|
153
|
+
|
154
|
+
:get_absolute_path
|
155
|
+
set absolute_path=%~f1
|
156
|
+
exit /b
|
data/embulk-cli/src/main/sh/selfrun.sh
CHANGED
@@ -44,11 +44,50 @@ while true; do
|
|
44
44
|
esac
|
45
45
|
done
|
46
46
|
|
47
|
+
embulk_args="$@"
|
48
|
+
|
49
|
+
while [ $# -gt 0 ] ; do
|
50
|
+
case "$1" in
|
51
|
+
"-b" | "--bundle")
|
52
|
+
shift
|
53
|
+
EMBULK_BUNDLE_PATH="$1"
|
54
|
+
export EMBULK_BUNDLE_PATH
|
55
|
+
shift
|
56
|
+
break
|
57
|
+
;;
|
58
|
+
*)
|
59
|
+
shift
|
60
|
+
;;
|
61
|
+
esac
|
62
|
+
done
|
63
|
+
|
64
|
+
if test -z ${EMBULK_BUNDLE_PATH}; then
|
65
|
+
unset EMBULK_BUNDLE_PATH
|
66
|
+
unset BUNDLE_GEMFILE
|
67
|
+
GEM_HOME="`cd && pwd`/.embulk/jruby/`java -cp $0 org.jruby.Main -e 'print RbConfig::CONFIG["ruby_version"]'`"
|
68
|
+
export GEM_HOME
|
69
|
+
GEM_PATH=""
|
70
|
+
export GEM_PATH
|
71
|
+
else
|
72
|
+
if test ! -d ${EMBULK_BUNDLE_PATH}; then
|
73
|
+
echo "Directory not found: \"${EMBULK_BUNDLE_PATH}\""
|
74
|
+
exit 127
|
75
|
+
fi
|
76
|
+
BUNDLE_GEMFILE="`cd ${EMBULK_BUNDLE_PATH} && pwd`/Gemfile"
|
77
|
+
if test ! -f ${BUNDLE_GEMFILE}; then
|
78
|
+
echo "Gemfile not found: \"${BUNDLE_GEMFILE}\""
|
79
|
+
exit 127
|
80
|
+
fi
|
81
|
+
export BUNDLE_GEMFILE
|
82
|
+
unset GEM_HOME
|
83
|
+
unset GEM_PATH
|
84
|
+
fi
|
85
|
+
|
47
86
|
if test "$overwrite_optimize" = "true" -o "$default_optimize" -a "$overwrite_optimize" != "false"; then
|
48
87
|
java_args="-XX:+AggressiveOpts -XX:+UseConcMarkSweepGC $java_args"
|
49
88
|
else
|
50
89
|
java_args="-XX:+AggressiveOpts -XX:+TieredCompilation -XX:TieredStopAtLevel=1 -Xverify:none $java_args"
|
51
90
|
fi
|
52
91
|
|
53
|
-
exec java $java_args -jar "$0" $jruby_args
|
92
|
+
exec java $java_args -jar "$0" $jruby_args $embulk_args
|
54
93
|
exit 127
|
data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java
CHANGED
@@ -16,6 +16,8 @@ import org.embulk.config.TaskSource;
|
|
16
16
|
import org.embulk.config.ConfigDiff;
|
17
17
|
import org.embulk.config.TaskReport;
|
18
18
|
import org.embulk.plugin.PluginType;
|
19
|
+
import org.embulk.spi.FileInputRunner;
|
20
|
+
import org.embulk.spi.FileOutputRunner;
|
19
21
|
import org.embulk.spi.Schema;
|
20
22
|
import org.embulk.spi.Exec;
|
21
23
|
import org.embulk.spi.ExecSession;
|
@@ -502,10 +504,24 @@ public class BulkLoader
|
|
502
504
|
}
|
503
505
|
}
|
504
506
|
|
505
|
-
|
507
|
+
final TaskSource inputTaskSource;
|
508
|
+
if (plugins.getInputPlugin() instanceof FileInputRunner) {
|
509
|
+
inputTaskSource = FileInputRunner.getFileInputTaskSource(resume.getInputTaskSource());
|
510
|
+
}
|
511
|
+
else {
|
512
|
+
inputTaskSource = resume.getInputTaskSource();
|
513
|
+
}
|
514
|
+
plugins.getInputPlugin().cleanup(inputTaskSource, resume.getInputSchema(),
|
506
515
|
resume.getInputTaskReports().size(), successfulInputTaskReports.build());
|
507
516
|
|
508
|
-
|
517
|
+
final TaskSource outputTaskSource;
|
518
|
+
if (plugins.getOutputPlugin() instanceof FileOutputRunner) {
|
519
|
+
outputTaskSource = FileOutputRunner.getFileOutputTaskSource(resume.getOutputTaskSource());
|
520
|
+
}
|
521
|
+
else {
|
522
|
+
outputTaskSource = resume.getOutputTaskSource();
|
523
|
+
}
|
524
|
+
plugins.getOutputPlugin().cleanup(outputTaskSource, resume.getOutputSchema(),
|
509
525
|
resume.getOutputTaskReports().size(), successfulOutputTaskReports.build());
|
510
526
|
}
|
511
527
|
|
data/embulk-docs/src/built-in.rst
CHANGED
@@ -241,6 +241,9 @@ The ``columns`` option declares the list of columns. This CSV parser plugin igno
|
|
241
241
|
| date | Set date part if the format doesn’t include date part |
|
242
242
|
+----------+--------------------------------------------------------+
|
243
243
|
|
244
|
+
.. note::
|
245
|
+
|
246
|
+
The Timestamp format refers to `Ruby strftime format <https://docs.ruby-lang.org/en/2.4.0/Date.html#method-i-strftime>`_
|
244
247
|
|
245
248
|
List of types:
|
246
249
|
|
@@ -497,6 +500,10 @@ The ``column_options`` option is a map whose keys are name of columns, and value
|
|
497
500
|
| format | string | Timestamp format if type of this column is timestamp. | ``%Y-%m-%d %H:%M:%S.%6N %z`` by default |
|
498
501
|
+----------------------+---------+-------------------------------------------------------------------------------------------------------+-----------------------------------------+
|
499
502
|
|
503
|
+
.. note::
|
504
|
+
|
505
|
+
The Timestamp format refers to `Ruby strftime format <https://docs.ruby-lang.org/en/2.4.0/Date.html#method-i-strftime>`_
|
506
|
+
|
500
507
|
Example
|
501
508
|
~~~~~~~~
|
502
509
|
|
data/embulk-docs/src/release.rst
CHANGED
data/embulk-docs/src/release/release-0.8.24.rst
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
Release 0.8.24
|
2
|
+
==================================
|
3
|
+
|
4
|
+
General Changes
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Make environment variables immutable in JRuby/Java [#604] [#670]
|
8
|
+
* Add a note on the timestamp format in embulk-doc with a link to Ruby documents [#648]
|
9
|
+
* Add clean illegal characters mode to json parser. [#651]
|
10
|
+
* embulk-core: Fix BulkLoader to pass file input/output plugins' task source to plugins' cleanup [#663]
|
11
|
+
|
12
|
+
|
13
|
+
Release Date
|
14
|
+
------------------
|
15
|
+
2017-06-14
|
data/embulk-standards/src/main/java/org/embulk/standards/JsonParserPlugin.java
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
3
|
import com.google.common.annotations.VisibleForTesting;
|
4
|
+
import com.google.common.base.Function;
|
5
|
+
import com.google.common.collect.Lists;
|
6
|
+
import com.google.common.io.CharSource;
|
7
|
+
import com.google.common.io.CharStreams;
|
4
8
|
import org.embulk.config.Config;
|
5
9
|
import org.embulk.config.ConfigDefault;
|
6
10
|
import org.embulk.config.ConfigSource;
|
@@ -18,20 +22,50 @@ import org.embulk.spi.json.JsonParseException;
|
|
18
22
|
import org.embulk.spi.json.JsonParser;
|
19
23
|
import org.embulk.spi.type.Types;
|
20
24
|
import org.embulk.spi.util.FileInputInputStream;
|
25
|
+
import org.jruby.embed.io.ReaderInputStream;
|
26
|
+
import org.msgpack.core.Preconditions;
|
21
27
|
import org.msgpack.value.Value;
|
22
28
|
import org.slf4j.Logger;
|
23
29
|
|
30
|
+
import javax.annotation.Nullable;
|
31
|
+
import java.io.BufferedReader;
|
24
32
|
import java.io.IOException;
|
33
|
+
import java.io.InputStreamReader;
|
34
|
+
import java.util.regex.Pattern;
|
25
35
|
|
26
36
|
public class JsonParserPlugin
|
27
37
|
implements ParserPlugin
|
28
38
|
{
|
39
|
+
|
40
|
+
public enum InvalidEscapeStringPolicy
|
41
|
+
{
|
42
|
+
PASSTHROUGH("PASSTHROUGH"),
|
43
|
+
SKIP("SKIP"),
|
44
|
+
UNESCAPE("UNESCAPE");
|
45
|
+
|
46
|
+
private final String string;
|
47
|
+
|
48
|
+
private InvalidEscapeStringPolicy(String string)
|
49
|
+
{
|
50
|
+
this.string = string;
|
51
|
+
}
|
52
|
+
|
53
|
+
public String getString()
|
54
|
+
{
|
55
|
+
return string;
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
29
59
|
public interface PluginTask
|
30
60
|
extends Task
|
31
61
|
{
|
32
62
|
@Config("stop_on_invalid_record")
|
33
63
|
@ConfigDefault("false")
|
34
64
|
boolean getStopOnInvalidRecord();
|
65
|
+
|
66
|
+
@Config("invalid_string_escapes")
|
67
|
+
@ConfigDefault("\"PASSTHROUGH\"")
|
68
|
+
InvalidEscapeStringPolicy getInvalidEscapeStringPolicy();
|
35
69
|
}
|
36
70
|
|
37
71
|
private final Logger log;
|
@@ -66,7 +100,7 @@ public class JsonParserPlugin
|
|
66
100
|
FileInputInputStream in = new FileInputInputStream(input)) {
|
67
101
|
while (in.nextFile()) {
|
68
102
|
boolean evenOneJsonParsed = false;
|
69
|
-
try (JsonParser.Stream stream = newJsonStream(in)) {
|
103
|
+
try (JsonParser.Stream stream = newJsonStream(in, task)) {
|
70
104
|
Value value;
|
71
105
|
while ((value = stream.next()) != null) {
|
72
106
|
try {
|
@@ -107,10 +141,87 @@ public class JsonParserPlugin
|
|
107
141
|
return new PageBuilder(Exec.getBufferAllocator(), schema, output);
|
108
142
|
}
|
109
143
|
|
110
|
-
private JsonParser.Stream newJsonStream(FileInputInputStream in)
|
144
|
+
private JsonParser.Stream newJsonStream(FileInputInputStream in, PluginTask task)
|
111
145
|
throws IOException
|
112
146
|
{
|
113
|
-
|
147
|
+
InvalidEscapeStringPolicy policy = task.getInvalidEscapeStringPolicy();
|
148
|
+
switch (policy) {
|
149
|
+
case SKIP:
|
150
|
+
case UNESCAPE:
|
151
|
+
Iterable<CharSource> lines = Lists.transform(CharStreams.readLines(new BufferedReader(new InputStreamReader(in))),
|
152
|
+
invalidEscapeStringFunction(policy));
|
153
|
+
return new JsonParser().open(new ReaderInputStream(CharSource.concat(lines).openStream()));
|
154
|
+
case PASSTHROUGH:
|
155
|
+
default:
|
156
|
+
return new JsonParser().open(in);
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
Function<String, CharSource> invalidEscapeStringFunction(final InvalidEscapeStringPolicy policy)
|
161
|
+
{
|
162
|
+
return new Function<String, CharSource>()
|
163
|
+
{
|
164
|
+
final Pattern digitsPattern = Pattern.compile("\\p{XDigit}+");
|
165
|
+
|
166
|
+
@Override
|
167
|
+
public CharSource apply(@Nullable String input)
|
168
|
+
{
|
169
|
+
Preconditions.checkNotNull(input);
|
170
|
+
if (policy == InvalidEscapeStringPolicy.PASSTHROUGH) {
|
171
|
+
return CharSource.wrap(input);
|
172
|
+
}
|
173
|
+
StringBuilder builder = new StringBuilder();
|
174
|
+
char[] charArray = input.toCharArray();
|
175
|
+
for (int characterIndex = 0; characterIndex < charArray.length; characterIndex++) {
|
176
|
+
char c = charArray[characterIndex];
|
177
|
+
if (c == '\\') {
|
178
|
+
if (charArray.length > characterIndex + 1) {
|
179
|
+
char next = charArray[characterIndex + 1];
|
180
|
+
switch (next) {
|
181
|
+
case 'b':
|
182
|
+
case 'f':
|
183
|
+
case 'n':
|
184
|
+
case 'r':
|
185
|
+
case 't':
|
186
|
+
case '"':
|
187
|
+
case '\\':
|
188
|
+
case '/':
|
189
|
+
builder.append(c);
|
190
|
+
break;
|
191
|
+
case 'u': // hexstring such as \u0001
|
192
|
+
if (charArray.length > characterIndex + 5) {
|
193
|
+
char[] hexChars = {charArray[characterIndex + 2], charArray[characterIndex + 3], charArray[characterIndex + 4],
|
194
|
+
charArray[characterIndex + 5]};
|
195
|
+
String hexString = new String(hexChars);
|
196
|
+
if (digitsPattern.matcher(hexString).matches()) {
|
197
|
+
builder.append(c);
|
198
|
+
} else {
|
199
|
+
if (policy == InvalidEscapeStringPolicy.SKIP) {
|
200
|
+
// remove \\u
|
201
|
+
characterIndex++;
|
202
|
+
}
|
203
|
+
}
|
204
|
+
}
|
205
|
+
break;
|
206
|
+
default:
|
207
|
+
switch (policy) {
|
208
|
+
case SKIP:
|
209
|
+
characterIndex++;
|
210
|
+
break;
|
211
|
+
case UNESCAPE:
|
212
|
+
break;
|
213
|
+
}
|
214
|
+
break;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
}
|
218
|
+
else {
|
219
|
+
builder.append(c);
|
220
|
+
}
|
221
|
+
}
|
222
|
+
return CharSource.wrap(builder.toString());
|
223
|
+
}
|
224
|
+
};
|
114
225
|
}
|
115
226
|
|
116
227
|
static class JsonRecordValidateException
|
data/embulk-standards/src/test/java/org/embulk/standards/TestJsonParserPlugin.java
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
package org.embulk.standards;
|
2
2
|
|
3
3
|
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.io.CharSource;
|
4
5
|
import org.embulk.EmbulkTestRuntime;
|
5
6
|
import org.embulk.config.ConfigSource;
|
6
7
|
import org.embulk.config.TaskSource;
|
7
8
|
import org.embulk.spi.DataException;
|
9
|
+
import org.embulk.spi.Exec;
|
8
10
|
import org.embulk.spi.FileInput;
|
9
11
|
import org.embulk.spi.ParserPlugin;
|
10
12
|
import org.embulk.spi.Schema;
|
@@ -22,6 +24,9 @@ import java.io.InputStream;
|
|
22
24
|
import java.util.List;
|
23
25
|
import java.util.Map;
|
24
26
|
|
27
|
+
import static org.embulk.standards.JsonParserPlugin.InvalidEscapeStringPolicy.PASSTHROUGH;
|
28
|
+
import static org.embulk.standards.JsonParserPlugin.InvalidEscapeStringPolicy.SKIP;
|
29
|
+
import static org.embulk.standards.JsonParserPlugin.InvalidEscapeStringPolicy.UNESCAPE;
|
25
30
|
import static org.junit.Assert.assertEquals;
|
26
31
|
import static org.junit.Assert.assertTrue;
|
27
32
|
import static org.junit.Assert.fail;
|
@@ -48,6 +53,17 @@ public class TestJsonParserPlugin
|
|
48
53
|
output = new MockPageOutput();
|
49
54
|
}
|
50
55
|
|
56
|
+
@Test
|
57
|
+
public void checkDefaultValues()
|
58
|
+
{
|
59
|
+
ConfigSource config = Exec.newConfigSource();
|
60
|
+
|
61
|
+
JsonParserPlugin.PluginTask task = config.loadConfig(JsonParserPlugin.PluginTask.class);
|
62
|
+
assertEquals(false, task.getStopOnInvalidRecord());
|
63
|
+
assertEquals(JsonParserPlugin.InvalidEscapeStringPolicy.PASSTHROUGH, task.getInvalidEscapeStringPolicy());
|
64
|
+
}
|
65
|
+
|
66
|
+
|
51
67
|
@Test
|
52
68
|
public void readNormalJson()
|
53
69
|
throws Exception
|
@@ -132,6 +148,172 @@ public class TestJsonParserPlugin
|
|
132
148
|
}
|
133
149
|
}
|
134
150
|
|
151
|
+
@Test
|
152
|
+
public void useDefaultInvalidEscapeStringFunction()
|
153
|
+
throws Exception
|
154
|
+
{
|
155
|
+
try {
|
156
|
+
transaction(config, fileInput(
|
157
|
+
"{\"\\a\":\"b\"}\\" // throw DataException
|
158
|
+
));
|
159
|
+
fail();
|
160
|
+
}
|
161
|
+
catch (Throwable t) {
|
162
|
+
assertTrue(t instanceof DataException);
|
163
|
+
}
|
164
|
+
}
|
165
|
+
|
166
|
+
@Test
|
167
|
+
public void usePassthroughInvalidEscapeStringFunction()
|
168
|
+
throws Exception
|
169
|
+
{
|
170
|
+
try {
|
171
|
+
ConfigSource config = this.config.deepCopy().set("invalid_string_escapes", "PASSTHROUGH");
|
172
|
+
transaction(config, fileInput(
|
173
|
+
"{\"\\a\":\"b\"}\\" // throw DataException
|
174
|
+
));
|
175
|
+
fail();
|
176
|
+
}
|
177
|
+
catch (Throwable t) {
|
178
|
+
assertTrue(t instanceof DataException);
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
@Test
|
183
|
+
public void useSkipInvalidEscapeString()
|
184
|
+
throws Exception
|
185
|
+
{
|
186
|
+
ConfigSource config = this.config.deepCopy().set("invalid_string_escapes", "SKIP");
|
187
|
+
transaction(config, fileInput(
|
188
|
+
"{\"\\a\":\"b\"}\\"
|
189
|
+
));
|
190
|
+
|
191
|
+
List<Object[]> records = Pages.toObjects(plugin.newSchema(), output.pages);
|
192
|
+
assertEquals(1, records.size());
|
193
|
+
Object[] record = records.get(0);
|
194
|
+
Map<Value, Value> map = ((Value)record[0]).asMapValue().map();
|
195
|
+
assertEquals(newString("b"), map.get(newString("")));
|
196
|
+
}
|
197
|
+
|
198
|
+
@Test
|
199
|
+
public void useUnEscapeInvalidEscapeString()
|
200
|
+
throws Exception
|
201
|
+
{
|
202
|
+
ConfigSource config = this.config.deepCopy().set("invalid_string_escapes", "UNESCAPE");
|
203
|
+
transaction(config, fileInput(
|
204
|
+
"{\"\\a\":\"b\"}\\"
|
205
|
+
));
|
206
|
+
|
207
|
+
List<Object[]> records = Pages.toObjects(plugin.newSchema(), output.pages);
|
208
|
+
assertEquals(1, records.size());
|
209
|
+
Object[] record = records.get(0);
|
210
|
+
Map<Value, Value> map = ((Value)record[0]).asMapValue().map();
|
211
|
+
assertEquals(newString("b"), map.get(newString("a")));
|
212
|
+
}
|
213
|
+
|
214
|
+
@Test
|
215
|
+
public void checkInvalidEscapeStringFunction()
|
216
|
+
throws Exception
|
217
|
+
{
|
218
|
+
//PASSTHROUGH
|
219
|
+
{
|
220
|
+
String json = "{\\\"_c0\\\":true,\\\"_c1\\\":10,\\\"_c2\\\":\\\"embulk\\\",\\\"_c3\\\":{\\\"k\\\":\\\"v\\\"}}";
|
221
|
+
CharSource actual = plugin.invalidEscapeStringFunction(PASSTHROUGH).apply(json);
|
222
|
+
assertEquals(json , actual.read());
|
223
|
+
}
|
224
|
+
|
225
|
+
{
|
226
|
+
String json = "{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}\\";
|
227
|
+
CharSource actual = plugin.invalidEscapeStringFunction(PASSTHROUGH).apply(json);
|
228
|
+
assertEquals(json , actual.read());
|
229
|
+
}
|
230
|
+
|
231
|
+
{
|
232
|
+
String json = "{\"\\a\":\"b\"}\\";
|
233
|
+
CharSource actual = plugin.invalidEscapeStringFunction(PASSTHROUGH).apply(json);
|
234
|
+
assertEquals(json , actual.read());
|
235
|
+
}
|
236
|
+
|
237
|
+
//SKIP
|
238
|
+
{
|
239
|
+
String json = "{\\\"_c0\\\":true,\\\"_c1\\\":10,\\\"_c2\\\":\\\"embulk\\\",\\\"_c3\\\":{\\\"k\\\":\\\"v\\\"}}";
|
240
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
241
|
+
assertEquals(json , actual.read());
|
242
|
+
}
|
243
|
+
|
244
|
+
{
|
245
|
+
// valid charset u0001
|
246
|
+
String json = "{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}\\";
|
247
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
248
|
+
assertEquals("{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}" , actual.read());
|
249
|
+
}
|
250
|
+
|
251
|
+
|
252
|
+
{
|
253
|
+
// invalid charset \\u12xY remove forwarding backslash and u
|
254
|
+
String json = "{\"\\u12xY\":\"efg\"}\\";
|
255
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
256
|
+
assertEquals("{\"12xY\":\"efg\"}" , actual.read());
|
257
|
+
}
|
258
|
+
|
259
|
+
{
|
260
|
+
String json = "{\"\\a\":\"b\"}\\";
|
261
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
262
|
+
// backslash and `a` will removed.
|
263
|
+
assertEquals("{\"\":\"b\"}" , actual.read());
|
264
|
+
}
|
265
|
+
|
266
|
+
{
|
267
|
+
// end of lines backspash.
|
268
|
+
String json = "{\"\\a\":\"b\"}" +
|
269
|
+
"\n" +
|
270
|
+
"\\";
|
271
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
272
|
+
// backslash and `a` will removed.
|
273
|
+
assertEquals("{\"\":\"b\"}\n" , actual.read());
|
274
|
+
}
|
275
|
+
|
276
|
+
//UNESCAPE
|
277
|
+
{
|
278
|
+
String json = "{\\\"_c0\\\":true,\\\"_c1\\\":10,\\\"_c2\\\":\\\"embulk\\\",\\\"_c3\\\":{\\\"k\\\":\\\"v\\\"}}";
|
279
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
280
|
+
assertEquals(json , actual.read());
|
281
|
+
}
|
282
|
+
|
283
|
+
{
|
284
|
+
String json = "{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}\\";
|
285
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
286
|
+
assertEquals("{\"abc\b\f\n\r\t\\\\u0001\":\"efg\"}" , actual.read());
|
287
|
+
}
|
288
|
+
|
289
|
+
{
|
290
|
+
// invalid charset u000x remove forwarding backslash
|
291
|
+
String json = "{\"\\u000x\":\"efg\"}\\";
|
292
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
293
|
+
assertEquals("{\"u000x\":\"efg\"}" , actual.read());
|
294
|
+
}
|
295
|
+
|
296
|
+
|
297
|
+
{
|
298
|
+
String json = "{\"\\a\":\"b\"}\\";
|
299
|
+
CharSource actual = plugin.invalidEscapeStringFunction(UNESCAPE).apply(json);
|
300
|
+
// backslash will removed.
|
301
|
+
assertEquals("{\"a\":\"b\"}" , actual.read());
|
302
|
+
}
|
303
|
+
|
304
|
+
{
|
305
|
+
// end of lines backspash.
|
306
|
+
String json = "{\"\\a\":\"b\"}" +
|
307
|
+
"\n" +
|
308
|
+
"\\";
|
309
|
+
CharSource actual = plugin.invalidEscapeStringFunction(SKIP).apply(json);
|
310
|
+
// backslash and `a` will removed.
|
311
|
+
assertEquals("{\"\":\"b\"}\n" , actual.read());
|
312
|
+
}
|
313
|
+
|
314
|
+
|
315
|
+
}
|
316
|
+
|
135
317
|
private ConfigSource config()
|
136
318
|
{
|
137
319
|
return runtime.getExec().newConfigSource();
|
data/lib/embulk/command/embulk_bundle.rb
CHANGED
@@ -1,20 +1,19 @@
|
|
1
|
-
|
2
1
|
bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
|
3
2
|
bundle_path = nil if bundle_path.empty?
|
4
3
|
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
bundle_path = ARGV.slice!(bundle_path_index, 2)[1]
|
4
|
+
# Search for -b or --bundle, and remove it.
|
5
|
+
if ARGV.find_index {|arg| arg == '-b' || arg == '--bundle' }
|
6
|
+
ARGV.slice!(bundle_path_index, 2)[1]
|
9
7
|
end
|
10
8
|
|
11
9
|
if bundle_path
|
12
|
-
|
13
|
-
ENV['
|
10
|
+
# In the selfrun script:
|
11
|
+
# ENV['EMBULK_BUNDLE_PATH']: set through '-b' | '--bundle', or inherit from the runtime environment
|
12
|
+
# ENV['BUNDLE_GEMFILE']: set for "ENV['EMBULK_BUNDLE_PATH']/Gemfile"
|
13
|
+
# ENV['GEM_HOME']: unset
|
14
|
+
# ENV['GEM_PATH']: unset
|
14
15
|
|
15
16
|
# bundler is included in embulk-core.jar
|
16
|
-
ENV.delete('GEM_HOME')
|
17
|
-
ENV.delete('GEM_PATH')
|
18
17
|
Gem.clear_paths
|
19
18
|
require 'bundler'
|
20
19
|
|
@@ -34,17 +33,12 @@ if bundle_path
|
|
34
33
|
end
|
35
34
|
|
36
35
|
else
|
37
|
-
#
|
38
|
-
#
|
39
|
-
ENV
|
40
|
-
|
41
|
-
|
42
|
-
raise "HOME environment variable is not set."
|
43
|
-
end
|
44
|
-
ENV['GEM_HOME'] = File.expand_path File.join(user_home, '.embulk', Gem.ruby_engine, RbConfig::CONFIG['ruby_version'])
|
45
|
-
ENV['GEM_PATH'] = ''
|
36
|
+
# In the selfrun script:
|
37
|
+
# ENV['EMBULK_BUNDLE_PATH']: unset
|
38
|
+
# ENV['BUNDLE_GEMFILE']: unset
|
39
|
+
# ENV['GEM_HOME']: set for "~/.embulk/jruby/${ruby-version}"
|
40
|
+
# ENV['GEM_PATH']: set for ""
|
46
41
|
|
47
|
-
ENV.delete('BUNDLE_GEMFILE')
|
48
42
|
Gem.clear_paths # force rubygems to reload GEM_HOME
|
49
43
|
|
50
44
|
$LOAD_PATH << File.expand_path('../../', File.dirname(__FILE__))
|
data/lib/embulk/version.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module Embulk
|
4
4
|
@@warned = false
|
5
5
|
|
6
|
-
VERSION_INTERNAL = '0.8.23'
|
6
|
+
VERSION_INTERNAL = '0.8.24'
|
7
7
|
|
8
8
|
DEPRECATED_MESSAGE = 'Embulk::VERSION in (J)Ruby is deprecated. Use org.embulk.EmbulkVersion::VERSION instead. If this message is from a plugin, please tell this to the author of the plugin!'
|
9
9
|
def self.const_missing(name)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.23
|
4
|
+
version: 0.8.24
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,9 +150,9 @@ files:
|
|
150
150
|
- classpath/commons-compress-1.10.jar
|
151
151
|
- classpath/commons-lang-2.4.jar
|
152
152
|
- classpath/commons-lang3-3.4.jar
|
153
|
-
- classpath/embulk-cli-0.8.23.jar
|
154
|
-
- classpath/embulk-core-0.8.23.jar
|
155
|
-
- classpath/embulk-standards-0.8.23.jar
|
153
|
+
- classpath/embulk-cli-0.8.24.jar
|
154
|
+
- classpath/embulk-core-0.8.24.jar
|
155
|
+
- classpath/embulk-standards-0.8.24.jar
|
156
156
|
- classpath/guava-18.0.jar
|
157
157
|
- classpath/guice-4.0.jar
|
158
158
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -502,6 +502,7 @@ files:
|
|
502
502
|
- embulk-docs/src/release/release-0.8.21.rst
|
503
503
|
- embulk-docs/src/release/release-0.8.22.rst
|
504
504
|
- embulk-docs/src/release/release-0.8.23.rst
|
505
|
+
- embulk-docs/src/release/release-0.8.24.rst
|
505
506
|
- embulk-docs/src/release/release-0.8.3.rst
|
506
507
|
- embulk-docs/src/release/release-0.8.4.rst
|
507
508
|
- embulk-docs/src/release/release-0.8.5.rst
|