embulk 0.8.31 → 0.8.32

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,13 +16,7 @@ import org.embulk.config.ConfigSource;
16
16
  */
17
17
  public class EmbulkSetup
18
18
  {
19
- public static EmbulkRunner setup(
20
- final List<String> jrubyOptions,
21
- final Map<String, Object> systemConfigGiven,
22
- final List<String> loadPaths,
23
- final List<String> pluginPaths,
24
- final List<String> classpaths,
25
- final String bundlePath)
19
+ public static EmbulkRunner setup(final Map<String, Object> systemConfigGiven)
26
20
  {
27
21
  // NOTE: When it was in Ruby "require 'json'" was required to format the system config into a JSON string.
28
22
 
@@ -50,6 +44,6 @@ public class EmbulkSetup
50
44
  bootstrap.setSystemConfig(systemConfig);
51
45
  final EmbulkEmbed embed = bootstrap.initialize(); // see embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.
52
46
 
53
- return new EmbulkRunner(embed, jrubyOptions, loadPaths, pluginPaths, classpaths, bundlePath);
47
+ return new EmbulkRunner(embed);
54
48
  }
55
49
  }
@@ -1,8 +1,17 @@
1
1
  package org.embulk.jruby;
2
2
 
3
+ import java.net.URISyntaxException;
4
+ import java.net.URL;
5
+ import java.nio.file.Files;
6
+ import java.nio.file.Path;
7
+ import java.nio.file.Paths;
8
+ import java.security.CodeSource;
9
+ import java.security.ProtectionDomain;
3
10
  import java.util.List;
4
11
  import java.util.ArrayList;
12
+ import java.util.Collections;
5
13
  import java.util.Set;
14
+ import org.slf4j.Logger;
6
15
  import org.slf4j.ILoggerFactory;
7
16
  import com.google.common.collect.ImmutableSet;
8
17
  import com.google.inject.Module;
@@ -14,8 +23,11 @@ import com.google.inject.Injector;
14
23
  import com.google.inject.Key;
15
24
  import com.google.inject.spi.Dependency;
16
25
  import com.google.inject.spi.ProviderWithDependencies;
26
+ import org.jruby.RubyInstanceConfig;
17
27
  import org.jruby.embed.LocalContextScope;
28
+ import org.jruby.embed.LocalVariableBehavior;
18
29
  import org.jruby.embed.ScriptingContainer;
30
+ import org.jruby.util.cli.Options;
19
31
  import org.embulk.plugin.PluginSource;
20
32
  import org.embulk.config.ConfigSource;
21
33
  import org.embulk.config.ModelManager;
@@ -44,6 +56,10 @@ public class JRubyScriptingModule
44
56
  private final Injector injector;
45
57
  private final boolean useGlobalRubyRuntime;
46
58
  private final String gemHome;
59
+ private final List<String> jrubyClasspath;
60
+ private final List<String> jrubyLoadPath;
61
+ private final List<String> jrubyOptions;
62
+ private final String jrubyBundlerPluginSourceDirectory;
47
63
 
48
64
  @Inject
49
65
  public ScriptingContainerProvider(Injector injector, @ForSystemConfig ConfigSource systemConfig)
@@ -57,13 +73,101 @@ public class JRubyScriptingModule
57
73
  this.gemHome = systemConfig.get(String.class, "gem_home", null);
58
74
 
59
75
  // TODO get jruby-home from systemConfig to call jruby.container.setHomeDirectory
60
- // TODO get jruby-load-paths from systemConfig to call jruby.container.setLoadPaths
76
+
77
+ final List jrubyLoadPathNonGeneric = systemConfig.get(List.class, "jruby_load_path", null);
78
+ final ArrayList<String> jrubyLoadPathBuilt = new ArrayList<String>();
79
+ if (jrubyLoadPathNonGeneric != null) {
80
+ for (final Object oneJRubyLoadPath : jrubyLoadPathNonGeneric) {
81
+ if (oneJRubyLoadPath instanceof String) {
82
+ jrubyLoadPathBuilt.add((String) oneJRubyLoadPath);
83
+ }
84
+ else {
85
+ // It should happen only in very irregular cases. Okay to create |Logger| every time.
86
+ Logger logger = injector.getInstance(ILoggerFactory.class).getLogger("init");
87
+ logger.warn("System config \"jruby_load_path\" contains non-String.");
88
+ jrubyLoadPathBuilt.add(oneJRubyLoadPath.toString());
89
+ }
90
+ }
91
+ }
92
+ this.jrubyLoadPath = Collections.unmodifiableList(jrubyLoadPathBuilt);
93
+
94
+ final List jrubyClasspathNonGeneric = systemConfig.get(List.class, "jruby_classpath", new ArrayList());
95
+ final ArrayList<String> jrubyClasspathBuilt = new ArrayList<String>();
96
+ if (jrubyClasspathNonGeneric != null) {
97
+ for (final Object oneJRubyClasspath : jrubyClasspathNonGeneric) {
98
+ if (oneJRubyClasspath instanceof String) {
99
+ jrubyClasspathBuilt.add((String) oneJRubyClasspath);
100
+ }
101
+ else {
102
+ // It should happen only in very irregular cases. Okay to create |Logger| every time.
103
+ Logger logger = injector.getInstance(ILoggerFactory.class).getLogger("init");
104
+ logger.warn("System config \"jruby_classpath\" contains non-String.");
105
+ jrubyClasspathBuilt.add(oneJRubyClasspath.toString());
106
+ }
107
+ }
108
+ }
109
+ this.jrubyClasspath = Collections.unmodifiableList(jrubyClasspathBuilt);
110
+
111
+ final List jrubyOptionsNonGeneric = systemConfig.get(List.class, "jruby_command_line_options", null);
112
+ final ArrayList<String> jrubyOptionsBuilt = new ArrayList<String>();
113
+ if (jrubyOptionsNonGeneric != null) {
114
+ for (final Object oneJRubyOption : jrubyOptionsNonGeneric) {
115
+ if (oneJRubyOption instanceof String) {
116
+ jrubyOptionsBuilt.add((String) oneJRubyOption);
117
+ }
118
+ else {
119
+ // It should happen only in very irregular cases. Okay to create |Logger| every time.
120
+ Logger logger = injector.getInstance(ILoggerFactory.class).getLogger("init");
121
+ logger.warn("System config \"jruby_command_line_options\" contains non-String.");
122
+ jrubyOptionsBuilt.add(oneJRubyOption.toString());
123
+ }
124
+ }
125
+ }
126
+ this.jrubyOptions = Collections.unmodifiableList(jrubyOptionsBuilt);
127
+
128
+ this.jrubyBundlerPluginSourceDirectory =
129
+ systemConfig.get(String.class, "jruby_global_bundler_plugin_source_directory", null);
61
130
  }
62
131
 
63
132
  public ScriptingContainer get()
64
133
  {
65
134
  LocalContextScope scope = (useGlobalRubyRuntime ? LocalContextScope.SINGLETON : LocalContextScope.SINGLETHREAD);
66
- ScriptingContainer jruby = new ScriptingContainer(scope);
135
+ ScriptingContainer jruby = new ScriptingContainer(scope, LocalVariableBehavior.PERSISTENT);
136
+ final RubyInstanceConfig jrubyInstanceConfig = jruby.getProvider().getRubyInstanceConfig();
137
+ for (final String jrubyOption : this.jrubyOptions) {
138
+ try {
139
+ processJRubyOption(jrubyOption, jrubyInstanceConfig);
140
+ }
141
+ catch (UnrecognizedJRubyOptionException ex) {
142
+ final Logger logger = this.injector.getInstance(ILoggerFactory.class).getLogger("init");
143
+ logger.error("The \"-R\" option(s) are not recognized in Embulk: -R" + jrubyOption +
144
+ ". Please add your requests at: https://github.com/embulk/embulk/issues/707", ex);
145
+ throw new RuntimeException(ex);
146
+
147
+ }
148
+ catch (NotWorkingJRubyOptionException ex) {
149
+ final Logger logger = this.injector.getInstance(ILoggerFactory.class).getLogger("init");
150
+ logger.warn("The \"-R\" option(s) do not work in Embulk: -R" + jrubyOption + ".", ex);
151
+ }
152
+ }
153
+
154
+ setBundlerPluginSourceDirectory(jruby, this.jrubyBundlerPluginSourceDirectory);
155
+
156
+ for (final String oneJRubyLoadPath : this.jrubyLoadPath) {
157
+ // ruby script directory (use unshift to make it highest priority)
158
+ jruby.put("__internal_load_path__", oneJRubyLoadPath);
159
+ // TODO: Check if $LOAD_PATH already contains it.
160
+ jruby.runScriptlet("$LOAD_PATH.unshift File.expand_path(__internal_load_path__)");
161
+ jruby.remove("__internal_load_path__");
162
+ }
163
+
164
+ for (final String oneJRubyClasspath : this.jrubyClasspath) {
165
+ jruby.put("__internal_classpath__", oneJRubyClasspath);
166
+ // $CLASSPATH object doesn't have concat method
167
+ // TODO: Check if $CLASSPATH already contains it.
168
+ jruby.runScriptlet("$CLASSPATH << __internal_classpath__");
169
+ jruby.remove("__internal_classpath__");
170
+ }
67
171
 
68
172
  // Search embulk/java/bootstrap.rb from a $LOAD_PATH.
69
173
  // $LOAD_PATH is set by lib/embulk/command/embulk_run.rb if Embulk starts
@@ -128,5 +232,158 @@ public class JRubyScriptingModule
128
232
  Dependency.get(Key.get(ModelManager.class)),
129
233
  Dependency.get(Key.get(BufferAllocator.class)));
130
234
  }
235
+
236
+ private static final class UnrecognizedJRubyOptionException extends Exception {}
237
+ private static final class NotWorkingJRubyOptionException extends Exception {}
238
+ private static final class UnrecognizedJRubyLoadPathException extends Exception {
239
+ public UnrecognizedJRubyLoadPathException(final String message)
240
+ {
241
+ super(message);
242
+ }
243
+
244
+ public UnrecognizedJRubyLoadPathException(final String message, final Throwable cause)
245
+ {
246
+ super(message, cause);
247
+ }
248
+ }
249
+
250
+ private void setBundlerPluginSourceDirectory(final ScriptingContainer jruby, final String directory)
251
+ {
252
+ if (directory != null) {
253
+ /* Environment variables are set in the selfrun script or bin/embulk:
254
+ * ENV['EMBULK_BUNDLE_PATH']: set through '-b' | '--bundle', or inherit from the runtime environment
255
+ * ENV['BUNDLE_GEMFILE']: set for "ENV['EMBULK_BUNDLE_PATH']/Gemfile"
256
+ * ENV['GEM_HOME']: unset
257
+ * ENV['GEM_PATH']: unset
258
+ */
259
+
260
+ // bundler is included in embulk-core.jar
261
+ jruby.runScriptlet("Gem.clear_paths");
262
+ jruby.runScriptlet("require 'bundler'");
263
+
264
+ jruby.runScriptlet("Bundler.load.setup_environment");
265
+ jruby.runScriptlet("require 'bundler/setup'");
266
+ // from here on, `require` may load files of different (newer) embulk versions,
267
+ // especially the following 'embulk/command/embulk_main'.
268
+
269
+ // NOTE: It is intentionally not done by building a Ruby statement string from |directory|.
270
+ // Doing so could allow code injection.
271
+ //
272
+ // add bundle directory path to load local plugins at ./embulk
273
+ jruby.put("__internal_bundler_plugin_source_directory__", directory);
274
+ jruby.runScriptlet("$LOAD_PATH << File.expand_path(__internal_bundler_plugin_source_directory__)");
275
+ jruby.remove("__internal_bundler_plugin_source_directory__");
276
+ }
277
+ else {
278
+ /* Environment variables are set in the selfrun script or bin/embulk:
279
+ * ENV['EMBULK_BUNDLE_PATH']: unset
280
+ * ENV['BUNDLE_GEMFILE']: unset
281
+ * ENV['GEM_HOME']: set for "~/.embulk/jruby/${ruby-version}"
282
+ * ENV['GEM_PATH']: set for ""
283
+ */
284
+
285
+ jruby.runScriptlet("Gem.clear_paths"); // force rubygems to reload GEM_HOME
286
+
287
+ // NOTE: The path from |buildJRubyLoadPath()| is added to $LOAD_PATH just in case.
288
+ // Though it is not mandatory just to run "embulk_main.rb", it may be required in later steps.
289
+ //
290
+ // NOTE: It is intentionally not done by building a Ruby statement string from |buildJRubyLoadPath()|.
291
+ // Doing so could allow code injection.
292
+ //
293
+ // NOTE: It was written in Ruby as follows:
294
+ // $LOAD_PATH << File.expand_path('../../', File.dirname(__FILE__))
295
+ final String jrubyLoadPath;
296
+ try {
297
+ jrubyLoadPath = buildJRubyLoadPath();
298
+ }
299
+ catch (UnrecognizedJRubyLoadPathException ex) {
300
+ final Logger logger = this.injector.getInstance(ILoggerFactory.class).getLogger("init");
301
+ logger.error("Failed to retrieve Embulk's location.", ex);
302
+ throw new RuntimeException(ex);
303
+ }
304
+ jruby.put("__internal_load_path__", jrubyLoadPath);
305
+ jruby.runScriptlet("$LOAD_PATH << File.expand_path(__internal_load_path__)");
306
+ jruby.remove("__internal_load_path__");
307
+ }
308
+ }
309
+
310
+ private static void processJRubyOption(final String jrubyOption, final RubyInstanceConfig jrubyInstanceConfig)
311
+ throws UnrecognizedJRubyOptionException, NotWorkingJRubyOptionException
312
+ {
313
+ if (jrubyOption.charAt(0) != '-') {
314
+ throw new UnrecognizedJRubyOptionException();
315
+ }
316
+
317
+ for (int index = 1; index < jrubyOption.length(); ++index) {
318
+ switch (jrubyOption.charAt(index)) {
319
+ case '-':
320
+ if (jrubyOption.equals("--dev")) {
321
+ // These do not cover everything "--dev" does, but they are the most that can be configured after JVM boot.
322
+ Options.COMPILE_INVOKEDYNAMIC.force("false"); // NOTE: Options is global.
323
+ jrubyInstanceConfig.setCompileMode(RubyInstanceConfig.CompileMode.OFF);
324
+ return;
325
+ }
326
+ else if (jrubyOption.equals("--client")) {
327
+ throw new NotWorkingJRubyOptionException();
328
+ }
329
+ else if (jrubyOption.equals("--server")) {
330
+ throw new NotWorkingJRubyOptionException();
331
+ }
332
+ throw new UnrecognizedJRubyOptionException();
333
+ default:
334
+ throw new UnrecognizedJRubyOptionException();
335
+ }
336
+ }
337
+ }
338
+
339
+ /**
340
+ * Returns a path to be added in JRuby's $LOAD_PATH.
341
+ *
342
+ * In case Embulk runs from the Embulk JAR file (normal case):
343
+ * "file:/some/directory/embulk.jar!"
344
+ *
345
+ * In case Embulk runs from outside a JAR file (irregular case):
346
+ * "/some/directory"
347
+ */
348
+ private static String buildJRubyLoadPath()
349
+ throws UnrecognizedJRubyLoadPathException
350
+ {
351
+ final ProtectionDomain protectionDomain;
352
+ try {
353
+ protectionDomain = JRubyScriptingModule.class.getProtectionDomain();
354
+ }
355
+ catch (SecurityException ex) {
356
+ throw new UnrecognizedJRubyLoadPathException("Failed to achieve ProtectionDomain", ex);
357
+ }
358
+
359
+ final CodeSource codeSource = protectionDomain.getCodeSource();
360
+ if (codeSource == null) {
361
+ throw new UnrecognizedJRubyLoadPathException("Failed to achieve CodeSource");
362
+ }
363
+
364
+ final URL locationUrl = codeSource.getLocation();
365
+ if (locationUrl == null) {
366
+ throw new UnrecognizedJRubyLoadPathException("Failed to achieve location");
367
+ }
368
+ else if (!locationUrl.getProtocol().equals("file")) {
369
+ throw new UnrecognizedJRubyLoadPathException("Invalid location: " + locationUrl.toString());
370
+ }
371
+
372
+ final Path locationPath;
373
+ try {
374
+ locationPath = Paths.get(locationUrl.toURI());
375
+ }
376
+ catch (URISyntaxException ex) {
377
+ throw new UnrecognizedJRubyLoadPathException("Invalid location: " + locationUrl.toString(), ex);
378
+ }
379
+
380
+ if (Files.isDirectory(locationPath)) { // Out of a JAR file
381
+ System.err.println("[WARN] Embulk looks running out of the Embulk jar file. It is unsupported.");
382
+ return locationPath.toString();
383
+ }
384
+
385
+ // TODO: Consider checking the file is really a JAR file.
386
+ return locationUrl.toString() + "!"; // Inside the Embulk JAR file
387
+ }
131
388
  }
132
389
  }
@@ -3,6 +3,22 @@ package org.embulk.spi;
3
3
  import java.util.List;
4
4
  import org.msgpack.value.ImmutableValue;
5
5
 
6
+ /**
7
+ * Page is an in-process (in-JVM) container of data records.
8
+ *
9
+ * It serializes records to byte[] (in org.embulk.spi.Buffer) in order to:
10
+ * A) Avoid the slowness of handling many Java Objects
11
+ * B) Avoid complexity by type-safe primitive arrays
12
+ * C) Track memory consumption by records
13
+ * D) Use off-heap memory
14
+ *
15
+ * (C) and (D) may not be so meaningful as of v0.7+ (or even earlier), as recent Embulk is unlikely to
16
+ * allocate so many Pages at the same time. Recent Embulk is streaming-driven instead of
17
+ * multithreaded queue-based.
18
+ *
19
+ * Page is NOT for inter-process communication. For multi-process execution such as MapReduce
20
+ * Executor, the executor plugin takes responsibility about interoperable serialization.
21
+ */
6
22
  public class Page
7
23
  {
8
24
  private final Buffer buffer;
@@ -0,0 +1,102 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import org.junit.Test;
4
+ import org.msgpack.value.Value;
5
+ import org.msgpack.value.ValueType;
6
+
7
+ import static org.junit.Assert.assertEquals;
8
+ import static org.junit.Assert.assertFalse;
9
+ import static org.junit.Assert.assertTrue;
10
+ import static org.junit.Assert.fail;
11
+
12
+ public class TestJsonParser
13
+ {
14
+ @Test
15
+ public void testString() throws Exception
16
+ {
17
+ final JsonParser parser = new JsonParser();
18
+ final Value msgpackValue = parser.parse("\"foobar\"");
19
+ assertFalse(msgpackValue.getValueType().isNumberType());
20
+ assertTrue(msgpackValue.getValueType().isStringType());
21
+ assertEquals("foobar", msgpackValue.asStringValue().toString());
22
+ }
23
+
24
+ @Test(expected = JsonParseException.class)
25
+ public void testStringUnquoted() throws Exception
26
+ {
27
+ final JsonParser parser = new JsonParser();
28
+ parser.parse("foobar");
29
+ }
30
+
31
+ @Test
32
+ public void testOrdinaryInteger() throws Exception
33
+ {
34
+ final JsonParser parser = new JsonParser();
35
+ final Value msgpackValue = parser.parse("12345");
36
+ assertTrue(msgpackValue.getValueType().isNumberType());
37
+ assertTrue(msgpackValue.getValueType().isIntegerType());
38
+ assertFalse(msgpackValue.getValueType().isFloatType());
39
+ assertFalse(msgpackValue.getValueType().isStringType());
40
+ assertEquals(12345, msgpackValue.asIntegerValue().asInt());
41
+ }
42
+
43
+ @Test
44
+ public void testExponentialInteger1() throws Exception
45
+ {
46
+ final JsonParser parser = new JsonParser();
47
+ final Value msgpackValue = parser.parse("12345e3");
48
+ assertTrue(msgpackValue.getValueType().isNumberType());
49
+ // TODO: Consider whether this needs to be an integer.
50
+ // See: https://github.com/embulk/embulk/issues/775
51
+ assertTrue(msgpackValue.getValueType().isFloatType());
52
+ assertFalse(msgpackValue.getValueType().isIntegerType());
53
+ assertFalse(msgpackValue.getValueType().isStringType());
54
+ assertEquals(12345000.0, msgpackValue.asFloatValue().toDouble(), 0.000000001);
55
+ // Not sure this |toString| is to be tested...
56
+ assertEquals("1.2345E7", msgpackValue.asFloatValue().toString());
57
+ }
58
+
59
+ @Test
60
+ public void testExponentialInteger2() throws Exception
61
+ {
62
+ final JsonParser parser = new JsonParser();
63
+ final Value msgpackValue = parser.parse("123e2");
64
+ assertTrue(msgpackValue.getValueType().isNumberType());
65
+ // TODO: Consider whether this needs to be an integer.
66
+ // See: https://github.com/embulk/embulk/issues/775
67
+ assertTrue(msgpackValue.getValueType().isFloatType());
68
+ assertFalse(msgpackValue.getValueType().isIntegerType());
69
+ assertFalse(msgpackValue.getValueType().isStringType());
70
+ assertEquals(12300.0, msgpackValue.asFloatValue().toDouble(), 0.000000001);
71
+ // Not sure this |toString| is to be tested...
72
+ assertEquals("12300.0", msgpackValue.asFloatValue().toString());
73
+ }
74
+
75
+ @Test
76
+ public void testOrdinaryFloat() throws Exception
77
+ {
78
+ final JsonParser parser = new JsonParser();
79
+ final Value msgpackValue = parser.parse("12345.12");
80
+ assertTrue(msgpackValue.getValueType().isNumberType());
81
+ assertTrue(msgpackValue.getValueType().isFloatType());
82
+ assertFalse(msgpackValue.getValueType().isIntegerType());
83
+ assertFalse(msgpackValue.getValueType().isStringType());
84
+ assertEquals(12345.12, msgpackValue.asFloatValue().toDouble(), 0.000000001);
85
+ // Not sure this |toString| is to be tested...
86
+ assertEquals("12345.12", msgpackValue.asFloatValue().toString());
87
+ }
88
+
89
+ @Test
90
+ public void testExponentialFloat() throws Exception
91
+ {
92
+ final JsonParser parser = new JsonParser();
93
+ final Value msgpackValue = parser.parse("1.234512E4");
94
+ assertTrue(msgpackValue.getValueType().isNumberType());
95
+ assertTrue(msgpackValue.getValueType().isFloatType());
96
+ assertFalse(msgpackValue.getValueType().isIntegerType());
97
+ assertFalse(msgpackValue.getValueType().isStringType());
98
+ assertEquals(12345.12, msgpackValue.asFloatValue().toDouble(), 0.000000001);
99
+ // Not sure this |toString| is to be tested...
100
+ assertEquals("12345.12", msgpackValue.asFloatValue().toString());
101
+ }
102
+ }