embulk 0.8.31-java → 0.8.32-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,13 +16,7 @@ import org.embulk.config.ConfigSource;
16
16
  */
17
17
  public class EmbulkSetup
18
18
  {
19
- public static EmbulkRunner setup(
20
- final List<String> jrubyOptions,
21
- final Map<String, Object> systemConfigGiven,
22
- final List<String> loadPaths,
23
- final List<String> pluginPaths,
24
- final List<String> classpaths,
25
- final String bundlePath)
19
+ public static EmbulkRunner setup(final Map<String, Object> systemConfigGiven)
26
20
  {
27
21
  // NOTE: When it was in Ruby "require 'json'" was required to format the system config into a JSON string.
28
22
 
@@ -50,6 +44,6 @@ public class EmbulkSetup
50
44
  bootstrap.setSystemConfig(systemConfig);
51
45
  final EmbulkEmbed embed = bootstrap.initialize(); // see embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.
52
46
 
53
- return new EmbulkRunner(embed, jrubyOptions, loadPaths, pluginPaths, classpaths, bundlePath);
47
+ return new EmbulkRunner(embed);
54
48
  }
55
49
  }
@@ -1,8 +1,17 @@
1
1
  package org.embulk.jruby;
2
2
 
3
+ import java.net.URISyntaxException;
4
+ import java.net.URL;
5
+ import java.nio.file.Files;
6
+ import java.nio.file.Path;
7
+ import java.nio.file.Paths;
8
+ import java.security.CodeSource;
9
+ import java.security.ProtectionDomain;
3
10
  import java.util.List;
4
11
  import java.util.ArrayList;
12
+ import java.util.Collections;
5
13
  import java.util.Set;
14
+ import org.slf4j.Logger;
6
15
  import org.slf4j.ILoggerFactory;
7
16
  import com.google.common.collect.ImmutableSet;
8
17
  import com.google.inject.Module;
@@ -14,8 +23,11 @@ import com.google.inject.Injector;
14
23
  import com.google.inject.Key;
15
24
  import com.google.inject.spi.Dependency;
16
25
  import com.google.inject.spi.ProviderWithDependencies;
26
+ import org.jruby.RubyInstanceConfig;
17
27
  import org.jruby.embed.LocalContextScope;
28
+ import org.jruby.embed.LocalVariableBehavior;
18
29
  import org.jruby.embed.ScriptingContainer;
30
+ import org.jruby.util.cli.Options;
19
31
  import org.embulk.plugin.PluginSource;
20
32
  import org.embulk.config.ConfigSource;
21
33
  import org.embulk.config.ModelManager;
@@ -44,6 +56,10 @@ public class JRubyScriptingModule
44
56
  private final Injector injector;
45
57
  private final boolean useGlobalRubyRuntime;
46
58
  private final String gemHome;
59
+ private final List<String> jrubyClasspath;
60
+ private final List<String> jrubyLoadPath;
61
+ private final List<String> jrubyOptions;
62
+ private final String jrubyBundlerPluginSourceDirectory;
47
63
 
48
64
  @Inject
49
65
  public ScriptingContainerProvider(Injector injector, @ForSystemConfig ConfigSource systemConfig)
@@ -57,13 +73,101 @@ public class JRubyScriptingModule
57
73
  this.gemHome = systemConfig.get(String.class, "gem_home", null);
58
74
 
59
75
  // TODO get jruby-home from systemConfig to call jruby.container.setHomeDirectory
60
- // TODO get jruby-load-paths from systemConfig to call jruby.container.setLoadPaths
76
+
77
+ final List jrubyLoadPathNonGeneric = systemConfig.get(List.class, "jruby_load_path", null);
78
+ final ArrayList<String> jrubyLoadPathBuilt = new ArrayList<String>();
79
+ if (jrubyLoadPathNonGeneric != null) {
80
+ for (final Object oneJRubyLoadPath : jrubyLoadPathNonGeneric) {
81
+ if (oneJRubyLoadPath instanceof String) {
82
+ jrubyLoadPathBuilt.add((String) oneJRubyLoadPath);
83
+ }
84
+ else {
85
+ // It should happen only in very irregular cases. Okay to create |Logger| every time.
86
+ Logger logger = injector.getInstance(ILoggerFactory.class).getLogger("init");
87
+ logger.warn("System config \"jruby_load_path\" contains non-String.");
88
+ jrubyLoadPathBuilt.add(oneJRubyLoadPath.toString());
89
+ }
90
+ }
91
+ }
92
+ this.jrubyLoadPath = Collections.unmodifiableList(jrubyLoadPathBuilt);
93
+
94
+ final List jrubyClasspathNonGeneric = systemConfig.get(List.class, "jruby_classpath", new ArrayList());
95
+ final ArrayList<String> jrubyClasspathBuilt = new ArrayList<String>();
96
+ if (jrubyClasspathNonGeneric != null) {
97
+ for (final Object oneJRubyClasspath : jrubyClasspathNonGeneric) {
98
+ if (oneJRubyClasspath instanceof String) {
99
+ jrubyClasspathBuilt.add((String) oneJRubyClasspath);
100
+ }
101
+ else {
102
+ // It should happen only in very irregular cases. Okay to create |Logger| every time.
103
+ Logger logger = injector.getInstance(ILoggerFactory.class).getLogger("init");
104
+ logger.warn("System config \"jruby_classpath\" contains non-String.");
105
+ jrubyClasspathBuilt.add(oneJRubyClasspath.toString());
106
+ }
107
+ }
108
+ }
109
+ this.jrubyClasspath = Collections.unmodifiableList(jrubyClasspathBuilt);
110
+
111
+ final List jrubyOptionsNonGeneric = systemConfig.get(List.class, "jruby_command_line_options", null);
112
+ final ArrayList<String> jrubyOptionsBuilt = new ArrayList<String>();
113
+ if (jrubyOptionsNonGeneric != null) {
114
+ for (final Object oneJRubyOption : jrubyOptionsNonGeneric) {
115
+ if (oneJRubyOption instanceof String) {
116
+ jrubyOptionsBuilt.add((String) oneJRubyOption);
117
+ }
118
+ else {
119
+ // It should happen only in very irregular cases. Okay to create |Logger| every time.
120
+ Logger logger = injector.getInstance(ILoggerFactory.class).getLogger("init");
121
+ logger.warn("System config \"jruby_command_line_options\" contains non-String.");
122
+ jrubyOptionsBuilt.add(oneJRubyOption.toString());
123
+ }
124
+ }
125
+ }
126
+ this.jrubyOptions = Collections.unmodifiableList(jrubyOptionsBuilt);
127
+
128
+ this.jrubyBundlerPluginSourceDirectory =
129
+ systemConfig.get(String.class, "jruby_global_bundler_plugin_source_directory", null);
61
130
  }
62
131
 
63
132
  public ScriptingContainer get()
64
133
  {
65
134
  LocalContextScope scope = (useGlobalRubyRuntime ? LocalContextScope.SINGLETON : LocalContextScope.SINGLETHREAD);
66
- ScriptingContainer jruby = new ScriptingContainer(scope);
135
+ ScriptingContainer jruby = new ScriptingContainer(scope, LocalVariableBehavior.PERSISTENT);
136
+ final RubyInstanceConfig jrubyInstanceConfig = jruby.getProvider().getRubyInstanceConfig();
137
+ for (final String jrubyOption : this.jrubyOptions) {
138
+ try {
139
+ processJRubyOption(jrubyOption, jrubyInstanceConfig);
140
+ }
141
+ catch (UnrecognizedJRubyOptionException ex) {
142
+ final Logger logger = this.injector.getInstance(ILoggerFactory.class).getLogger("init");
143
+ logger.error("The \"-R\" option(s) are not recognized in Embulk: -R" + jrubyOption +
144
+ ". Please add your requests at: https://github.com/embulk/embulk/issues/707", ex);
145
+ throw new RuntimeException(ex);
146
+
147
+ }
148
+ catch (NotWorkingJRubyOptionException ex) {
149
+ final Logger logger = this.injector.getInstance(ILoggerFactory.class).getLogger("init");
150
+ logger.warn("The \"-R\" option(s) do not work in Embulk: -R" + jrubyOption + ".", ex);
151
+ }
152
+ }
153
+
154
+ setBundlerPluginSourceDirectory(jruby, this.jrubyBundlerPluginSourceDirectory);
155
+
156
+ for (final String oneJRubyLoadPath : this.jrubyLoadPath) {
157
+ // ruby script directory (use unshift to make it highest priority)
158
+ jruby.put("__internal_load_path__", oneJRubyLoadPath);
159
+ // TODO: Check if $LOAD_PATH already contains it.
160
+ jruby.runScriptlet("$LOAD_PATH.unshift File.expand_path(__internal_load_path__)");
161
+ jruby.remove("__internal_load_path__");
162
+ }
163
+
164
+ for (final String oneJRubyClasspath : this.jrubyClasspath) {
165
+ jruby.put("__internal_classpath__", oneJRubyClasspath);
166
+ // $CLASSPATH object doesn't have concat method
167
+ // TODO: Check if $CLASSPATH already contains it.
168
+ jruby.runScriptlet("$CLASSPATH << __internal_classpath__");
169
+ jruby.remove("__internal_classpath__");
170
+ }
67
171
 
68
172
  // Search embulk/java/bootstrap.rb from a $LOAD_PATH.
69
173
  // $LOAD_PATH is set by lib/embulk/command/embulk_run.rb if Embulk starts
@@ -128,5 +232,158 @@ public class JRubyScriptingModule
128
232
  Dependency.get(Key.get(ModelManager.class)),
129
233
  Dependency.get(Key.get(BufferAllocator.class)));
130
234
  }
235
+
236
+ private static final class UnrecognizedJRubyOptionException extends Exception {}
237
+ private static final class NotWorkingJRubyOptionException extends Exception {}
238
+ private static final class UnrecognizedJRubyLoadPathException extends Exception {
239
+ public UnrecognizedJRubyLoadPathException(final String message)
240
+ {
241
+ super(message);
242
+ }
243
+
244
+ public UnrecognizedJRubyLoadPathException(final String message, final Throwable cause)
245
+ {
246
+ super(message, cause);
247
+ }
248
+ }
249
+
250
+ private void setBundlerPluginSourceDirectory(final ScriptingContainer jruby, final String directory)
251
+ {
252
+ if (directory != null) {
253
+ /* Environment variables are set in the selfrun script or bin/embulk:
254
+ * ENV['EMBULK_BUNDLE_PATH']: set through '-b' | '--bundle', or inherit from the runtime environment
255
+ * ENV['BUNDLE_GEMFILE']: set for "ENV['EMBULK_BUNDLE_PATH']/Gemfile"
256
+ * ENV['GEM_HOME']: unset
257
+ * ENV['GEM_PATH']: unset
258
+ */
259
+
260
+ // bundler is included in embulk-core.jar
261
+ jruby.runScriptlet("Gem.clear_paths");
262
+ jruby.runScriptlet("require 'bundler'");
263
+
264
+ jruby.runScriptlet("Bundler.load.setup_environment");
265
+ jruby.runScriptlet("require 'bundler/setup'");
266
+ // since here, `require` may load files of different (newer) embulk versions
267
+ // especially following 'embulk/command/embulk_main'.
268
+
269
+ // NOTE: It is intentionally not done by building a Ruby statement string from |directory|.
270
+ // It can cause insecure injections.
271
+ //
272
+ // add bundle directory path to load local plugins at ./embulk
273
+ jruby.put("__internal_bundler_plugin_source_directory__", directory);
274
+ jruby.runScriptlet("$LOAD_PATH << File.expand_path(__internal_bundler_plugin_source_directory__)");
275
+ jruby.remove("__internal_bundler_plugin_source_directory__");
276
+ }
277
+ else {
278
+ /* Environment variables are set in the selfrun script or bin/embulk:
279
+ * ENV['EMBULK_BUNDLE_PATH']: unset
280
+ * ENV['BUNDLE_GEMFILE']: unset
281
+ * ENV['GEM_HOME']: set for "~/.embulk/jruby/${ruby-version}"
282
+ * ENV['GEM_PATH']: set for ""
283
+ */
284
+
285
+ jruby.runScriptlet("Gem.clear_paths"); // force rubygems to reload GEM_HOME
286
+
287
+ // NOTE: The path from |buildJRubyLoadPath()| is added in $LOAD_PATH just in case.
288
+ // Though it is not mandatory just to run "embulk_main.rb", it may be required in later steps.
289
+ //
290
+ // NOTE: It is intentionally not done by building a Ruby statement string from |buildJRubyLoadPath()|.
291
+ // It can cause insecure injections.
292
+ //
293
+ // NOTE: It was written in Ruby as follows:
294
+ // $LOAD_PATH << File.expand_path('../../', File.dirname(__FILE__))
295
+ final String jrubyLoadPath;
296
+ try {
297
+ jrubyLoadPath = buildJRubyLoadPath();
298
+ }
299
+ catch (UnrecognizedJRubyLoadPathException ex) {
300
+ final Logger logger = this.injector.getInstance(ILoggerFactory.class).getLogger("init");
301
+ logger.error("Failed to retrieve Embulk's location.", ex);
302
+ throw new RuntimeException(ex);
303
+ }
304
+ jruby.put("__internal_load_path__", jrubyLoadPath);
305
+ jruby.runScriptlet("$LOAD_PATH << File.expand_path(__internal_load_path__)");
306
+ jruby.remove("__internal_load_path__");
307
+ }
308
+ }
309
+
310
+ private static void processJRubyOption(final String jrubyOption, final RubyInstanceConfig jrubyInstanceConfig)
311
+ throws UnrecognizedJRubyOptionException, NotWorkingJRubyOptionException
312
+ {
313
+ if (jrubyOption.charAt(0) != '-') {
314
+ throw new UnrecognizedJRubyOptionException();
315
+ }
316
+
317
+ for (int index = 1; index < jrubyOption.length(); ++index) {
318
+ switch (jrubyOption.charAt(index)) {
319
+ case '-':
320
+ if (jrubyOption.equals("--dev")) {
321
+ // These do not cover everything "--dev" implies, but they are most of what can still be configured after JVM boot.
322
+ Options.COMPILE_INVOKEDYNAMIC.force("false"); // NOTE: Options is global.
323
+ jrubyInstanceConfig.setCompileMode(RubyInstanceConfig.CompileMode.OFF);
324
+ return;
325
+ }
326
+ else if (jrubyOption.equals("--client")) {
327
+ throw new NotWorkingJRubyOptionException();
328
+ }
329
+ else if (jrubyOption.equals("--server")) {
330
+ throw new NotWorkingJRubyOptionException();
331
+ }
332
+ throw new UnrecognizedJRubyOptionException();
333
+ default:
334
+ throw new UnrecognizedJRubyOptionException();
335
+ }
336
+ }
337
+ }
338
+
339
+ /**
340
+ * Returns a path to be added in JRuby's $LOAD_PATH.
341
+ *
342
+ * In case Embulk runs from the Embulk JAR file (normal case):
343
+ * "file:/some/directory/embulk.jar!"
344
+ *
345
+ * In case Embulk runs outside of a JAR file (irregular case):
346
+ * "/some/directory"
347
+ */
348
+ private static String buildJRubyLoadPath()
349
+ throws UnrecognizedJRubyLoadPathException
350
+ {
351
+ final ProtectionDomain protectionDomain;
352
+ try {
353
+ protectionDomain = JRubyScriptingModule.class.getProtectionDomain();
354
+ }
355
+ catch (SecurityException ex) {
356
+ throw new UnrecognizedJRubyLoadPathException("Failed to achieve ProtectionDomain", ex);
357
+ }
358
+
359
+ final CodeSource codeSource = protectionDomain.getCodeSource();
360
+ if (codeSource == null) {
361
+ throw new UnrecognizedJRubyLoadPathException("Failed to achieve CodeSource");
362
+ }
363
+
364
+ final URL locationUrl = codeSource.getLocation();
365
+ if (locationUrl == null) {
366
+ throw new UnrecognizedJRubyLoadPathException("Failed to achieve location");
367
+ }
368
+ else if (!locationUrl.getProtocol().equals("file")) {
369
+ throw new UnrecognizedJRubyLoadPathException("Invalid location: " + locationUrl.toString());
370
+ }
371
+
372
+ final Path locationPath;
373
+ try {
374
+ locationPath = Paths.get(locationUrl.toURI());
375
+ }
376
+ catch (URISyntaxException ex) {
377
+ throw new UnrecognizedJRubyLoadPathException("Invalid location: " + locationUrl.toString(), ex);
378
+ }
379
+
380
+ if (Files.isDirectory(locationPath)) { // Out of a JAR file
381
+ System.err.println("[WARN] Embulk looks running out of the Embulk jar file. It is unsupported.");
382
+ return locationPath.toString();
383
+ }
384
+
385
+ // TODO: Consider checking the file is really a JAR file.
386
+ return locationUrl.toString() + "!"; // Inside the Embulk JAR file
387
+ }
131
388
  }
132
389
  }
@@ -3,6 +3,22 @@ package org.embulk.spi;
3
3
  import java.util.List;
4
4
  import org.msgpack.value.ImmutableValue;
5
5
 
6
+ /**
7
+ * Page is an in-process (in-JVM) container of data records.
8
+ *
9
+ * It serializes records to byte[] (in org.embulk.spi.Buffer) in order to:
10
+ * A) Avoid slowness by handling many Java Objects
11
+ * B) Avoid complexity by type-safe primitive arrays
12
+ * C) Track memory consumption by records
13
+ * D) Use off-heap memory
14
+ *
15
+ * (C) and (D) may not be so meaningful as of v0.7+ (or possibly earlier), as recent Embulk is unlikely
16
+ * to allocate so many Pages at the same time. Recent Embulk is streaming-driven rather than
17
+ * multithreaded and queue-based.
18
+ *
19
+ * Page is NOT for inter-process communication. For multi-process execution such as MapReduce
20
+ * Executor, the executor plugin takes responsibility for interoperable serialization.
21
+ */
6
22
  public class Page
7
23
  {
8
24
  private final Buffer buffer;
@@ -0,0 +1,102 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import org.junit.Test;
4
+ import org.msgpack.value.Value;
5
+ import org.msgpack.value.ValueType;
6
+
7
+ import static org.junit.Assert.assertEquals;
8
+ import static org.junit.Assert.assertFalse;
9
+ import static org.junit.Assert.assertTrue;
10
+ import static org.junit.Assert.fail;
11
+
12
+ public class TestJsonParser
13
+ {
14
+ @Test
15
+ public void testString() throws Exception
16
+ {
17
+ final JsonParser parser = new JsonParser();
18
+ final Value msgpackValue = parser.parse("\"foobar\"");
19
+ assertFalse(msgpackValue.getValueType().isNumberType());
20
+ assertTrue(msgpackValue.getValueType().isStringType());
21
+ assertEquals("foobar", msgpackValue.asStringValue().toString());
22
+ }
23
+
24
+ @Test(expected = JsonParseException.class)
25
+ public void testStringUnquoted() throws Exception
26
+ {
27
+ final JsonParser parser = new JsonParser();
28
+ parser.parse("foobar");
29
+ }
30
+
31
+ @Test
32
+ public void testOrdinaryInteger() throws Exception
33
+ {
34
+ final JsonParser parser = new JsonParser();
35
+ final Value msgpackValue = parser.parse("12345");
36
+ assertTrue(msgpackValue.getValueType().isNumberType());
37
+ assertTrue(msgpackValue.getValueType().isIntegerType());
38
+ assertFalse(msgpackValue.getValueType().isFloatType());
39
+ assertFalse(msgpackValue.getValueType().isStringType());
40
+ assertEquals(12345, msgpackValue.asIntegerValue().asInt());
41
+ }
42
+
43
+ @Test
44
+ public void testExponentialInteger1() throws Exception
45
+ {
46
+ final JsonParser parser = new JsonParser();
47
+ final Value msgpackValue = parser.parse("12345e3");
48
+ assertTrue(msgpackValue.getValueType().isNumberType());
49
+ // TODO: Consider this needs to be an integer?
50
+ // See: https://github.com/embulk/embulk/issues/775
51
+ assertTrue(msgpackValue.getValueType().isFloatType());
52
+ assertFalse(msgpackValue.getValueType().isIntegerType());
53
+ assertFalse(msgpackValue.getValueType().isStringType());
54
+ assertEquals(12345000.0, msgpackValue.asFloatValue().toDouble(), 0.000000001);
55
+ // Not sure this |toString| is to be tested...
56
+ assertEquals("1.2345E7", msgpackValue.asFloatValue().toString());
57
+ }
58
+
59
+ @Test
60
+ public void testExponentialInteger2() throws Exception
61
+ {
62
+ final JsonParser parser = new JsonParser();
63
+ final Value msgpackValue = parser.parse("123e2");
64
+ assertTrue(msgpackValue.getValueType().isNumberType());
65
+ // TODO: Consider this needs to be an integer?
66
+ // See: https://github.com/embulk/embulk/issues/775
67
+ assertTrue(msgpackValue.getValueType().isFloatType());
68
+ assertFalse(msgpackValue.getValueType().isIntegerType());
69
+ assertFalse(msgpackValue.getValueType().isStringType());
70
+ assertEquals(12300.0, msgpackValue.asFloatValue().toDouble(), 0.000000001);
71
+ // Not sure this |toString| is to be tested...
72
+ assertEquals("12300.0", msgpackValue.asFloatValue().toString());
73
+ }
74
+
75
+ @Test
76
+ public void testOrdinaryFloat() throws Exception
77
+ {
78
+ final JsonParser parser = new JsonParser();
79
+ final Value msgpackValue = parser.parse("12345.12");
80
+ assertTrue(msgpackValue.getValueType().isNumberType());
81
+ assertTrue(msgpackValue.getValueType().isFloatType());
82
+ assertFalse(msgpackValue.getValueType().isIntegerType());
83
+ assertFalse(msgpackValue.getValueType().isStringType());
84
+ assertEquals(12345.12, msgpackValue.asFloatValue().toDouble(), 0.000000001);
85
+ // Not sure this |toString| is to be tested...
86
+ assertEquals("12345.12", msgpackValue.asFloatValue().toString());
87
+ }
88
+
89
+ @Test
90
+ public void testExponentialFloat() throws Exception
91
+ {
92
+ final JsonParser parser = new JsonParser();
93
+ final Value msgpackValue = parser.parse("1.234512E4");
94
+ assertTrue(msgpackValue.getValueType().isNumberType());
95
+ assertTrue(msgpackValue.getValueType().isFloatType());
96
+ assertFalse(msgpackValue.getValueType().isIntegerType());
97
+ assertFalse(msgpackValue.getValueType().isStringType());
98
+ assertEquals(12345.12, msgpackValue.asFloatValue().toDouble(), 0.000000001);
99
+ // Not sure this |toString| is to be tested...
100
+ assertEquals("12345.12", msgpackValue.asFloatValue().toString());
101
+ }
102
+ }