embulk 0.7.11-java → 0.8.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/README.md +1 -1
  4. data/build.gradle +2 -2
  5. data/embulk-core/build.gradle +2 -0
  6. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
  7. data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
  9. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
  10. data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
  13. data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
  15. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
  16. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
  25. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
  27. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
  28. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
  29. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
  30. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
  32. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
  33. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
  35. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
  36. data/embulk-docs/src/built-in.rst +40 -3
  37. data/embulk-docs/src/conf.py +2 -2
  38. data/embulk-docs/src/release.rst +1 -1
  39. data/embulk-docs/src/release/release-0.8.0.rst +68 -0
  40. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
  41. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
  42. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
  43. data/embulk.gemspec +1 -1
  44. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  45. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  46. data/gradlew +3 -7
  47. data/lib/embulk/column.rb +2 -0
  48. data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
  49. data/lib/embulk/command/embulk_new_plugin.rb +2 -0
  50. data/lib/embulk/command/embulk_run.rb +17 -10
  51. data/lib/embulk/data/bundle/.ruby-version +1 -1
  52. data/lib/embulk/data/new/java/build.gradle.erb +21 -0
  53. data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
  54. data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
  55. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  56. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  57. data/lib/embulk/data/new/java/gradlew +3 -7
  58. data/lib/embulk/data/new/ruby/.ruby-version +1 -1
  59. data/lib/embulk/guess/csv.rb +1 -1
  60. data/lib/embulk/guess/schema_guess.rb +6 -0
  61. data/lib/embulk/guess_plugin.rb +1 -1
  62. data/lib/embulk/java/imports.rb +4 -0
  63. data/lib/embulk/plugin_registry.rb +8 -12
  64. data/lib/embulk/schema.rb +6 -0
  65. data/lib/embulk/version.rb +1 -1
  66. data/test/guess/test_csv_guess.rb +170 -0
  67. data/test/helper.rb +2 -0
  68. metadata +17 -15
  69. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
  70. data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
  71. data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
  72. data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
  73. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
  74. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
  75. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
  76. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
  77. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
  78. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
  79. data/embulk-docs/src/release/release-0.7.11.rst +0 -13
@@ -9,6 +9,7 @@ import org.embulk.spi.type.DoubleType;
9
9
  import org.embulk.spi.type.LongType;
10
10
  import org.embulk.spi.type.TimestampType;
11
11
  import org.embulk.spi.type.StringType;
12
+ import org.embulk.spi.type.JsonType;
12
13
 
13
14
  public class Column
14
15
  {
@@ -57,6 +58,8 @@ public class Column
57
58
  visitor.stringColumn(this);
58
59
  } else if (type instanceof TimestampType) {
59
60
  visitor.timestampColumn(this);
61
+ } else if (type instanceof JsonType) {
62
+ visitor.jsonColumn(this);
60
63
  } else {
61
64
  assert(false);
62
65
  }
@@ -11,4 +11,6 @@ public interface ColumnVisitor
11
11
  void stringColumn(Column column);
12
12
 
13
13
  void timestampColumn(Column column);
14
+
15
+ void jsonColumn(Column column);
14
16
  }
@@ -47,6 +47,7 @@ public class ExecSession
47
47
  public static class Builder
48
48
  {
49
49
  private final Injector injector;
50
+ private ILoggerFactory loggerFactory;
50
51
  private Timestamp transactionTime;
51
52
 
52
53
  public Builder(Injector injector)
@@ -60,6 +61,12 @@ public class ExecSession
60
61
  return this;
61
62
  }
62
63
 
64
+ public Builder setLoggerFactory(ILoggerFactory loggerFactory)
65
+ {
66
+ this.loggerFactory = loggerFactory;
67
+ return this;
68
+ }
69
+
63
70
  public Builder setTransactionTime(Timestamp timestamp)
64
71
  {
65
72
  this.transactionTime = timestamp;
@@ -71,7 +78,7 @@ public class ExecSession
71
78
  if (transactionTime == null) {
72
79
  transactionTime = Timestamp.ofEpochMilli(System.currentTimeMillis()); // TODO get nanoseconds for default
73
80
  }
74
- return new ExecSession(injector, transactionTime);
81
+ return new ExecSession(injector, transactionTime, Optional.fromNullable(loggerFactory));
75
82
  }
76
83
  }
77
84
 
@@ -86,14 +93,14 @@ public class ExecSession
86
93
  this(injector,
87
94
  configSource.loadConfig(SessionTask.class).getTransactionTime().or(
88
95
  Timestamp.ofEpochMilli(System.currentTimeMillis())
89
- )
90
- ); // TODO get nanoseconds for default
96
+ ), // TODO get nanoseconds for default
97
+ null);
91
98
  }
92
99
 
93
- private ExecSession(Injector injector, Timestamp transactionTime)
100
+ private ExecSession(Injector injector, Timestamp transactionTime, Optional<ILoggerFactory> loggerFactory)
94
101
  {
95
102
  this.injector = injector;
96
- this.loggerFactory = injector.getInstance(ILoggerFactory.class);
103
+ this.loggerFactory = loggerFactory.or(injector.getInstance(ILoggerFactory.class));
97
104
  this.modelManager = injector.getInstance(ModelManager.class);
98
105
  this.pluginManager = injector.getInstance(PluginManager.class);
99
106
  this.bufferAllocator = injector.getInstance(BufferAllocator.class);
@@ -1,11 +1,13 @@
1
1
  package org.embulk.spi;
2
2
 
3
3
  import java.util.List;
4
+ import org.msgpack.value.ImmutableValue;
4
5
 
5
6
  public class Page
6
7
  {
7
8
  private final Buffer buffer;
8
9
  private List<String> stringReferences;
10
+ private List<ImmutableValue> valueReferences;
9
11
 
10
12
  protected Page(Buffer buffer)
11
13
  {
@@ -28,17 +30,34 @@ public class Page
28
30
  return this;
29
31
  }
30
32
 
33
+ public Page setValueReferences(List<ImmutableValue> values)
34
+ {
35
+ this.valueReferences = values;
36
+ return this;
37
+ }
38
+
31
39
  public List<String> getStringReferences()
32
40
  {
33
41
  // TODO used by mapreduce executor
34
42
  return stringReferences;
35
43
  }
36
44
 
45
+ public List<ImmutableValue> getValueReferences()
46
+ {
47
+ // TODO used by mapreduce executor
48
+ return valueReferences;
49
+ }
50
+
37
51
  public String getStringReference(int index)
38
52
  {
39
53
  return stringReferences.get(index);
40
54
  }
41
55
 
56
+ public ImmutableValue getValueReference(int index)
57
+ {
58
+ return valueReferences.get(index);
59
+ }
60
+
42
61
  public void release()
43
62
  {
44
63
  buffer.release();
@@ -11,6 +11,8 @@ import com.google.common.collect.BiMap;
11
11
  import com.google.common.collect.HashBiMap;
12
12
  import io.airlift.slice.Slice;
13
13
  import io.airlift.slice.Slices;
14
+ import org.msgpack.value.Value;
15
+ import org.msgpack.value.ImmutableValue;
14
16
  import org.embulk.spi.time.Timestamp;
15
17
 
16
18
  public class PageBuilder
@@ -29,7 +31,8 @@ public class PageBuilder
29
31
  private int position;
30
32
  private final byte[] nullBitSet;
31
33
  private final BiMap<String, Integer> stringReferences = HashBiMap.create();
32
- private int stringReferenceSize;
34
+ private List<ImmutableValue> valueReferences = new ArrayList<>();
35
+ private int referenceSize;
33
36
  private int nextVariableLengthDataOffset;
34
37
 
35
38
  public PageBuilder(BufferAllocator allocator, Schema schema, PageOutput output)
@@ -52,7 +55,8 @@ public class PageBuilder
52
55
  this.count = 0;
53
56
  this.position = PageFormat.PAGE_HEADER_SIZE;
54
57
  this.stringReferences.clear();
55
- this.stringReferenceSize = 0;
58
+ this.valueReferences = new ArrayList<>();
59
+ this.referenceSize = 0;
56
60
  }
57
61
 
58
62
  public Schema getSchema()
@@ -126,11 +130,26 @@ public class PageBuilder
126
130
  int index = stringReferences.size();
127
131
  stringReferences.put(value, index);
128
132
  bufferSlice.setInt(getOffset(columnIndex), index);
129
- stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
133
+ referenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
130
134
  }
131
135
  clearNull(columnIndex);
132
136
  }
133
137
 
138
+ public void setJson(Column column, Value value)
139
+ {
140
+ // TODO check type?
141
+ setJson(column.getIndex(), value);
142
+ }
143
+
144
+ public void setJson(int columnIndex, Value value)
145
+ {
146
+ int index = valueReferences.size();
147
+ valueReferences.add(value.immutableValue());
148
+ bufferSlice.setInt(getOffset(columnIndex), index);
149
+ referenceSize += 256; // TODO how to estimate size of the value?
150
+ clearNull(columnIndex);
151
+ }
152
+
134
153
  public void setTimestamp(Column column, Timestamp value)
135
154
  {
136
155
  // TODO check type?
@@ -189,7 +208,7 @@ public class PageBuilder
189
208
  Arrays.fill(nullBitSet, (byte) -1);
190
209
 
191
210
  // flush if next record will not fit in this buffer
192
- if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) {
211
+ if (buffer.capacity() < position + nextVariableLengthDataOffset + referenceSize) {
193
212
  flush();
194
213
  }
195
214
  }
@@ -202,7 +221,9 @@ public class PageBuilder
202
221
  buffer.limit(position);
203
222
 
204
223
  // flush page
205
- Page page = Page.wrap(buffer).setStringReferences(getSortedStringReferences());
224
+ Page page = Page.wrap(buffer)
225
+ .setStringReferences(getSortedStringReferences())
226
+ .setValueReferences(valueReferences);
206
227
  buffer = null;
207
228
  bufferSlice = null;
208
229
  output.add(page);
@@ -2,6 +2,7 @@ package org.embulk.spi;
2
2
 
3
3
  import io.airlift.slice.Slice;
4
4
  import io.airlift.slice.Slices;
5
+ import org.msgpack.value.Value;
5
6
  import org.embulk.spi.time.Timestamp;
6
7
 
7
8
  public class PageReader
@@ -124,6 +125,18 @@ public class PageReader
124
125
  return Timestamp.ofEpochSecond(sec, nsec);
125
126
  }
126
127
 
128
+ public Value getJson(Column column)
129
+ {
130
+ // TODO check type?
131
+ return getJson(column.getIndex());
132
+ }
133
+
134
+ public Value getJson(int columnIndex)
135
+ {
136
+ int index = pageSlice.getInt(getOffset(columnIndex));
137
+ return page.getValueReference(index);
138
+ }
139
+
127
140
  private int getOffset(int columnIndex)
128
141
  {
129
142
  return position + columnOffsets[columnIndex];
@@ -0,0 +1,17 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import org.embulk.spi.DataException;
4
+
5
+ public class JsonParseException
6
+ extends DataException
7
+ {
8
+ public JsonParseException(String message)
9
+ {
10
+ super(message);
11
+ }
12
+
13
+ public JsonParseException(String message, Throwable cause)
14
+ {
15
+ super(message, cause);
16
+ }
17
+ }
@@ -0,0 +1,125 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.Map;
6
+ import java.util.HashMap;
7
+ import java.io.IOException;
8
+ import org.msgpack.value.Value;
9
+ import org.msgpack.value.ValueFactory;
10
+ import com.fasterxml.jackson.core.JsonFactory;
11
+ import com.fasterxml.jackson.core.JsonParser.Feature;
12
+ import com.fasterxml.jackson.core.JsonToken;
13
+
14
+ public class JsonParser
15
+ {
16
+ private final JsonFactory factory;
17
+
18
+ public JsonParser()
19
+ {
20
+ this.factory = new JsonFactory();
21
+ factory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
22
+ }
23
+
24
+ public Value parse(String json)
25
+ {
26
+ return new ParseContext(json).parse();
27
+ }
28
+
29
+ private class ParseContext
30
+ {
31
+ private final String json;
32
+ private final com.fasterxml.jackson.core.JsonParser parser;
33
+
34
+ public ParseContext(String json)
35
+ {
36
+ this.json = json;
37
+ try {
38
+ this.parser = factory.createParser(json);
39
+ }
40
+ catch (Exception ex) {
41
+ throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
42
+ }
43
+ }
44
+
45
+ public Value parse()
46
+ {
47
+ try {
48
+ JsonToken token = parser.nextToken();
49
+ return jsonTokenToValue(token);
50
+ }
51
+ catch (JsonParseException ex) {
52
+ throw ex;
53
+ }
54
+ catch (Exception ex) {
55
+ throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
56
+ }
57
+ }
58
+
59
+ private Value jsonTokenToValue(JsonToken token)
60
+ throws IOException
61
+ {
62
+ switch(token) {
63
+ case VALUE_NULL:
64
+ return ValueFactory.newNil();
65
+ case VALUE_TRUE:
66
+ return ValueFactory.newBoolean(true);
67
+ case VALUE_FALSE:
68
+ return ValueFactory.newBoolean(false);
69
+ case VALUE_NUMBER_FLOAT:
70
+ return ValueFactory.newFloat(parser.getDoubleValue());
71
+ case VALUE_NUMBER_INT:
72
+ try {
73
+ return ValueFactory.newInteger(parser.getLongValue());
74
+ }
75
+ catch (JsonParseException ex) {
76
+ return ValueFactory.newInteger(parser.getBigIntegerValue());
77
+ }
78
+ case VALUE_STRING:
79
+ return ValueFactory.newString(parser.getText());
80
+ case START_ARRAY: {
81
+ List<Value> list = new ArrayList<>();
82
+ while (true) {
83
+ token = parser.nextToken();
84
+ if(token == JsonToken.END_ARRAY) {
85
+ return ValueFactory.newArray(list);
86
+ }
87
+ list.add(jsonTokenToValue(token));
88
+ }
89
+ }
90
+ case START_OBJECT:
91
+ Map<Value, Value> map = new HashMap<>();
92
+ while (true) {
93
+ token = parser.nextToken();
94
+ if (token == JsonToken.END_OBJECT) {
95
+ return ValueFactory.newMap(map);
96
+ }
97
+ String key = parser.getCurrentName();
98
+ if (key == null) {
99
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
100
+ }
101
+ token = parser.nextToken();
102
+ Value value = jsonTokenToValue(token);
103
+ map.put(ValueFactory.newString(key), value);
104
+ }
105
+ case VALUE_EMBEDDED_OBJECT:
106
+ case FIELD_NAME:
107
+ case END_ARRAY:
108
+ case END_OBJECT:
109
+ case NOT_AVAILABLE:
110
+ default:
111
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
112
+ }
113
+ }
114
+ }
115
+
116
+ private static String sampleJsonString(String json)
117
+ {
118
+ if (json.length() < 100) {
119
+ return json;
120
+ }
121
+ else {
122
+ return json.substring(0, 97) + "...";
123
+ }
124
+ }
125
+ }
@@ -0,0 +1,55 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import java.io.ByteArrayOutputStream;
4
+ import java.io.IOException;
5
+ import com.google.common.base.Throwables;
6
+ import org.msgpack.core.MessagePack;
7
+ import org.msgpack.core.MessageBufferPacker;
8
+ import org.msgpack.core.buffer.MessageBuffer;
9
+ import org.msgpack.value.Value;
10
+ import org.jruby.Ruby;
11
+ import org.jruby.RubyString;
12
+ import org.jruby.util.ByteList;
13
+ import org.jcodings.specific.ASCIIEncoding;
14
+
15
+ public class RubyValueApi
16
+ {
17
+ public static Value fromMessagePack(RubyString content)
18
+ {
19
+ ByteList list = content.getByteList();
20
+ try {
21
+ return MessagePack.newDefaultUnpacker(list.unsafeBytes(), list.begin(), list.length()).unpackValue();
22
+ }
23
+ catch (IOException ex) {
24
+ throw Throwables.propagate(ex);
25
+ }
26
+ }
27
+
28
+ private static class OpenByteArrayOutputStream
29
+ extends ByteArrayOutputStream
30
+ {
31
+ public byte[] getBuffer()
32
+ {
33
+ return buf;
34
+ }
35
+
36
+ public int getCount()
37
+ {
38
+ return count;
39
+ }
40
+ }
41
+
42
+ public static RubyString toMessagePack(Ruby runtime, Value value)
43
+ {
44
+ try {
45
+ MessageBufferPacker packer = MessagePack.newDefaultBufferPacker();
46
+ packer.packValue(value);
47
+ MessageBuffer mb = packer.toMessageBuffer();
48
+ ByteList list = new ByteList(mb.array(), mb.arrayOffset(), mb.size(), ASCIIEncoding.INSTANCE, false);
49
+ return RubyString.newString(runtime, list);
50
+ }
51
+ catch (IOException ex) {
52
+ throw Throwables.propagate(ex);
53
+ }
54
+ }
55
+ }
@@ -0,0 +1,14 @@
1
+ package org.embulk.spi.type;
2
+
3
+ import org.msgpack.value.Value;
4
+
5
+ public class JsonType
6
+ extends AbstractType
7
+ {
8
+ static final JsonType JSON = new JsonType();
9
+
10
+ private JsonType()
11
+ {
12
+ super("json", Value.class, 4);
13
+ }
14
+ }
@@ -20,6 +20,7 @@ public class TypeDeserializer
20
20
  builder.put(DoubleType.DOUBLE.getName(), DoubleType.DOUBLE);
21
21
  builder.put(StringType.STRING.getName(), StringType.STRING);
22
22
  builder.put(TimestampType.TIMESTAMP.getName(), TimestampType.TIMESTAMP);
23
+ builder.put(JsonType.JSON.getName(), JsonType.JSON);
23
24
  stringToTypeMap = builder.build();
24
25
  }
25
26