embulk 0.7.11-java → 0.8.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/README.md +1 -1
  4. data/build.gradle +2 -2
  5. data/embulk-core/build.gradle +2 -0
  6. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
  7. data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
  9. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
  10. data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
  13. data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
  15. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
  16. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
  25. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
  27. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
  28. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
  29. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
  30. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
  32. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
  33. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
  35. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
  36. data/embulk-docs/src/built-in.rst +40 -3
  37. data/embulk-docs/src/conf.py +2 -2
  38. data/embulk-docs/src/release.rst +1 -1
  39. data/embulk-docs/src/release/release-0.8.0.rst +68 -0
  40. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
  41. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
  42. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
  43. data/embulk.gemspec +1 -1
  44. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  45. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  46. data/gradlew +3 -7
  47. data/lib/embulk/column.rb +2 -0
  48. data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
  49. data/lib/embulk/command/embulk_new_plugin.rb +2 -0
  50. data/lib/embulk/command/embulk_run.rb +17 -10
  51. data/lib/embulk/data/bundle/.ruby-version +1 -1
  52. data/lib/embulk/data/new/java/build.gradle.erb +21 -0
  53. data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
  54. data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
  55. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  56. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  57. data/lib/embulk/data/new/java/gradlew +3 -7
  58. data/lib/embulk/data/new/ruby/.ruby-version +1 -1
  59. data/lib/embulk/guess/csv.rb +1 -1
  60. data/lib/embulk/guess/schema_guess.rb +6 -0
  61. data/lib/embulk/guess_plugin.rb +1 -1
  62. data/lib/embulk/java/imports.rb +4 -0
  63. data/lib/embulk/plugin_registry.rb +8 -12
  64. data/lib/embulk/schema.rb +6 -0
  65. data/lib/embulk/version.rb +1 -1
  66. data/test/guess/test_csv_guess.rb +170 -0
  67. data/test/helper.rb +2 -0
  68. metadata +17 -15
  69. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
  70. data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
  71. data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
  72. data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
  73. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
  74. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
  75. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
  76. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
  77. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
  78. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
  79. data/embulk-docs/src/release/release-0.7.11.rst +0 -13
@@ -9,6 +9,7 @@ import org.embulk.spi.type.DoubleType;
9
9
  import org.embulk.spi.type.LongType;
10
10
  import org.embulk.spi.type.TimestampType;
11
11
  import org.embulk.spi.type.StringType;
12
+ import org.embulk.spi.type.JsonType;
12
13
 
13
14
  public class Column
14
15
  {
@@ -57,6 +58,8 @@ public class Column
57
58
  visitor.stringColumn(this);
58
59
  } else if (type instanceof TimestampType) {
59
60
  visitor.timestampColumn(this);
61
+ } else if (type instanceof JsonType) {
62
+ visitor.jsonColumn(this);
60
63
  } else {
61
64
  assert(false);
62
65
  }
@@ -11,4 +11,6 @@ public interface ColumnVisitor
11
11
  void stringColumn(Column column);
12
12
 
13
13
  void timestampColumn(Column column);
14
+
15
+ void jsonColumn(Column column);
14
16
  }
@@ -47,6 +47,7 @@ public class ExecSession
47
47
  public static class Builder
48
48
  {
49
49
  private final Injector injector;
50
+ private ILoggerFactory loggerFactory;
50
51
  private Timestamp transactionTime;
51
52
 
52
53
  public Builder(Injector injector)
@@ -60,6 +61,12 @@ public class ExecSession
60
61
  return this;
61
62
  }
62
63
 
64
+ public Builder setLoggerFactory(ILoggerFactory loggerFactory)
65
+ {
66
+ this.loggerFactory = loggerFactory;
67
+ return this;
68
+ }
69
+
63
70
  public Builder setTransactionTime(Timestamp timestamp)
64
71
  {
65
72
  this.transactionTime = timestamp;
@@ -71,7 +78,7 @@ public class ExecSession
71
78
  if (transactionTime == null) {
72
79
  transactionTime = Timestamp.ofEpochMilli(System.currentTimeMillis()); // TODO get nanoseconds for default
73
80
  }
74
- return new ExecSession(injector, transactionTime);
81
+ return new ExecSession(injector, transactionTime, Optional.fromNullable(loggerFactory));
75
82
  }
76
83
  }
77
84
 
@@ -86,14 +93,14 @@ public class ExecSession
86
93
  this(injector,
87
94
  configSource.loadConfig(SessionTask.class).getTransactionTime().or(
88
95
  Timestamp.ofEpochMilli(System.currentTimeMillis())
89
- )
90
- ); // TODO get nanoseconds for default
96
+ ), // TODO get nanoseconds for default
97
+ null);
91
98
  }
92
99
 
93
- private ExecSession(Injector injector, Timestamp transactionTime)
100
+ private ExecSession(Injector injector, Timestamp transactionTime, Optional<ILoggerFactory> loggerFactory)
94
101
  {
95
102
  this.injector = injector;
96
- this.loggerFactory = injector.getInstance(ILoggerFactory.class);
103
+ this.loggerFactory = loggerFactory.or(injector.getInstance(ILoggerFactory.class));
97
104
  this.modelManager = injector.getInstance(ModelManager.class);
98
105
  this.pluginManager = injector.getInstance(PluginManager.class);
99
106
  this.bufferAllocator = injector.getInstance(BufferAllocator.class);
@@ -1,11 +1,13 @@
1
1
  package org.embulk.spi;
2
2
 
3
3
  import java.util.List;
4
+ import org.msgpack.value.ImmutableValue;
4
5
 
5
6
  public class Page
6
7
  {
7
8
  private final Buffer buffer;
8
9
  private List<String> stringReferences;
10
+ private List<ImmutableValue> valueReferences;
9
11
 
10
12
  protected Page(Buffer buffer)
11
13
  {
@@ -28,17 +30,34 @@ public class Page
28
30
  return this;
29
31
  }
30
32
 
33
+ public Page setValueReferences(List<ImmutableValue> values)
34
+ {
35
+ this.valueReferences = values;
36
+ return this;
37
+ }
38
+
31
39
  public List<String> getStringReferences()
32
40
  {
33
41
  // TODO used by mapreduce executor
34
42
  return stringReferences;
35
43
  }
36
44
 
45
+ public List<ImmutableValue> getValueReferences()
46
+ {
47
+ // TODO used by mapreduce executor
48
+ return valueReferences;
49
+ }
50
+
37
51
  public String getStringReference(int index)
38
52
  {
39
53
  return stringReferences.get(index);
40
54
  }
41
55
 
56
+ public ImmutableValue getValueReference(int index)
57
+ {
58
+ return valueReferences.get(index);
59
+ }
60
+
42
61
  public void release()
43
62
  {
44
63
  buffer.release();
@@ -11,6 +11,8 @@ import com.google.common.collect.BiMap;
11
11
  import com.google.common.collect.HashBiMap;
12
12
  import io.airlift.slice.Slice;
13
13
  import io.airlift.slice.Slices;
14
+ import org.msgpack.value.Value;
15
+ import org.msgpack.value.ImmutableValue;
14
16
  import org.embulk.spi.time.Timestamp;
15
17
 
16
18
  public class PageBuilder
@@ -29,7 +31,8 @@ public class PageBuilder
29
31
  private int position;
30
32
  private final byte[] nullBitSet;
31
33
  private final BiMap<String, Integer> stringReferences = HashBiMap.create();
32
- private int stringReferenceSize;
34
+ private List<ImmutableValue> valueReferences = new ArrayList<>();
35
+ private int referenceSize;
33
36
  private int nextVariableLengthDataOffset;
34
37
 
35
38
  public PageBuilder(BufferAllocator allocator, Schema schema, PageOutput output)
@@ -52,7 +55,8 @@ public class PageBuilder
52
55
  this.count = 0;
53
56
  this.position = PageFormat.PAGE_HEADER_SIZE;
54
57
  this.stringReferences.clear();
55
- this.stringReferenceSize = 0;
58
+ this.valueReferences = new ArrayList<>();
59
+ this.referenceSize = 0;
56
60
  }
57
61
 
58
62
  public Schema getSchema()
@@ -126,11 +130,26 @@ public class PageBuilder
126
130
  int index = stringReferences.size();
127
131
  stringReferences.put(value, index);
128
132
  bufferSlice.setInt(getOffset(columnIndex), index);
129
- stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
133
+ referenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
130
134
  }
131
135
  clearNull(columnIndex);
132
136
  }
133
137
 
138
+ public void setJson(Column column, Value value)
139
+ {
140
+ // TODO check type?
141
+ setJson(column.getIndex(), value);
142
+ }
143
+
144
+ public void setJson(int columnIndex, Value value)
145
+ {
146
+ int index = valueReferences.size();
147
+ valueReferences.add(value.immutableValue());
148
+ bufferSlice.setInt(getOffset(columnIndex), index);
149
+ referenceSize += 256; // TODO how to estimate size of the value?
150
+ clearNull(columnIndex);
151
+ }
152
+
134
153
  public void setTimestamp(Column column, Timestamp value)
135
154
  {
136
155
  // TODO check type?
@@ -189,7 +208,7 @@ public class PageBuilder
189
208
  Arrays.fill(nullBitSet, (byte) -1);
190
209
 
191
210
  // flush if next record will not fit in this buffer
192
- if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) {
211
+ if (buffer.capacity() < position + nextVariableLengthDataOffset + referenceSize) {
193
212
  flush();
194
213
  }
195
214
  }
@@ -202,7 +221,9 @@ public class PageBuilder
202
221
  buffer.limit(position);
203
222
 
204
223
  // flush page
205
- Page page = Page.wrap(buffer).setStringReferences(getSortedStringReferences());
224
+ Page page = Page.wrap(buffer)
225
+ .setStringReferences(getSortedStringReferences())
226
+ .setValueReferences(valueReferences);
206
227
  buffer = null;
207
228
  bufferSlice = null;
208
229
  output.add(page);
@@ -2,6 +2,7 @@ package org.embulk.spi;
2
2
 
3
3
  import io.airlift.slice.Slice;
4
4
  import io.airlift.slice.Slices;
5
+ import org.msgpack.value.Value;
5
6
  import org.embulk.spi.time.Timestamp;
6
7
 
7
8
  public class PageReader
@@ -124,6 +125,18 @@ public class PageReader
124
125
  return Timestamp.ofEpochSecond(sec, nsec);
125
126
  }
126
127
 
128
+ public Value getJson(Column column)
129
+ {
130
+ // TODO check type?
131
+ return getJson(column.getIndex());
132
+ }
133
+
134
+ public Value getJson(int columnIndex)
135
+ {
136
+ int index = pageSlice.getInt(getOffset(columnIndex));
137
+ return page.getValueReference(index);
138
+ }
139
+
127
140
  private int getOffset(int columnIndex)
128
141
  {
129
142
  return position + columnOffsets[columnIndex];
@@ -0,0 +1,17 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import org.embulk.spi.DataException;
4
+
5
+ public class JsonParseException
6
+ extends DataException
7
+ {
8
+ public JsonParseException(String message)
9
+ {
10
+ super(message);
11
+ }
12
+
13
+ public JsonParseException(String message, Throwable cause)
14
+ {
15
+ super(message, cause);
16
+ }
17
+ }
@@ -0,0 +1,125 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.Map;
6
+ import java.util.HashMap;
7
+ import java.io.IOException;
8
+ import org.msgpack.value.Value;
9
+ import org.msgpack.value.ValueFactory;
10
+ import com.fasterxml.jackson.core.JsonFactory;
11
+ import com.fasterxml.jackson.core.JsonParser.Feature;
12
+ import com.fasterxml.jackson.core.JsonToken;
13
+
14
+ public class JsonParser
15
+ {
16
+ private final JsonFactory factory;
17
+
18
+ public JsonParser()
19
+ {
20
+ this.factory = new JsonFactory();
21
+ factory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
22
+ }
23
+
24
+ public Value parse(String json)
25
+ {
26
+ return new ParseContext(json).parse();
27
+ }
28
+
29
+ private class ParseContext
30
+ {
31
+ private final String json;
32
+ private final com.fasterxml.jackson.core.JsonParser parser;
33
+
34
+ public ParseContext(String json)
35
+ {
36
+ this.json = json;
37
+ try {
38
+ this.parser = factory.createParser(json);
39
+ }
40
+ catch (Exception ex) {
41
+ throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
42
+ }
43
+ }
44
+
45
+ public Value parse()
46
+ {
47
+ try {
48
+ JsonToken token = parser.nextToken();
49
+ return jsonTokenToValue(token);
50
+ }
51
+ catch (JsonParseException ex) {
52
+ throw ex;
53
+ }
54
+ catch (Exception ex) {
55
+ throw new JsonParseException("Failed to parse a JSON string: "+sampleJsonString(json), ex);
56
+ }
57
+ }
58
+
59
+ private Value jsonTokenToValue(JsonToken token)
60
+ throws IOException
61
+ {
62
+ switch(token) {
63
+ case VALUE_NULL:
64
+ return ValueFactory.newNil();
65
+ case VALUE_TRUE:
66
+ return ValueFactory.newBoolean(true);
67
+ case VALUE_FALSE:
68
+ return ValueFactory.newBoolean(false);
69
+ case VALUE_NUMBER_FLOAT:
70
+ return ValueFactory.newFloat(parser.getDoubleValue());
71
+ case VALUE_NUMBER_INT:
72
+ try {
73
+ return ValueFactory.newInteger(parser.getLongValue());
74
+ }
75
+ catch (JsonParseException ex) {
76
+ return ValueFactory.newInteger(parser.getBigIntegerValue());
77
+ }
78
+ case VALUE_STRING:
79
+ return ValueFactory.newString(parser.getText());
80
+ case START_ARRAY: {
81
+ List<Value> list = new ArrayList<>();
82
+ while (true) {
83
+ token = parser.nextToken();
84
+ if(token == JsonToken.END_ARRAY) {
85
+ return ValueFactory.newArray(list);
86
+ }
87
+ list.add(jsonTokenToValue(token));
88
+ }
89
+ }
90
+ case START_OBJECT:
91
+ Map<Value, Value> map = new HashMap<>();
92
+ while (true) {
93
+ token = parser.nextToken();
94
+ if (token == JsonToken.END_OBJECT) {
95
+ return ValueFactory.newMap(map);
96
+ }
97
+ String key = parser.getCurrentName();
98
+ if (key == null) {
99
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
100
+ }
101
+ token = parser.nextToken();
102
+ Value value = jsonTokenToValue(token);
103
+ map.put(ValueFactory.newString(key), value);
104
+ }
105
+ case VALUE_EMBEDDED_OBJECT:
106
+ case FIELD_NAME:
107
+ case END_ARRAY:
108
+ case END_OBJECT:
109
+ case NOT_AVAILABLE:
110
+ default:
111
+ throw new JsonParseException("Unexpected token "+token+" at "+parser.getTokenLocation());
112
+ }
113
+ }
114
+ }
115
+
116
+ private static String sampleJsonString(String json)
117
+ {
118
+ if (json.length() < 100) {
119
+ return json;
120
+ }
121
+ else {
122
+ return json.substring(0, 97) + "...";
123
+ }
124
+ }
125
+ }
@@ -0,0 +1,55 @@
1
+ package org.embulk.spi.json;
2
+
3
+ import java.io.ByteArrayOutputStream;
4
+ import java.io.IOException;
5
+ import com.google.common.base.Throwables;
6
+ import org.msgpack.core.MessagePack;
7
+ import org.msgpack.core.MessageBufferPacker;
8
+ import org.msgpack.core.buffer.MessageBuffer;
9
+ import org.msgpack.value.Value;
10
+ import org.jruby.Ruby;
11
+ import org.jruby.RubyString;
12
+ import org.jruby.util.ByteList;
13
+ import org.jcodings.specific.ASCIIEncoding;
14
+
15
+ public class RubyValueApi
16
+ {
17
+ public static Value fromMessagePack(RubyString content)
18
+ {
19
+ ByteList list = content.getByteList();
20
+ try {
21
+ return MessagePack.newDefaultUnpacker(list.unsafeBytes(), list.begin(), list.length()).unpackValue();
22
+ }
23
+ catch (IOException ex) {
24
+ throw Throwables.propagate(ex);
25
+ }
26
+ }
27
+
28
+ private static class OpenByteArrayOutputStream
29
+ extends ByteArrayOutputStream
30
+ {
31
+ public byte[] getBuffer()
32
+ {
33
+ return buf;
34
+ }
35
+
36
+ public int getCount()
37
+ {
38
+ return count;
39
+ }
40
+ }
41
+
42
+ public static RubyString toMessagePack(Ruby runtime, Value value)
43
+ {
44
+ try {
45
+ MessageBufferPacker packer = MessagePack.newDefaultBufferPacker();
46
+ packer.packValue(value);
47
+ MessageBuffer mb = packer.toMessageBuffer();
48
+ ByteList list = new ByteList(mb.array(), mb.arrayOffset(), mb.size(), ASCIIEncoding.INSTANCE, false);
49
+ return RubyString.newString(runtime, list);
50
+ }
51
+ catch (IOException ex) {
52
+ throw Throwables.propagate(ex);
53
+ }
54
+ }
55
+ }
@@ -0,0 +1,14 @@
1
+ package org.embulk.spi.type;
2
+
3
+ import org.msgpack.value.Value;
4
+
5
+ public class JsonType
6
+ extends AbstractType
7
+ {
8
+ static final JsonType JSON = new JsonType();
9
+
10
+ private JsonType()
11
+ {
12
+ super("json", Value.class, 4);
13
+ }
14
+ }
@@ -20,6 +20,7 @@ public class TypeDeserializer
20
20
  builder.put(DoubleType.DOUBLE.getName(), DoubleType.DOUBLE);
21
21
  builder.put(StringType.STRING.getName(), StringType.STRING);
22
22
  builder.put(TimestampType.TIMESTAMP.getName(), TimestampType.TIMESTAMP);
23
+ builder.put(JsonType.JSON.getName(), JsonType.JSON);
23
24
  stringToTypeMap = builder.build();
24
25
  }
25
26