embulk 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
@@ -0,0 +1,327 @@
|
|
1
|
+
package org.embulk.spi;
|
2
|
+
|
3
|
+
import java.io.Serializable;
|
4
|
+
import java.util.Map;
|
5
|
+
import java.util.List;
|
6
|
+
import java.util.Arrays;
|
7
|
+
import java.util.ArrayList;
|
8
|
+
import java.util.Comparator;
|
9
|
+
import java.util.Collections;
|
10
|
+
import com.google.common.collect.BiMap;
|
11
|
+
import com.google.common.collect.HashBiMap;
|
12
|
+
import io.airlift.slice.Slice;
|
13
|
+
import io.airlift.slice.Slices;
|
14
|
+
import org.embulk.spi.time.Timestamp;
|
15
|
+
|
16
|
+
public class PageBuilder
|
17
|
+
implements AutoCloseable
|
18
|
+
{
|
19
|
+
private final BufferAllocator allocator;
|
20
|
+
private final PageOutput output;
|
21
|
+
private final Schema schema;
|
22
|
+
private final int[] columnOffsets;
|
23
|
+
private final int fixedRecordSize;
|
24
|
+
|
25
|
+
private Buffer buffer;
|
26
|
+
private Slice bufferSlice;
|
27
|
+
|
28
|
+
private int count;
|
29
|
+
private int position;
|
30
|
+
private final byte[] nullBitSet;
|
31
|
+
private final BiMap<String, Integer> stringReferences = HashBiMap.create();
|
32
|
+
private int stringReferenceSize;
|
33
|
+
private int nextVariableLengthDataOffset;
|
34
|
+
|
35
|
+
public PageBuilder(BufferAllocator allocator, Schema schema, PageOutput output)
|
36
|
+
{
|
37
|
+
this.allocator = allocator;
|
38
|
+
this.output = output;
|
39
|
+
this.schema = schema;
|
40
|
+
this.columnOffsets = PageFormat.columnOffsets(schema);
|
41
|
+
this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
|
42
|
+
this.fixedRecordSize = PageFormat.recordHeaderSize(schema) + PageFormat.totalColumnSize(schema);
|
43
|
+
this.nextVariableLengthDataOffset = fixedRecordSize;
|
44
|
+
newBuffer();
|
45
|
+
}
|
46
|
+
|
47
|
+
private void newBuffer()
|
48
|
+
{
|
49
|
+
this.buffer = allocator.allocate(PageFormat.PAGE_HEADER_SIZE + fixedRecordSize);
|
50
|
+
this.bufferSlice = Slices.wrappedBuffer(buffer.array(), buffer.offset(), buffer.capacity());
|
51
|
+
this.count = 0;
|
52
|
+
this.position = PageFormat.PAGE_HEADER_SIZE;
|
53
|
+
this.stringReferences.clear();
|
54
|
+
this.stringReferenceSize = 0;
|
55
|
+
}
|
56
|
+
|
57
|
+
public Schema getSchema()
|
58
|
+
{
|
59
|
+
return schema;
|
60
|
+
}
|
61
|
+
|
62
|
+
public void setNull(Column column)
|
63
|
+
{
|
64
|
+
setNull(column.getIndex());
|
65
|
+
}
|
66
|
+
|
67
|
+
public void setNull(int columnIndex)
|
68
|
+
{
|
69
|
+
nullBitSet[columnIndex >>> 3] |= (1 << (columnIndex & 7));
|
70
|
+
}
|
71
|
+
|
72
|
+
public void setBoolean(Column column, boolean value)
|
73
|
+
{
|
74
|
+
// TODO check type?
|
75
|
+
setBoolean(column.getIndex(), value);
|
76
|
+
}
|
77
|
+
|
78
|
+
public void setBoolean(int columnIndex, boolean value)
|
79
|
+
{
|
80
|
+
bufferSlice.setByte(getOffset(columnIndex), value ? (byte) 1 : (byte) 0);
|
81
|
+
}
|
82
|
+
|
83
|
+
public void setLong(Column column, long value)
|
84
|
+
{
|
85
|
+
// TODO check type?
|
86
|
+
setLong(column.getIndex(), value);
|
87
|
+
}
|
88
|
+
|
89
|
+
public void setLong(int columnIndex, long value)
|
90
|
+
{
|
91
|
+
bufferSlice.setLong(getOffset(columnIndex), value);
|
92
|
+
}
|
93
|
+
|
94
|
+
public void setDouble(Column column, double value)
|
95
|
+
{
|
96
|
+
// TODO check type?
|
97
|
+
setDouble(column.getIndex(), value);
|
98
|
+
}
|
99
|
+
|
100
|
+
public void setDouble(int columnIndex, double value)
|
101
|
+
{
|
102
|
+
bufferSlice.setDouble(getOffset(columnIndex), value);
|
103
|
+
}
|
104
|
+
|
105
|
+
public void setString(Column column, String value)
|
106
|
+
{
|
107
|
+
// TODO check type?
|
108
|
+
setString(column.getIndex(), value);
|
109
|
+
}
|
110
|
+
|
111
|
+
public void setString(int columnIndex, String value)
|
112
|
+
{
|
113
|
+
Integer reuseIndex = stringReferences.get(value);
|
114
|
+
if (reuseIndex != null) {
|
115
|
+
bufferSlice.setInt(getOffset(columnIndex), reuseIndex);
|
116
|
+
} else {
|
117
|
+
int index = stringReferences.size();
|
118
|
+
stringReferences.put(value, index);
|
119
|
+
bufferSlice.setInt(getOffset(columnIndex), index);
|
120
|
+
stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
|
121
|
+
}
|
122
|
+
}
|
123
|
+
|
124
|
+
public void setTimestamp(Column column, Timestamp value)
|
125
|
+
{
|
126
|
+
// TODO check type?
|
127
|
+
setTimestamp(column.getIndex(), value);
|
128
|
+
}
|
129
|
+
|
130
|
+
public void setTimestamp(int columnIndex, Timestamp value)
|
131
|
+
{
|
132
|
+
int offset = getOffset(columnIndex);
|
133
|
+
bufferSlice.setLong(offset, value.getEpochSecond());
|
134
|
+
bufferSlice.setInt(offset + 8, value.getNano());
|
135
|
+
}
|
136
|
+
|
137
|
+
private int getOffset(int columnIndex)
|
138
|
+
{
|
139
|
+
return position + columnOffsets[columnIndex];
|
140
|
+
}
|
141
|
+
|
142
|
+
private static class StringReferenceSortComparator
|
143
|
+
implements Comparator<Map.Entry<String, Integer>>, Serializable
|
144
|
+
{
|
145
|
+
@Override
|
146
|
+
public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2)
|
147
|
+
{
|
148
|
+
return e1.getValue().compareTo(e2.getValue());
|
149
|
+
}
|
150
|
+
|
151
|
+
@Override
|
152
|
+
public boolean equals(Object obj)
|
153
|
+
{
|
154
|
+
return obj instanceof StringReferenceSortComparator;
|
155
|
+
}
|
156
|
+
}
|
157
|
+
|
158
|
+
private List<String> getSortedStringReferences()
|
159
|
+
{
|
160
|
+
ArrayList<Map.Entry<String, Integer>> s = new ArrayList<>(stringReferences.entrySet());
|
161
|
+
Collections.sort(s, new StringReferenceSortComparator());
|
162
|
+
String[] array = new String[s.size()];
|
163
|
+
for (int i=0; i < array.length; i++) {
|
164
|
+
array[i] = s.get(i).getKey();
|
165
|
+
}
|
166
|
+
return Arrays.asList(array);
|
167
|
+
}
|
168
|
+
|
169
|
+
public void addRecord()
|
170
|
+
{
|
171
|
+
// record header
|
172
|
+
bufferSlice.setInt(position, nextVariableLengthDataOffset); // nextVariableLengthDataOffset means record size
|
173
|
+
bufferSlice.setBytes(position + 4, nullBitSet);
|
174
|
+
count++;
|
175
|
+
|
176
|
+
this.position += nextVariableLengthDataOffset;
|
177
|
+
this.nextVariableLengthDataOffset = fixedRecordSize;
|
178
|
+
Arrays.fill(nullBitSet, (byte) 0);
|
179
|
+
|
180
|
+
// flush if next record will not fit in this buffer
|
181
|
+
if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) {
|
182
|
+
flush();
|
183
|
+
}
|
184
|
+
}
|
185
|
+
|
186
|
+
private void doFlush()
|
187
|
+
{
|
188
|
+
if (buffer != null && count > 0) {
|
189
|
+
// write page header
|
190
|
+
bufferSlice.setInt(0, count);
|
191
|
+
buffer.limit(position);
|
192
|
+
|
193
|
+
// flush page
|
194
|
+
Page page = Page.wrap(buffer).setStringReferences(getSortedStringReferences());
|
195
|
+
buffer = null;
|
196
|
+
bufferSlice = null;
|
197
|
+
output.add(page);
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
public void flush()
|
202
|
+
{
|
203
|
+
doFlush();
|
204
|
+
if (buffer == null) {
|
205
|
+
newBuffer();
|
206
|
+
}
|
207
|
+
}
|
208
|
+
|
209
|
+
public void finish()
|
210
|
+
{
|
211
|
+
doFlush();
|
212
|
+
output.finish();
|
213
|
+
}
|
214
|
+
|
215
|
+
@Override
|
216
|
+
public void close()
|
217
|
+
{
|
218
|
+
if (buffer != null) {
|
219
|
+
buffer.release();
|
220
|
+
buffer = null;
|
221
|
+
bufferSlice = null;
|
222
|
+
}
|
223
|
+
}
|
224
|
+
|
225
|
+
/* TODO for variable-length types
|
226
|
+
private void flushAndTakeOverRemaingData()
|
227
|
+
{
|
228
|
+
if (page != null) {
|
229
|
+
// page header
|
230
|
+
page.setInt(0, count);
|
231
|
+
|
232
|
+
Page lastPage = page;
|
233
|
+
|
234
|
+
this.page = allocator.allocatePage(Page.PAGE_HEADER_SIZE + fixedRecordSize + nextVariableLengthDataOffset);
|
235
|
+
page.setBytes(Page.PAGE_HEADER_SIZE, lastPage, position, nextVariableLengthDataOffset);
|
236
|
+
this.count = 0;
|
237
|
+
this.position = Page.PAGE_HEADER_SIZE;
|
238
|
+
|
239
|
+
output.add(lastPage);
|
240
|
+
}
|
241
|
+
}
|
242
|
+
|
243
|
+
public int getVariableLengthDataOffset()
|
244
|
+
{
|
245
|
+
return nextVariableLengthDataOffset;
|
246
|
+
}
|
247
|
+
|
248
|
+
public VariableLengthDataWriter setVariableLengthData(int columnIndex, int intData)
|
249
|
+
{
|
250
|
+
// Page.VARIABLE_LENGTH_COLUMN_SIZE is 4 bytes
|
251
|
+
page.setInt(position + columnOffsets[columnIndex], intData);
|
252
|
+
return new VariableLengthDataWriter(nextVariableLengthDataOffset);
|
253
|
+
}
|
254
|
+
|
255
|
+
Page ensureVariableLengthDataCapacity(int requiredOffsetFromPosition)
|
256
|
+
{
|
257
|
+
if (page.capacity() < position + requiredOffsetFromPosition) {
|
258
|
+
flushAndTakeOverRemaingData();
|
259
|
+
}
|
260
|
+
return page;
|
261
|
+
}
|
262
|
+
|
263
|
+
public class VariableLengthDataWriter
|
264
|
+
{
|
265
|
+
private int offsetFromPosition;
|
266
|
+
|
267
|
+
VariableLengthDataWriter(int offsetFromPosition)
|
268
|
+
{
|
269
|
+
this.offsetFromPosition = offsetFromPosition;
|
270
|
+
}
|
271
|
+
|
272
|
+
public void writeByte(byte value)
|
273
|
+
{
|
274
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 1);
|
275
|
+
page.setByte(position + offsetFromPosition, value);
|
276
|
+
offsetFromPosition += 1;
|
277
|
+
}
|
278
|
+
|
279
|
+
public void writeShort(short value)
|
280
|
+
{
|
281
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 2);
|
282
|
+
page.setShort(position + offsetFromPosition, value);
|
283
|
+
offsetFromPosition += 2;
|
284
|
+
}
|
285
|
+
|
286
|
+
public void writeInt(int value)
|
287
|
+
{
|
288
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 4);
|
289
|
+
page.setInt(position + offsetFromPosition, value);
|
290
|
+
offsetFromPosition += 4;
|
291
|
+
}
|
292
|
+
|
293
|
+
public void writeLong(long value)
|
294
|
+
{
|
295
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 8);
|
296
|
+
page.setLong(position + offsetFromPosition, value);
|
297
|
+
offsetFromPosition += 8;
|
298
|
+
}
|
299
|
+
|
300
|
+
public void writeFloat(float value)
|
301
|
+
{
|
302
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 4);
|
303
|
+
page.setFloat(position + offsetFromPosition, value);
|
304
|
+
offsetFromPosition += 4;
|
305
|
+
}
|
306
|
+
|
307
|
+
public void writeDouble(double value)
|
308
|
+
{
|
309
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 8);
|
310
|
+
page.setDouble(position + offsetFromPosition, value);
|
311
|
+
offsetFromPosition += 8;
|
312
|
+
}
|
313
|
+
|
314
|
+
public void writeBytes(byte[] data)
|
315
|
+
{
|
316
|
+
writeBytes(data, 0, data.length);
|
317
|
+
}
|
318
|
+
|
319
|
+
public void writeBytes(byte[] data, int off, int len)
|
320
|
+
{
|
321
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + len);
|
322
|
+
page.setBytes(position + offsetFromPosition, data, off, len);
|
323
|
+
offsetFromPosition += len;
|
324
|
+
}
|
325
|
+
}
|
326
|
+
*/
|
327
|
+
}
|
@@ -0,0 +1,47 @@
|
|
1
|
+
package org.embulk.spi;
|
2
|
+
|
3
|
+
abstract class PageFormat
|
4
|
+
{
|
5
|
+
// PageHeader
|
6
|
+
// +---+
|
7
|
+
// | 4 |
|
8
|
+
// +---+
|
9
|
+
// count (number of records)
|
10
|
+
|
11
|
+
private PageFormat() { }
|
12
|
+
|
13
|
+
static final int PAGE_HEADER_SIZE = 4;
|
14
|
+
|
15
|
+
// PageBuilder.setVariableLengthData and PageReader.readVariableLengthData
|
16
|
+
// uses 4 bytes integer
|
17
|
+
static final int VARIABLE_LENGTH_COLUMN_SIZE = 4;
|
18
|
+
|
19
|
+
static int nullBitSetSize(Schema schema)
|
20
|
+
{
|
21
|
+
return (schema.size() + 7) / 8;
|
22
|
+
}
|
23
|
+
|
24
|
+
static int recordHeaderSize(Schema schema)
|
25
|
+
{
|
26
|
+
return 4 + nullBitSetSize(schema);
|
27
|
+
}
|
28
|
+
|
29
|
+
static int totalColumnSize(Schema schema)
|
30
|
+
{
|
31
|
+
return recordHeaderSize(schema) + schema.getFixedStorageSize();
|
32
|
+
}
|
33
|
+
|
34
|
+
static int[] columnOffsets(Schema schema)
|
35
|
+
{
|
36
|
+
int[] offsets = new int[schema.size()];
|
37
|
+
|
38
|
+
if (!schema.isEmpty()) {
|
39
|
+
offsets[0] = recordHeaderSize(schema);
|
40
|
+
for (int i=0; i < schema.size()-1; i++) {
|
41
|
+
offsets[i+1] = offsets[i] + schema.getColumnType(i).getFixedStorageSize();
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
return offsets;
|
46
|
+
}
|
47
|
+
}
|
@@ -0,0 +1,227 @@
|
|
1
|
+
package org.embulk.spi;
|
2
|
+
|
3
|
+
import java.util.Iterator;
|
4
|
+
import io.airlift.slice.Slice;
|
5
|
+
import io.airlift.slice.Slices;
|
6
|
+
import org.embulk.spi.time.Timestamp;
|
7
|
+
|
8
|
+
public class PageReader
|
9
|
+
implements AutoCloseable
|
10
|
+
{
|
11
|
+
private final Schema schema;
|
12
|
+
private final int[] columnOffsets;
|
13
|
+
|
14
|
+
private Page page = SENTINEL;
|
15
|
+
private Slice pageSlice = null;
|
16
|
+
private int pageRecordCount = 0;
|
17
|
+
|
18
|
+
private int readCount = 0;
|
19
|
+
private int position;
|
20
|
+
private final byte[] nullBitSet;
|
21
|
+
|
22
|
+
private static final Page SENTINEL = Page.wrap(Buffer.wrap(new byte[4])); // buffer().release() does nothing
|
23
|
+
|
24
|
+
public PageReader(Schema schema)
|
25
|
+
{
|
26
|
+
this.schema = schema;
|
27
|
+
this.columnOffsets = PageFormat.columnOffsets(schema);
|
28
|
+
this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
|
29
|
+
}
|
30
|
+
|
31
|
+
public static int getRecordCount(Page page)
|
32
|
+
{
|
33
|
+
Buffer pageBuffer = page.buffer();
|
34
|
+
Slice pageSlice = Slices.wrappedBuffer(pageBuffer.array(), pageBuffer.offset(), pageBuffer.limit());
|
35
|
+
return pageSlice.getInt(0); // see page format
|
36
|
+
}
|
37
|
+
|
38
|
+
public void setPage(Page page)
|
39
|
+
{
|
40
|
+
this.page.buffer().release();
|
41
|
+
this.page = SENTINEL;
|
42
|
+
|
43
|
+
Buffer pageBuffer = page.buffer();
|
44
|
+
Slice pageSlice = Slices.wrappedBuffer(pageBuffer.array(), pageBuffer.offset(), pageBuffer.limit());
|
45
|
+
|
46
|
+
pageRecordCount = pageSlice.getInt(0); // see page format
|
47
|
+
readCount = 0;
|
48
|
+
position = PageFormat.PAGE_HEADER_SIZE;
|
49
|
+
|
50
|
+
this.page = page;
|
51
|
+
this.pageSlice = pageSlice;
|
52
|
+
}
|
53
|
+
|
54
|
+
public Schema getSchema()
|
55
|
+
{
|
56
|
+
return schema;
|
57
|
+
}
|
58
|
+
|
59
|
+
public boolean isNull(Column column)
|
60
|
+
{
|
61
|
+
return isNull(column.getIndex());
|
62
|
+
}
|
63
|
+
|
64
|
+
public boolean isNull(int columnIndex)
|
65
|
+
{
|
66
|
+
return (nullBitSet[columnIndex >>> 3] & (1 << (columnIndex & 7))) != 0;
|
67
|
+
}
|
68
|
+
|
69
|
+
public boolean getBoolean(Column column)
|
70
|
+
{
|
71
|
+
// TODO check type?
|
72
|
+
return getBoolean(column.getIndex());
|
73
|
+
}
|
74
|
+
|
75
|
+
public boolean getBoolean(int columnIndex)
|
76
|
+
{
|
77
|
+
return pageSlice.getByte(getOffset(columnIndex)) != (byte) 0;
|
78
|
+
}
|
79
|
+
|
80
|
+
public long getLong(Column column)
|
81
|
+
{
|
82
|
+
// TODO check type?
|
83
|
+
return getLong(column.getIndex());
|
84
|
+
}
|
85
|
+
|
86
|
+
public long getLong(int columnIndex)
|
87
|
+
{
|
88
|
+
return pageSlice.getLong(getOffset(columnIndex));
|
89
|
+
}
|
90
|
+
|
91
|
+
public double getDouble(Column column)
|
92
|
+
{
|
93
|
+
// TODO check type?
|
94
|
+
return getDouble(column.getIndex());
|
95
|
+
}
|
96
|
+
|
97
|
+
public double getDouble(int columnIndex)
|
98
|
+
{
|
99
|
+
return pageSlice.getDouble(getOffset(columnIndex));
|
100
|
+
}
|
101
|
+
|
102
|
+
public String getString(Column column)
|
103
|
+
{
|
104
|
+
// TODO check type?
|
105
|
+
return getString(column.getIndex());
|
106
|
+
}
|
107
|
+
|
108
|
+
public String getString(int columnIndex)
|
109
|
+
{
|
110
|
+
int index = pageSlice.getInt(getOffset(columnIndex));
|
111
|
+
return page.getStringReference(index);
|
112
|
+
}
|
113
|
+
|
114
|
+
public Timestamp getTimestamp(Column column)
|
115
|
+
{
|
116
|
+
// TODO check type?
|
117
|
+
return getTimestamp(column.getIndex());
|
118
|
+
}
|
119
|
+
|
120
|
+
public Timestamp getTimestamp(int columnIndex)
|
121
|
+
{
|
122
|
+
int offset = getOffset(columnIndex);
|
123
|
+
long sec = pageSlice.getLong(offset);
|
124
|
+
int nsec = pageSlice.getInt(offset + 8);
|
125
|
+
return Timestamp.ofEpochSecond(sec, nsec);
|
126
|
+
}
|
127
|
+
|
128
|
+
private int getOffset(int columnIndex)
|
129
|
+
{
|
130
|
+
return position + columnOffsets[columnIndex];
|
131
|
+
}
|
132
|
+
|
133
|
+
public boolean nextRecord()
|
134
|
+
{
|
135
|
+
if (pageRecordCount <= readCount) {
|
136
|
+
return false;
|
137
|
+
}
|
138
|
+
|
139
|
+
if (readCount > 0) {
|
140
|
+
// advance position excepting the first record
|
141
|
+
int lastRecordSize = pageSlice.getInt(position);
|
142
|
+
position += lastRecordSize;
|
143
|
+
}
|
144
|
+
|
145
|
+
readCount++;
|
146
|
+
pageSlice.getBytes(position + 4, nullBitSet, 0, nullBitSet.length);
|
147
|
+
|
148
|
+
return true;
|
149
|
+
}
|
150
|
+
|
151
|
+
@Override
|
152
|
+
public void close()
|
153
|
+
{
|
154
|
+
page.buffer().release();
|
155
|
+
page = SENTINEL;
|
156
|
+
}
|
157
|
+
|
158
|
+
/* TODO for variable-length types
|
159
|
+
public VariableLengthDataReader getVariableLengthData(int columnIndex, int variableLengthDataOffset)
|
160
|
+
{
|
161
|
+
return new VariableLengthDataReader(variableLengthDataOffset);
|
162
|
+
}
|
163
|
+
|
164
|
+
public class VariableLengthDataReader
|
165
|
+
{
|
166
|
+
private int offsetFromPosition;
|
167
|
+
|
168
|
+
VariableLengthDataReader(int offsetFromPosition)
|
169
|
+
{
|
170
|
+
this.offsetFromPosition = offsetFromPosition;
|
171
|
+
}
|
172
|
+
|
173
|
+
public byte readByte()
|
174
|
+
{
|
175
|
+
byte value = page.getByte(position + offsetFromPosition);
|
176
|
+
offsetFromPosition += 1;
|
177
|
+
return value;
|
178
|
+
}
|
179
|
+
|
180
|
+
public short readShort()
|
181
|
+
{
|
182
|
+
short value = page.getShort(position + offsetFromPosition);
|
183
|
+
offsetFromPosition += 2;
|
184
|
+
return value;
|
185
|
+
}
|
186
|
+
|
187
|
+
public int readInt()
|
188
|
+
{
|
189
|
+
int value = page.getInt(position + offsetFromPosition);
|
190
|
+
offsetFromPosition += 4;
|
191
|
+
return value;
|
192
|
+
}
|
193
|
+
|
194
|
+
public long readLong()
|
195
|
+
{
|
196
|
+
long value = page.getLong(position + offsetFromPosition);
|
197
|
+
offsetFromPosition += 8;
|
198
|
+
return value;
|
199
|
+
}
|
200
|
+
|
201
|
+
public float readFloat()
|
202
|
+
{
|
203
|
+
float value = page.getFloat(position + offsetFromPosition);
|
204
|
+
offsetFromPosition += 4;
|
205
|
+
return value;
|
206
|
+
}
|
207
|
+
|
208
|
+
public double readDouble()
|
209
|
+
{
|
210
|
+
double value = page.getDouble(position + offsetFromPosition);
|
211
|
+
offsetFromPosition += 8;
|
212
|
+
return value;
|
213
|
+
}
|
214
|
+
|
215
|
+
public void readBytes(byte[] data)
|
216
|
+
{
|
217
|
+
readBytes(data, 0, data.length);
|
218
|
+
}
|
219
|
+
|
220
|
+
public void readBytes(byte[] data, int off, int len)
|
221
|
+
{
|
222
|
+
page.getBytes(position + offsetFromPosition, data, off, len);
|
223
|
+
offsetFromPosition += len;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
*/
|
227
|
+
}
|