embulk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
package org.embulk.spi;
|
|
2
|
+
|
|
3
|
+
import java.io.Serializable;
|
|
4
|
+
import java.util.Map;
|
|
5
|
+
import java.util.List;
|
|
6
|
+
import java.util.Arrays;
|
|
7
|
+
import java.util.ArrayList;
|
|
8
|
+
import java.util.Comparator;
|
|
9
|
+
import java.util.Collections;
|
|
10
|
+
import com.google.common.collect.BiMap;
|
|
11
|
+
import com.google.common.collect.HashBiMap;
|
|
12
|
+
import io.airlift.slice.Slice;
|
|
13
|
+
import io.airlift.slice.Slices;
|
|
14
|
+
import org.embulk.spi.time.Timestamp;
|
|
15
|
+
|
|
16
|
+
public class PageBuilder
|
|
17
|
+
implements AutoCloseable
|
|
18
|
+
{
|
|
19
|
+
private final BufferAllocator allocator;
|
|
20
|
+
private final PageOutput output;
|
|
21
|
+
private final Schema schema;
|
|
22
|
+
private final int[] columnOffsets;
|
|
23
|
+
private final int fixedRecordSize;
|
|
24
|
+
|
|
25
|
+
private Buffer buffer;
|
|
26
|
+
private Slice bufferSlice;
|
|
27
|
+
|
|
28
|
+
private int count;
|
|
29
|
+
private int position;
|
|
30
|
+
private final byte[] nullBitSet;
|
|
31
|
+
private final BiMap<String, Integer> stringReferences = HashBiMap.create();
|
|
32
|
+
private int stringReferenceSize;
|
|
33
|
+
private int nextVariableLengthDataOffset;
|
|
34
|
+
|
|
35
|
+
public PageBuilder(BufferAllocator allocator, Schema schema, PageOutput output)
|
|
36
|
+
{
|
|
37
|
+
this.allocator = allocator;
|
|
38
|
+
this.output = output;
|
|
39
|
+
this.schema = schema;
|
|
40
|
+
this.columnOffsets = PageFormat.columnOffsets(schema);
|
|
41
|
+
this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
|
|
42
|
+
this.fixedRecordSize = PageFormat.recordHeaderSize(schema) + PageFormat.totalColumnSize(schema);
|
|
43
|
+
this.nextVariableLengthDataOffset = fixedRecordSize;
|
|
44
|
+
newBuffer();
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
private void newBuffer()
|
|
48
|
+
{
|
|
49
|
+
this.buffer = allocator.allocate(PageFormat.PAGE_HEADER_SIZE + fixedRecordSize);
|
|
50
|
+
this.bufferSlice = Slices.wrappedBuffer(buffer.array(), buffer.offset(), buffer.capacity());
|
|
51
|
+
this.count = 0;
|
|
52
|
+
this.position = PageFormat.PAGE_HEADER_SIZE;
|
|
53
|
+
this.stringReferences.clear();
|
|
54
|
+
this.stringReferenceSize = 0;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
public Schema getSchema()
|
|
58
|
+
{
|
|
59
|
+
return schema;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
public void setNull(Column column)
|
|
63
|
+
{
|
|
64
|
+
setNull(column.getIndex());
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
public void setNull(int columnIndex)
|
|
68
|
+
{
|
|
69
|
+
nullBitSet[columnIndex >>> 3] |= (1 << (columnIndex & 7));
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
public void setBoolean(Column column, boolean value)
|
|
73
|
+
{
|
|
74
|
+
// TODO check type?
|
|
75
|
+
setBoolean(column.getIndex(), value);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
public void setBoolean(int columnIndex, boolean value)
|
|
79
|
+
{
|
|
80
|
+
bufferSlice.setByte(getOffset(columnIndex), value ? (byte) 1 : (byte) 0);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
public void setLong(Column column, long value)
|
|
84
|
+
{
|
|
85
|
+
// TODO check type?
|
|
86
|
+
setLong(column.getIndex(), value);
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
public void setLong(int columnIndex, long value)
|
|
90
|
+
{
|
|
91
|
+
bufferSlice.setLong(getOffset(columnIndex), value);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
public void setDouble(Column column, double value)
|
|
95
|
+
{
|
|
96
|
+
// TODO check type?
|
|
97
|
+
setDouble(column.getIndex(), value);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
public void setDouble(int columnIndex, double value)
|
|
101
|
+
{
|
|
102
|
+
bufferSlice.setDouble(getOffset(columnIndex), value);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
public void setString(Column column, String value)
|
|
106
|
+
{
|
|
107
|
+
// TODO check type?
|
|
108
|
+
setString(column.getIndex(), value);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
public void setString(int columnIndex, String value)
|
|
112
|
+
{
|
|
113
|
+
Integer reuseIndex = stringReferences.get(value);
|
|
114
|
+
if (reuseIndex != null) {
|
|
115
|
+
bufferSlice.setInt(getOffset(columnIndex), reuseIndex);
|
|
116
|
+
} else {
|
|
117
|
+
int index = stringReferences.size();
|
|
118
|
+
stringReferences.put(value, index);
|
|
119
|
+
bufferSlice.setInt(getOffset(columnIndex), index);
|
|
120
|
+
stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
public void setTimestamp(Column column, Timestamp value)
|
|
125
|
+
{
|
|
126
|
+
// TODO check type?
|
|
127
|
+
setTimestamp(column.getIndex(), value);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
public void setTimestamp(int columnIndex, Timestamp value)
|
|
131
|
+
{
|
|
132
|
+
int offset = getOffset(columnIndex);
|
|
133
|
+
bufferSlice.setLong(offset, value.getEpochSecond());
|
|
134
|
+
bufferSlice.setInt(offset + 8, value.getNano());
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
private int getOffset(int columnIndex)
|
|
138
|
+
{
|
|
139
|
+
return position + columnOffsets[columnIndex];
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
private static class StringReferenceSortComparator
|
|
143
|
+
implements Comparator<Map.Entry<String, Integer>>, Serializable
|
|
144
|
+
{
|
|
145
|
+
@Override
|
|
146
|
+
public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2)
|
|
147
|
+
{
|
|
148
|
+
return e1.getValue().compareTo(e2.getValue());
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
@Override
|
|
152
|
+
public boolean equals(Object obj)
|
|
153
|
+
{
|
|
154
|
+
return obj instanceof StringReferenceSortComparator;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
private List<String> getSortedStringReferences()
|
|
159
|
+
{
|
|
160
|
+
ArrayList<Map.Entry<String, Integer>> s = new ArrayList<>(stringReferences.entrySet());
|
|
161
|
+
Collections.sort(s, new StringReferenceSortComparator());
|
|
162
|
+
String[] array = new String[s.size()];
|
|
163
|
+
for (int i=0; i < array.length; i++) {
|
|
164
|
+
array[i] = s.get(i).getKey();
|
|
165
|
+
}
|
|
166
|
+
return Arrays.asList(array);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
public void addRecord()
|
|
170
|
+
{
|
|
171
|
+
// record header
|
|
172
|
+
bufferSlice.setInt(position, nextVariableLengthDataOffset); // nextVariableLengthDataOffset means record size
|
|
173
|
+
bufferSlice.setBytes(position + 4, nullBitSet);
|
|
174
|
+
count++;
|
|
175
|
+
|
|
176
|
+
this.position += nextVariableLengthDataOffset;
|
|
177
|
+
this.nextVariableLengthDataOffset = fixedRecordSize;
|
|
178
|
+
Arrays.fill(nullBitSet, (byte) 0);
|
|
179
|
+
|
|
180
|
+
// flush if next record will not fit in this buffer
|
|
181
|
+
if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) {
|
|
182
|
+
flush();
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
private void doFlush()
|
|
187
|
+
{
|
|
188
|
+
if (buffer != null && count > 0) {
|
|
189
|
+
// write page header
|
|
190
|
+
bufferSlice.setInt(0, count);
|
|
191
|
+
buffer.limit(position);
|
|
192
|
+
|
|
193
|
+
// flush page
|
|
194
|
+
Page page = Page.wrap(buffer).setStringReferences(getSortedStringReferences());
|
|
195
|
+
buffer = null;
|
|
196
|
+
bufferSlice = null;
|
|
197
|
+
output.add(page);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
public void flush()
|
|
202
|
+
{
|
|
203
|
+
doFlush();
|
|
204
|
+
if (buffer == null) {
|
|
205
|
+
newBuffer();
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
public void finish()
|
|
210
|
+
{
|
|
211
|
+
doFlush();
|
|
212
|
+
output.finish();
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
@Override
|
|
216
|
+
public void close()
|
|
217
|
+
{
|
|
218
|
+
if (buffer != null) {
|
|
219
|
+
buffer.release();
|
|
220
|
+
buffer = null;
|
|
221
|
+
bufferSlice = null;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/* TODO for variable-length types
|
|
226
|
+
private void flushAndTakeOverRemaingData()
|
|
227
|
+
{
|
|
228
|
+
if (page != null) {
|
|
229
|
+
// page header
|
|
230
|
+
page.setInt(0, count);
|
|
231
|
+
|
|
232
|
+
Page lastPage = page;
|
|
233
|
+
|
|
234
|
+
this.page = allocator.allocatePage(Page.PAGE_HEADER_SIZE + fixedRecordSize + nextVariableLengthDataOffset);
|
|
235
|
+
page.setBytes(Page.PAGE_HEADER_SIZE, lastPage, position, nextVariableLengthDataOffset);
|
|
236
|
+
this.count = 0;
|
|
237
|
+
this.position = Page.PAGE_HEADER_SIZE;
|
|
238
|
+
|
|
239
|
+
output.add(lastPage);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
public int getVariableLengthDataOffset()
|
|
244
|
+
{
|
|
245
|
+
return nextVariableLengthDataOffset;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
public VariableLengthDataWriter setVariableLengthData(int columnIndex, int intData)
|
|
249
|
+
{
|
|
250
|
+
// Page.VARIABLE_LENGTH_COLUMN_SIZE is 4 bytes
|
|
251
|
+
page.setInt(position + columnOffsets[columnIndex], intData);
|
|
252
|
+
return new VariableLengthDataWriter(nextVariableLengthDataOffset);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
Page ensureVariableLengthDataCapacity(int requiredOffsetFromPosition)
|
|
256
|
+
{
|
|
257
|
+
if (page.capacity() < position + requiredOffsetFromPosition) {
|
|
258
|
+
flushAndTakeOverRemaingData();
|
|
259
|
+
}
|
|
260
|
+
return page;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
public class VariableLengthDataWriter
|
|
264
|
+
{
|
|
265
|
+
private int offsetFromPosition;
|
|
266
|
+
|
|
267
|
+
VariableLengthDataWriter(int offsetFromPosition)
|
|
268
|
+
{
|
|
269
|
+
this.offsetFromPosition = offsetFromPosition;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
public void writeByte(byte value)
|
|
273
|
+
{
|
|
274
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 1);
|
|
275
|
+
page.setByte(position + offsetFromPosition, value);
|
|
276
|
+
offsetFromPosition += 1;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
public void writeShort(short value)
|
|
280
|
+
{
|
|
281
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 2);
|
|
282
|
+
page.setShort(position + offsetFromPosition, value);
|
|
283
|
+
offsetFromPosition += 2;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
public void writeInt(int value)
|
|
287
|
+
{
|
|
288
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 4);
|
|
289
|
+
page.setInt(position + offsetFromPosition, value);
|
|
290
|
+
offsetFromPosition += 4;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
public void writeLong(long value)
|
|
294
|
+
{
|
|
295
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 8);
|
|
296
|
+
page.setLong(position + offsetFromPosition, value);
|
|
297
|
+
offsetFromPosition += 8;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
public void writeFloat(float value)
|
|
301
|
+
{
|
|
302
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 4);
|
|
303
|
+
page.setFloat(position + offsetFromPosition, value);
|
|
304
|
+
offsetFromPosition += 4;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
public void writeDouble(double value)
|
|
308
|
+
{
|
|
309
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + 8);
|
|
310
|
+
page.setDouble(position + offsetFromPosition, value);
|
|
311
|
+
offsetFromPosition += 8;
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
public void writeBytes(byte[] data)
|
|
315
|
+
{
|
|
316
|
+
writeBytes(data, 0, data.length);
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
public void writeBytes(byte[] data, int off, int len)
|
|
320
|
+
{
|
|
321
|
+
ensureVariableLengthDataCapacity(offsetFromPosition + len);
|
|
322
|
+
page.setBytes(position + offsetFromPosition, data, off, len);
|
|
323
|
+
offsetFromPosition += len;
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
*/
|
|
327
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
package org.embulk.spi;
|
|
2
|
+
|
|
3
|
+
abstract class PageFormat
|
|
4
|
+
{
|
|
5
|
+
// PageHeader
|
|
6
|
+
// +---+
|
|
7
|
+
// | 4 |
|
|
8
|
+
// +---+
|
|
9
|
+
// count (number of records)
|
|
10
|
+
|
|
11
|
+
private PageFormat() { }
|
|
12
|
+
|
|
13
|
+
static final int PAGE_HEADER_SIZE = 4;
|
|
14
|
+
|
|
15
|
+
// PageBuilder.setVariableLengthData and PageReader.readVariableLengthData
|
|
16
|
+
// uses 4 bytes integer
|
|
17
|
+
static final int VARIABLE_LENGTH_COLUMN_SIZE = 4;
|
|
18
|
+
|
|
19
|
+
static int nullBitSetSize(Schema schema)
|
|
20
|
+
{
|
|
21
|
+
return (schema.size() + 7) / 8;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
static int recordHeaderSize(Schema schema)
|
|
25
|
+
{
|
|
26
|
+
return 4 + nullBitSetSize(schema);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
static int totalColumnSize(Schema schema)
|
|
30
|
+
{
|
|
31
|
+
return recordHeaderSize(schema) + schema.getFixedStorageSize();
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
static int[] columnOffsets(Schema schema)
|
|
35
|
+
{
|
|
36
|
+
int[] offsets = new int[schema.size()];
|
|
37
|
+
|
|
38
|
+
if (!schema.isEmpty()) {
|
|
39
|
+
offsets[0] = recordHeaderSize(schema);
|
|
40
|
+
for (int i=0; i < schema.size()-1; i++) {
|
|
41
|
+
offsets[i+1] = offsets[i] + schema.getColumnType(i).getFixedStorageSize();
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return offsets;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
package org.embulk.spi;
|
|
2
|
+
|
|
3
|
+
import java.util.Iterator;
|
|
4
|
+
import io.airlift.slice.Slice;
|
|
5
|
+
import io.airlift.slice.Slices;
|
|
6
|
+
import org.embulk.spi.time.Timestamp;
|
|
7
|
+
|
|
8
|
+
public class PageReader
|
|
9
|
+
implements AutoCloseable
|
|
10
|
+
{
|
|
11
|
+
private final Schema schema;
|
|
12
|
+
private final int[] columnOffsets;
|
|
13
|
+
|
|
14
|
+
private Page page = SENTINEL;
|
|
15
|
+
private Slice pageSlice = null;
|
|
16
|
+
private int pageRecordCount = 0;
|
|
17
|
+
|
|
18
|
+
private int readCount = 0;
|
|
19
|
+
private int position;
|
|
20
|
+
private final byte[] nullBitSet;
|
|
21
|
+
|
|
22
|
+
private static final Page SENTINEL = Page.wrap(Buffer.wrap(new byte[4])); // buffer().release() does nothing
|
|
23
|
+
|
|
24
|
+
public PageReader(Schema schema)
|
|
25
|
+
{
|
|
26
|
+
this.schema = schema;
|
|
27
|
+
this.columnOffsets = PageFormat.columnOffsets(schema);
|
|
28
|
+
this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
public static int getRecordCount(Page page)
|
|
32
|
+
{
|
|
33
|
+
Buffer pageBuffer = page.buffer();
|
|
34
|
+
Slice pageSlice = Slices.wrappedBuffer(pageBuffer.array(), pageBuffer.offset(), pageBuffer.limit());
|
|
35
|
+
return pageSlice.getInt(0); // see page format
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
public void setPage(Page page)
|
|
39
|
+
{
|
|
40
|
+
this.page.buffer().release();
|
|
41
|
+
this.page = SENTINEL;
|
|
42
|
+
|
|
43
|
+
Buffer pageBuffer = page.buffer();
|
|
44
|
+
Slice pageSlice = Slices.wrappedBuffer(pageBuffer.array(), pageBuffer.offset(), pageBuffer.limit());
|
|
45
|
+
|
|
46
|
+
pageRecordCount = pageSlice.getInt(0); // see page format
|
|
47
|
+
readCount = 0;
|
|
48
|
+
position = PageFormat.PAGE_HEADER_SIZE;
|
|
49
|
+
|
|
50
|
+
this.page = page;
|
|
51
|
+
this.pageSlice = pageSlice;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
public Schema getSchema()
|
|
55
|
+
{
|
|
56
|
+
return schema;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
public boolean isNull(Column column)
|
|
60
|
+
{
|
|
61
|
+
return isNull(column.getIndex());
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
public boolean isNull(int columnIndex)
|
|
65
|
+
{
|
|
66
|
+
return (nullBitSet[columnIndex >>> 3] & (1 << (columnIndex & 7))) != 0;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
public boolean getBoolean(Column column)
|
|
70
|
+
{
|
|
71
|
+
// TODO check type?
|
|
72
|
+
return getBoolean(column.getIndex());
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
public boolean getBoolean(int columnIndex)
|
|
76
|
+
{
|
|
77
|
+
return pageSlice.getByte(getOffset(columnIndex)) != (byte) 0;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
public long getLong(Column column)
|
|
81
|
+
{
|
|
82
|
+
// TODO check type?
|
|
83
|
+
return getLong(column.getIndex());
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
public long getLong(int columnIndex)
|
|
87
|
+
{
|
|
88
|
+
return pageSlice.getLong(getOffset(columnIndex));
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
public double getDouble(Column column)
|
|
92
|
+
{
|
|
93
|
+
// TODO check type?
|
|
94
|
+
return getDouble(column.getIndex());
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
public double getDouble(int columnIndex)
|
|
98
|
+
{
|
|
99
|
+
return pageSlice.getDouble(getOffset(columnIndex));
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
public String getString(Column column)
|
|
103
|
+
{
|
|
104
|
+
// TODO check type?
|
|
105
|
+
return getString(column.getIndex());
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
public String getString(int columnIndex)
|
|
109
|
+
{
|
|
110
|
+
int index = pageSlice.getInt(getOffset(columnIndex));
|
|
111
|
+
return page.getStringReference(index);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
public Timestamp getTimestamp(Column column)
|
|
115
|
+
{
|
|
116
|
+
// TODO check type?
|
|
117
|
+
return getTimestamp(column.getIndex());
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
public Timestamp getTimestamp(int columnIndex)
|
|
121
|
+
{
|
|
122
|
+
int offset = getOffset(columnIndex);
|
|
123
|
+
long sec = pageSlice.getLong(offset);
|
|
124
|
+
int nsec = pageSlice.getInt(offset + 8);
|
|
125
|
+
return Timestamp.ofEpochSecond(sec, nsec);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
private int getOffset(int columnIndex)
|
|
129
|
+
{
|
|
130
|
+
return position + columnOffsets[columnIndex];
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
public boolean nextRecord()
|
|
134
|
+
{
|
|
135
|
+
if (pageRecordCount <= readCount) {
|
|
136
|
+
return false;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
if (readCount > 0) {
|
|
140
|
+
// advance position excepting the first record
|
|
141
|
+
int lastRecordSize = pageSlice.getInt(position);
|
|
142
|
+
position += lastRecordSize;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
readCount++;
|
|
146
|
+
pageSlice.getBytes(position + 4, nullBitSet, 0, nullBitSet.length);
|
|
147
|
+
|
|
148
|
+
return true;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
@Override
|
|
152
|
+
public void close()
|
|
153
|
+
{
|
|
154
|
+
page.buffer().release();
|
|
155
|
+
page = SENTINEL;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
/* TODO for variable-length types
|
|
159
|
+
public VariableLengthDataReader getVariableLengthData(int columnIndex, int variableLengthDataOffset)
|
|
160
|
+
{
|
|
161
|
+
return new VariableLengthDataReader(variableLengthDataOffset);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
public class VariableLengthDataReader
|
|
165
|
+
{
|
|
166
|
+
private int offsetFromPosition;
|
|
167
|
+
|
|
168
|
+
VariableLengthDataReader(int offsetFromPosition)
|
|
169
|
+
{
|
|
170
|
+
this.offsetFromPosition = offsetFromPosition;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
public byte readByte()
|
|
174
|
+
{
|
|
175
|
+
byte value = page.getByte(position + offsetFromPosition);
|
|
176
|
+
offsetFromPosition += 1;
|
|
177
|
+
return value;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
public short readShort()
|
|
181
|
+
{
|
|
182
|
+
short value = page.getShort(position + offsetFromPosition);
|
|
183
|
+
offsetFromPosition += 2;
|
|
184
|
+
return value;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
public int readInt()
|
|
188
|
+
{
|
|
189
|
+
int value = page.getInt(position + offsetFromPosition);
|
|
190
|
+
offsetFromPosition += 4;
|
|
191
|
+
return value;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
public long readLong()
|
|
195
|
+
{
|
|
196
|
+
long value = page.getLong(position + offsetFromPosition);
|
|
197
|
+
offsetFromPosition += 8;
|
|
198
|
+
return value;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
public float readFloat()
|
|
202
|
+
{
|
|
203
|
+
float value = page.getFloat(position + offsetFromPosition);
|
|
204
|
+
offsetFromPosition += 4;
|
|
205
|
+
return value;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
public double readDouble()
|
|
209
|
+
{
|
|
210
|
+
double value = page.getDouble(position + offsetFromPosition);
|
|
211
|
+
offsetFromPosition += 8;
|
|
212
|
+
return value;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
public void readBytes(byte[] data)
|
|
216
|
+
{
|
|
217
|
+
readBytes(data, 0, data.length);
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
public void readBytes(byte[] data, int off, int len)
|
|
221
|
+
{
|
|
222
|
+
page.getBytes(position + offsetFromPosition, data, off, len);
|
|
223
|
+
offsetFromPosition += len;
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
*/
|
|
227
|
+
}
|