embulk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,327 @@
1
+ package org.embulk.spi;
2
+
3
+ import java.io.Serializable;
4
+ import java.util.Map;
5
+ import java.util.List;
6
+ import java.util.Arrays;
7
+ import java.util.ArrayList;
8
+ import java.util.Comparator;
9
+ import java.util.Collections;
10
+ import com.google.common.collect.BiMap;
11
+ import com.google.common.collect.HashBiMap;
12
+ import io.airlift.slice.Slice;
13
+ import io.airlift.slice.Slices;
14
+ import org.embulk.spi.time.Timestamp;
15
+
16
+ public class PageBuilder
17
+ implements AutoCloseable
18
+ {
19
+ private final BufferAllocator allocator;
20
+ private final PageOutput output;
21
+ private final Schema schema;
22
+ private final int[] columnOffsets;
23
+ private final int fixedRecordSize;
24
+
25
+ private Buffer buffer;
26
+ private Slice bufferSlice;
27
+
28
+ private int count;
29
+ private int position;
30
+ private final byte[] nullBitSet;
31
+ private final BiMap<String, Integer> stringReferences = HashBiMap.create();
32
+ private int stringReferenceSize;
33
+ private int nextVariableLengthDataOffset;
34
+
35
+ public PageBuilder(BufferAllocator allocator, Schema schema, PageOutput output)
36
+ {
37
+ this.allocator = allocator;
38
+ this.output = output;
39
+ this.schema = schema;
40
+ this.columnOffsets = PageFormat.columnOffsets(schema);
41
+ this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
42
+ this.fixedRecordSize = PageFormat.recordHeaderSize(schema) + PageFormat.totalColumnSize(schema);
43
+ this.nextVariableLengthDataOffset = fixedRecordSize;
44
+ newBuffer();
45
+ }
46
+
47
+ private void newBuffer()
48
+ {
49
+ this.buffer = allocator.allocate(PageFormat.PAGE_HEADER_SIZE + fixedRecordSize);
50
+ this.bufferSlice = Slices.wrappedBuffer(buffer.array(), buffer.offset(), buffer.capacity());
51
+ this.count = 0;
52
+ this.position = PageFormat.PAGE_HEADER_SIZE;
53
+ this.stringReferences.clear();
54
+ this.stringReferenceSize = 0;
55
+ }
56
+
57
+ public Schema getSchema()
58
+ {
59
+ return schema;
60
+ }
61
+
62
+ public void setNull(Column column)
63
+ {
64
+ setNull(column.getIndex());
65
+ }
66
+
67
+ public void setNull(int columnIndex)
68
+ {
69
+ nullBitSet[columnIndex >>> 3] |= (1 << (columnIndex & 7));
70
+ }
71
+
72
+ public void setBoolean(Column column, boolean value)
73
+ {
74
+ // TODO check type?
75
+ setBoolean(column.getIndex(), value);
76
+ }
77
+
78
+ public void setBoolean(int columnIndex, boolean value)
79
+ {
80
+ bufferSlice.setByte(getOffset(columnIndex), value ? (byte) 1 : (byte) 0);
81
+ }
82
+
83
+ public void setLong(Column column, long value)
84
+ {
85
+ // TODO check type?
86
+ setLong(column.getIndex(), value);
87
+ }
88
+
89
+ public void setLong(int columnIndex, long value)
90
+ {
91
+ bufferSlice.setLong(getOffset(columnIndex), value);
92
+ }
93
+
94
+ public void setDouble(Column column, double value)
95
+ {
96
+ // TODO check type?
97
+ setDouble(column.getIndex(), value);
98
+ }
99
+
100
+ public void setDouble(int columnIndex, double value)
101
+ {
102
+ bufferSlice.setDouble(getOffset(columnIndex), value);
103
+ }
104
+
105
+ public void setString(Column column, String value)
106
+ {
107
+ // TODO check type?
108
+ setString(column.getIndex(), value);
109
+ }
110
+
111
+ public void setString(int columnIndex, String value)
112
+ {
113
+ Integer reuseIndex = stringReferences.get(value);
114
+ if (reuseIndex != null) {
115
+ bufferSlice.setInt(getOffset(columnIndex), reuseIndex);
116
+ } else {
117
+ int index = stringReferences.size();
118
+ stringReferences.put(value, index);
119
+ bufferSlice.setInt(getOffset(columnIndex), index);
120
+ stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length
121
+ }
122
+ }
123
+
124
+ public void setTimestamp(Column column, Timestamp value)
125
+ {
126
+ // TODO check type?
127
+ setTimestamp(column.getIndex(), value);
128
+ }
129
+
130
+ public void setTimestamp(int columnIndex, Timestamp value)
131
+ {
132
+ int offset = getOffset(columnIndex);
133
+ bufferSlice.setLong(offset, value.getEpochSecond());
134
+ bufferSlice.setInt(offset + 8, value.getNano());
135
+ }
136
+
137
+ private int getOffset(int columnIndex)
138
+ {
139
+ return position + columnOffsets[columnIndex];
140
+ }
141
+
142
+ private static class StringReferenceSortComparator
143
+ implements Comparator<Map.Entry<String, Integer>>, Serializable
144
+ {
145
+ @Override
146
+ public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2)
147
+ {
148
+ return e1.getValue().compareTo(e2.getValue());
149
+ }
150
+
151
+ @Override
152
+ public boolean equals(Object obj)
153
+ {
154
+ return obj instanceof StringReferenceSortComparator;
155
+ }
156
+ }
157
+
158
+ private List<String> getSortedStringReferences()
159
+ {
160
+ ArrayList<Map.Entry<String, Integer>> s = new ArrayList<>(stringReferences.entrySet());
161
+ Collections.sort(s, new StringReferenceSortComparator());
162
+ String[] array = new String[s.size()];
163
+ for (int i=0; i < array.length; i++) {
164
+ array[i] = s.get(i).getKey();
165
+ }
166
+ return Arrays.asList(array);
167
+ }
168
+
169
+ public void addRecord()
170
+ {
171
+ // record header
172
+ bufferSlice.setInt(position, nextVariableLengthDataOffset); // nextVariableLengthDataOffset means record size
173
+ bufferSlice.setBytes(position + 4, nullBitSet);
174
+ count++;
175
+
176
+ this.position += nextVariableLengthDataOffset;
177
+ this.nextVariableLengthDataOffset = fixedRecordSize;
178
+ Arrays.fill(nullBitSet, (byte) 0);
179
+
180
+ // flush if next record will not fit in this buffer
181
+ if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) {
182
+ flush();
183
+ }
184
+ }
185
+
186
+ private void doFlush()
187
+ {
188
+ if (buffer != null && count > 0) {
189
+ // write page header
190
+ bufferSlice.setInt(0, count);
191
+ buffer.limit(position);
192
+
193
+ // flush page
194
+ Page page = Page.wrap(buffer).setStringReferences(getSortedStringReferences());
195
+ buffer = null;
196
+ bufferSlice = null;
197
+ output.add(page);
198
+ }
199
+ }
200
+
201
+ public void flush()
202
+ {
203
+ doFlush();
204
+ if (buffer == null) {
205
+ newBuffer();
206
+ }
207
+ }
208
+
209
+ public void finish()
210
+ {
211
+ doFlush();
212
+ output.finish();
213
+ }
214
+
215
+ @Override
216
+ public void close()
217
+ {
218
+ if (buffer != null) {
219
+ buffer.release();
220
+ buffer = null;
221
+ bufferSlice = null;
222
+ }
223
+ }
224
+
225
+ /* TODO for variable-length types
226
+ private void flushAndTakeOverRemaingData()
227
+ {
228
+ if (page != null) {
229
+ // page header
230
+ page.setInt(0, count);
231
+
232
+ Page lastPage = page;
233
+
234
+ this.page = allocator.allocatePage(Page.PAGE_HEADER_SIZE + fixedRecordSize + nextVariableLengthDataOffset);
235
+ page.setBytes(Page.PAGE_HEADER_SIZE, lastPage, position, nextVariableLengthDataOffset);
236
+ this.count = 0;
237
+ this.position = Page.PAGE_HEADER_SIZE;
238
+
239
+ output.add(lastPage);
240
+ }
241
+ }
242
+
243
+ public int getVariableLengthDataOffset()
244
+ {
245
+ return nextVariableLengthDataOffset;
246
+ }
247
+
248
+ public VariableLengthDataWriter setVariableLengthData(int columnIndex, int intData)
249
+ {
250
+ // Page.VARIABLE_LENGTH_COLUMN_SIZE is 4 bytes
251
+ page.setInt(position + columnOffsets[columnIndex], intData);
252
+ return new VariableLengthDataWriter(nextVariableLengthDataOffset);
253
+ }
254
+
255
+ Page ensureVariableLengthDataCapacity(int requiredOffsetFromPosition)
256
+ {
257
+ if (page.capacity() < position + requiredOffsetFromPosition) {
258
+ flushAndTakeOverRemaingData();
259
+ }
260
+ return page;
261
+ }
262
+
263
+ public class VariableLengthDataWriter
264
+ {
265
+ private int offsetFromPosition;
266
+
267
+ VariableLengthDataWriter(int offsetFromPosition)
268
+ {
269
+ this.offsetFromPosition = offsetFromPosition;
270
+ }
271
+
272
+ public void writeByte(byte value)
273
+ {
274
+ ensureVariableLengthDataCapacity(offsetFromPosition + 1);
275
+ page.setByte(position + offsetFromPosition, value);
276
+ offsetFromPosition += 1;
277
+ }
278
+
279
+ public void writeShort(short value)
280
+ {
281
+ ensureVariableLengthDataCapacity(offsetFromPosition + 2);
282
+ page.setShort(position + offsetFromPosition, value);
283
+ offsetFromPosition += 2;
284
+ }
285
+
286
+ public void writeInt(int value)
287
+ {
288
+ ensureVariableLengthDataCapacity(offsetFromPosition + 4);
289
+ page.setInt(position + offsetFromPosition, value);
290
+ offsetFromPosition += 4;
291
+ }
292
+
293
+ public void writeLong(long value)
294
+ {
295
+ ensureVariableLengthDataCapacity(offsetFromPosition + 8);
296
+ page.setLong(position + offsetFromPosition, value);
297
+ offsetFromPosition += 8;
298
+ }
299
+
300
+ public void writeFloat(float value)
301
+ {
302
+ ensureVariableLengthDataCapacity(offsetFromPosition + 4);
303
+ page.setFloat(position + offsetFromPosition, value);
304
+ offsetFromPosition += 4;
305
+ }
306
+
307
+ public void writeDouble(double value)
308
+ {
309
+ ensureVariableLengthDataCapacity(offsetFromPosition + 8);
310
+ page.setDouble(position + offsetFromPosition, value);
311
+ offsetFromPosition += 8;
312
+ }
313
+
314
+ public void writeBytes(byte[] data)
315
+ {
316
+ writeBytes(data, 0, data.length);
317
+ }
318
+
319
+ public void writeBytes(byte[] data, int off, int len)
320
+ {
321
+ ensureVariableLengthDataCapacity(offsetFromPosition + len);
322
+ page.setBytes(position + offsetFromPosition, data, off, len);
323
+ offsetFromPosition += len;
324
+ }
325
+ }
326
+ */
327
+ }
@@ -0,0 +1,47 @@
1
+ package org.embulk.spi;
2
+
3
+ abstract class PageFormat
4
+ {
5
+ // PageHeader
6
+ // +---+
7
+ // | 4 |
8
+ // +---+
9
+ // count (number of records)
10
+
11
+ private PageFormat() { }
12
+
13
+ static final int PAGE_HEADER_SIZE = 4;
14
+
15
+ // PageBuilder.setVariableLengthData and PageReader.readVariableLengthData
16
+ // uses 4 bytes integer
17
+ static final int VARIABLE_LENGTH_COLUMN_SIZE = 4;
18
+
19
+ static int nullBitSetSize(Schema schema)
20
+ {
21
+ return (schema.size() + 7) / 8;
22
+ }
23
+
24
+ static int recordHeaderSize(Schema schema)
25
+ {
26
+ return 4 + nullBitSetSize(schema);
27
+ }
28
+
29
+ static int totalColumnSize(Schema schema)
30
+ {
31
+ return recordHeaderSize(schema) + schema.getFixedStorageSize();
32
+ }
33
+
34
+ static int[] columnOffsets(Schema schema)
35
+ {
36
+ int[] offsets = new int[schema.size()];
37
+
38
+ if (!schema.isEmpty()) {
39
+ offsets[0] = recordHeaderSize(schema);
40
+ for (int i=0; i < schema.size()-1; i++) {
41
+ offsets[i+1] = offsets[i] + schema.getColumnType(i).getFixedStorageSize();
42
+ }
43
+ }
44
+
45
+ return offsets;
46
+ }
47
+ }
@@ -0,0 +1,11 @@
1
+ package org.embulk.spi;
2
+
3
+ public interface PageOutput
4
+ extends AutoCloseable
5
+ {
6
+ public void add(Page page);
7
+
8
+ public void finish();
9
+
10
+ public void close();
11
+ }
@@ -0,0 +1,227 @@
1
+ package org.embulk.spi;
2
+
3
+ import java.util.Iterator;
4
+ import io.airlift.slice.Slice;
5
+ import io.airlift.slice.Slices;
6
+ import org.embulk.spi.time.Timestamp;
7
+
8
+ public class PageReader
9
+ implements AutoCloseable
10
+ {
11
+ private final Schema schema;
12
+ private final int[] columnOffsets;
13
+
14
+ private Page page = SENTINEL;
15
+ private Slice pageSlice = null;
16
+ private int pageRecordCount = 0;
17
+
18
+ private int readCount = 0;
19
+ private int position;
20
+ private final byte[] nullBitSet;
21
+
22
+ private static final Page SENTINEL = Page.wrap(Buffer.wrap(new byte[4])); // buffer().release() does nothing
23
+
24
+ public PageReader(Schema schema)
25
+ {
26
+ this.schema = schema;
27
+ this.columnOffsets = PageFormat.columnOffsets(schema);
28
+ this.nullBitSet = new byte[PageFormat.nullBitSetSize(schema)];
29
+ }
30
+
31
+ public static int getRecordCount(Page page)
32
+ {
33
+ Buffer pageBuffer = page.buffer();
34
+ Slice pageSlice = Slices.wrappedBuffer(pageBuffer.array(), pageBuffer.offset(), pageBuffer.limit());
35
+ return pageSlice.getInt(0); // see page format
36
+ }
37
+
38
+ public void setPage(Page page)
39
+ {
40
+ this.page.buffer().release();
41
+ this.page = SENTINEL;
42
+
43
+ Buffer pageBuffer = page.buffer();
44
+ Slice pageSlice = Slices.wrappedBuffer(pageBuffer.array(), pageBuffer.offset(), pageBuffer.limit());
45
+
46
+ pageRecordCount = pageSlice.getInt(0); // see page format
47
+ readCount = 0;
48
+ position = PageFormat.PAGE_HEADER_SIZE;
49
+
50
+ this.page = page;
51
+ this.pageSlice = pageSlice;
52
+ }
53
+
54
+ public Schema getSchema()
55
+ {
56
+ return schema;
57
+ }
58
+
59
+ public boolean isNull(Column column)
60
+ {
61
+ return isNull(column.getIndex());
62
+ }
63
+
64
+ public boolean isNull(int columnIndex)
65
+ {
66
+ return (nullBitSet[columnIndex >>> 3] & (1 << (columnIndex & 7))) != 0;
67
+ }
68
+
69
+ public boolean getBoolean(Column column)
70
+ {
71
+ // TODO check type?
72
+ return getBoolean(column.getIndex());
73
+ }
74
+
75
+ public boolean getBoolean(int columnIndex)
76
+ {
77
+ return pageSlice.getByte(getOffset(columnIndex)) != (byte) 0;
78
+ }
79
+
80
+ public long getLong(Column column)
81
+ {
82
+ // TODO check type?
83
+ return getLong(column.getIndex());
84
+ }
85
+
86
+ public long getLong(int columnIndex)
87
+ {
88
+ return pageSlice.getLong(getOffset(columnIndex));
89
+ }
90
+
91
+ public double getDouble(Column column)
92
+ {
93
+ // TODO check type?
94
+ return getDouble(column.getIndex());
95
+ }
96
+
97
+ public double getDouble(int columnIndex)
98
+ {
99
+ return pageSlice.getDouble(getOffset(columnIndex));
100
+ }
101
+
102
+ public String getString(Column column)
103
+ {
104
+ // TODO check type?
105
+ return getString(column.getIndex());
106
+ }
107
+
108
+ public String getString(int columnIndex)
109
+ {
110
+ int index = pageSlice.getInt(getOffset(columnIndex));
111
+ return page.getStringReference(index);
112
+ }
113
+
114
+ public Timestamp getTimestamp(Column column)
115
+ {
116
+ // TODO check type?
117
+ return getTimestamp(column.getIndex());
118
+ }
119
+
120
+ public Timestamp getTimestamp(int columnIndex)
121
+ {
122
+ int offset = getOffset(columnIndex);
123
+ long sec = pageSlice.getLong(offset);
124
+ int nsec = pageSlice.getInt(offset + 8);
125
+ return Timestamp.ofEpochSecond(sec, nsec);
126
+ }
127
+
128
+ private int getOffset(int columnIndex)
129
+ {
130
+ return position + columnOffsets[columnIndex];
131
+ }
132
+
133
+ public boolean nextRecord()
134
+ {
135
+ if (pageRecordCount <= readCount) {
136
+ return false;
137
+ }
138
+
139
+ if (readCount > 0) {
140
+ // advance position excepting the first record
141
+ int lastRecordSize = pageSlice.getInt(position);
142
+ position += lastRecordSize;
143
+ }
144
+
145
+ readCount++;
146
+ pageSlice.getBytes(position + 4, nullBitSet, 0, nullBitSet.length);
147
+
148
+ return true;
149
+ }
150
+
151
+ @Override
152
+ public void close()
153
+ {
154
+ page.buffer().release();
155
+ page = SENTINEL;
156
+ }
157
+
158
+ /* TODO for variable-length types
159
+ public VariableLengthDataReader getVariableLengthData(int columnIndex, int variableLengthDataOffset)
160
+ {
161
+ return new VariableLengthDataReader(variableLengthDataOffset);
162
+ }
163
+
164
+ public class VariableLengthDataReader
165
+ {
166
+ private int offsetFromPosition;
167
+
168
+ VariableLengthDataReader(int offsetFromPosition)
169
+ {
170
+ this.offsetFromPosition = offsetFromPosition;
171
+ }
172
+
173
+ public byte readByte()
174
+ {
175
+ byte value = page.getByte(position + offsetFromPosition);
176
+ offsetFromPosition += 1;
177
+ return value;
178
+ }
179
+
180
+ public short readShort()
181
+ {
182
+ short value = page.getShort(position + offsetFromPosition);
183
+ offsetFromPosition += 2;
184
+ return value;
185
+ }
186
+
187
+ public int readInt()
188
+ {
189
+ int value = page.getInt(position + offsetFromPosition);
190
+ offsetFromPosition += 4;
191
+ return value;
192
+ }
193
+
194
+ public long readLong()
195
+ {
196
+ long value = page.getLong(position + offsetFromPosition);
197
+ offsetFromPosition += 8;
198
+ return value;
199
+ }
200
+
201
+ public float readFloat()
202
+ {
203
+ float value = page.getFloat(position + offsetFromPosition);
204
+ offsetFromPosition += 4;
205
+ return value;
206
+ }
207
+
208
+ public double readDouble()
209
+ {
210
+ double value = page.getDouble(position + offsetFromPosition);
211
+ offsetFromPosition += 8;
212
+ return value;
213
+ }
214
+
215
+ public void readBytes(byte[] data)
216
+ {
217
+ readBytes(data, 0, data.length);
218
+ }
219
+
220
+ public void readBytes(byte[] data, int off, int len)
221
+ {
222
+ page.getBytes(position + offsetFromPosition, data, off, len);
223
+ offsetFromPosition += len;
224
+ }
225
+ }
226
+ */
227
+ }