embulk-parser-variable_length_bytes 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -14
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/parser/variable_length_bytes/VariableLengthBytesParserPlugin.java +11 -25
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e989400587d6450b34c2805d5747ded0a97c036
|
4
|
+
data.tar.gz: 85d3ac18255d16726f074d7051ed8ccae672dbe9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af303c131a2984b44126cbdd9876e8b61ec46933ae83e1124aa0c8d5955e5f01d8e7c711a183f45c6b1b8aa8f60f7d52e31f6694986b92ea81319960a07472e1
|
7
|
+
data.tar.gz: 6dc42d0699a64b213a65393f28fec47d27335395021f44cd6c63f8d04b3a7d4a9631ff5419e0df8fc2d4dad39154d04cdf53597deef2596928319bf0b774bfd7
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Variable Length Bytes parser plugin for Embulk
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/embulk-parser-variable_length_bytes.svg)](https://badge.fury.io/rb/embulk-parser-variable_length_bytes)
|
4
|
+
|
3
5
|
Variable length bytes record parser plugin for Embulk.
|
4
6
|
|
5
7
|
## Overview
|
@@ -7,6 +9,11 @@ Variable length bytes record parser plugin for Embulk.
|
|
7
9
|
* **Plugin type**: parser
|
8
10
|
* **Guess supported**: no
|
9
11
|
|
12
|
+
## Install
|
13
|
+
|
14
|
+
```shell script
|
15
|
+
$ embulk gem install embulk-parser-variable_length_bytes
|
16
|
+
```
|
10
17
|
## Configuration
|
11
18
|
|
12
19
|
- **columns**: Specify column name and type, range of bytes('\<start\>..\<end\>'). If the last column has a variable length, specify it as '\<start\>...'. (array, required)
|
@@ -32,20 +39,9 @@ in:
|
|
32
39
|
- {name: description, type: string, pos: '20...'}
|
33
40
|
```
|
34
41
|
|
42
|
+
## Build
|
35
43
|
|
36
|
-
|
37
|
-
|
38
|
-
Currently the status of this plugin is work in progress.
|
39
|
-
|
40
|
-
To install manually.
|
41
|
-
|
42
|
-
```
|
43
|
-
$ git clone https://github.com/koji-m/embulk-parser-variable_length_bytes.git
|
44
|
-
$ cd embulk-parser-variable_length_bytes
|
45
|
-
$ ./gradlew package
|
46
|
-
|
47
|
-
# prepare config.yml file
|
48
|
-
|
49
|
-
$ embulk run -L path/to/embulk-parser-variable_length_bytes config.yml
|
44
|
+
```shell script
|
45
|
+
$ ./gradlew gem
|
50
46
|
```
|
51
47
|
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/parser/variable_length_bytes/VariableLengthBytesParserPlugin.java
CHANGED
@@ -4,6 +4,7 @@ import com.google.common.base.Optional;
|
|
4
4
|
import com.google.common.collect.ImmutableSet;
|
5
5
|
import org.embulk.config.Config;
|
6
6
|
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.config.ConfigException;
|
7
8
|
import org.embulk.config.ConfigSource;
|
8
9
|
import org.embulk.config.Task;
|
9
10
|
import org.embulk.config.TaskSource;
|
@@ -167,8 +168,7 @@ public class VariableLengthBytesParserPlugin
|
|
167
168
|
}
|
168
169
|
}
|
169
170
|
catch (UnsupportedEncodingException e) {
|
170
|
-
|
171
|
-
System.err.println("UnsupportedEncoding");
|
171
|
+
throw new DataException("Could not decode with specified charset");
|
172
172
|
}
|
173
173
|
}
|
174
174
|
pageBuilder.addRecord();
|
@@ -202,8 +202,7 @@ public class VariableLengthBytesParserPlugin
|
|
202
202
|
}
|
203
203
|
}
|
204
204
|
catch (UnsupportedEncodingException e) {
|
205
|
-
|
206
|
-
System.err.println("UnsupportedEncoding");
|
205
|
+
throw new DataException("Could not decode with specified charset");
|
207
206
|
}
|
208
207
|
}
|
209
208
|
Column varLenColumn = columns.get(numColumns - 1);
|
@@ -268,8 +267,7 @@ public class VariableLengthBytesParserPlugin
|
|
268
267
|
}
|
269
268
|
}
|
270
269
|
catch (UnsupportedEncodingException e) {
|
271
|
-
|
272
|
-
System.err.println("UnsupportedEncoding");
|
270
|
+
throw new DataException("Could not decode with specified charset");
|
273
271
|
}
|
274
272
|
pageBuilder.addRecord();
|
275
273
|
}
|
@@ -313,6 +311,9 @@ public class VariableLengthBytesParserPlugin
|
|
313
311
|
while (true) {
|
314
312
|
int len = is.read(buf, totalLen, bufSize - totalLen);
|
315
313
|
if (len < 0) {
|
314
|
+
if (totalLen > 0) {
|
315
|
+
throw new DataException("File ended with insufficient record length");
|
316
|
+
}
|
316
317
|
break;
|
317
318
|
}
|
318
319
|
totalLen += len;
|
@@ -330,6 +331,9 @@ public class VariableLengthBytesParserPlugin
|
|
330
331
|
while (true) {
|
331
332
|
int len = is.read(buf, totalLen, bufSize - totalLen);
|
332
333
|
if (len < 0) {
|
334
|
+
if (totalLen > 0) {
|
335
|
+
throw new DataException("File ended with insufficient record length");
|
336
|
+
}
|
333
337
|
break;
|
334
338
|
}
|
335
339
|
totalLen += len;
|
@@ -339,7 +343,7 @@ public class VariableLengthBytesParserPlugin
|
|
339
343
|
if (invalidRecordSeparator(buf, recordSeparator,
|
340
344
|
recordSize, recordSeparatorSize)) {
|
341
345
|
if (stopOnInvalidRecord) {
|
342
|
-
throw new
|
346
|
+
throw new DataException(String.format("Invalid record separator(%s): %d", fileName));
|
343
347
|
}
|
344
348
|
logger.warn(String.format("Skipped record: file: %s, record-separator:%d,%d:%s", fileName, buf[bufSize - 2], buf[bufSize - 1],
|
345
349
|
new String(buf, java.nio.charset.Charset.forName("Shift_JIS"))));
|
@@ -352,24 +356,6 @@ public class VariableLengthBytesParserPlugin
|
|
352
356
|
}
|
353
357
|
pageBuilder.finish();
|
354
358
|
}
|
355
|
-
} catch (Exception e) {
|
356
|
-
// ToDo
|
357
|
-
}
|
358
|
-
}
|
359
|
-
|
360
|
-
static class RecordException extends DataException
|
361
|
-
{
|
362
|
-
RecordException(String cause)
|
363
|
-
{
|
364
|
-
super(cause);
|
365
|
-
}
|
366
|
-
}
|
367
|
-
|
368
|
-
static class ConfigException extends DataException
|
369
|
-
{
|
370
|
-
ConfigException(String cause)
|
371
|
-
{
|
372
|
-
super(cause);
|
373
359
|
}
|
374
360
|
}
|
375
361
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-variable_length_bytes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Koji Matsumoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -49,7 +49,7 @@ files:
|
|
49
49
|
- LICENSE.txt
|
50
50
|
- README.md
|
51
51
|
- build.gradle
|
52
|
-
- classpath/embulk-parser-variable_length_bytes-0.1.0.jar
|
52
|
+
- classpath/embulk-parser-variable_length_bytes-0.1.1.jar
|
53
53
|
- config/checkstyle/checkstyle.xml
|
54
54
|
- config/checkstyle/default.xml
|
55
55
|
- gradle/wrapper/gradle-wrapper.jar
|