embulk-input-filesplit 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. checksums.yaml +4 -4
  2. data/README.md +41 -40
  3. data/build.gradle +64 -64
  4. data/classpath/embulk-input-filesplit-0.1.4.jar +0 -0
  5. data/lib/embulk/input/filesplit.rb +3 -3
  6. data/src/main/java/org/embulk/input/filesplit/LocalFileSplitInputPlugin.java +300 -187
  7. data/src/test/java/org/embulk/input/filesplit/EmbulkPluginTester.java +70 -70
  8. data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputPluginTest.java +129 -94
  9. data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java +78 -78
  10. data/src/test/java/org/embulk/input/filesplit/PartialFileInputStreamTest.java +570 -570
  11. data/src/test/resources/data/sub1/test1.csv +1 -0
  12. data/src/test/resources/data/sub1/test2.csv +3 -0
  13. data/src/test/resources/data/sub2/test1.csv +1 -0
  14. data/src/test/resources/data/sub2/test2.csv +3 -0
  15. data/src/test/resources/data/sub2/x.csv +1 -0
  16. data/src/test/resources/data/test-header.csv +5 -5
  17. data/src/test/resources/data/test-semicolon.csv +4 -4
  18. data/src/test/resources/data/test.csv +4 -4
  19. data/src/test/resources/yml/test-error1.yml +22 -0
  20. data/src/test/resources/yml/test-error2.yml +24 -0
  21. data/src/test/resources/yml/test-header.yml +24 -24
  22. data/src/test/resources/yml/test-only-header.yml +24 -24
  23. data/src/test/resources/yml/test-path_prefix-directory.yml +23 -0
  24. data/src/test/resources/yml/test-path_prefix-files.yml +23 -0
  25. data/src/test/resources/yml/test-tasks.yml +23 -23
  26. data/src/test/resources/yml/test.yml +22 -22
  27. metadata +15 -6
  28. data/classpath/embulk-input-filesplit-0.1.3.jar +0 -0
@@ -0,0 +1 @@
1
+ 1,aaaaa,12345
@@ -0,0 +1,3 @@
1
+ 2,bbb,67890
2
+ 3,ccccccccc,-1
3
+ 4,dd,555555
@@ -0,0 +1 @@
1
+ 1,aaaaa,12345
@@ -0,0 +1,3 @@
1
+ 2,bbb,67890
2
+ 3,ccccccccc,-1
3
+ 4,dd,555555
@@ -0,0 +1 @@
1
+ 5,xxx,0
@@ -1,5 +1,5 @@
1
- id,name,value
2
- 1,aaaaa,12345
3
- 2,bbb,67890
4
- 3,ccccccccc,-1
5
- 4,dd,555555
1
+ id,name,value
2
+ 1,aaaaa,12345
3
+ 2,bbb,67890
4
+ 3,ccccccccc,-1
5
+ 4,dd,555555
@@ -1,4 +1,4 @@
1
- 1;aaaaa;12345
2
- 2;bbb;67890
3
- 3;ccccccccc;-1
4
- 4;dd;555555
1
+ 1;aaaaa;12345
2
+ 2;bbb;67890
3
+ 3;ccccccccc;-1
4
+ 4;dd;555555
@@ -1,4 +1,4 @@
1
- 1,aaaaa,12345
2
- 2,bbb,67890
3
- 3,ccccccccc,-1
4
- 4,dd,555555
1
+ 1,aaaaa,12345
2
+ 2,bbb,67890
3
+ 3,ccccccccc,-1
4
+ 4,dd,555555
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: filesplit
3
+ parser:
4
+ charset: UTF-8
5
+ newline: CRLF
6
+ type: csv
7
+ delimiter: ','
8
+ quote: ''
9
+ columns:
10
+ - {name: id, type: long}
11
+ - {name: name, type: string}
12
+ - {name: value, type: long}
13
+ out:
14
+ type: file
15
+ path_prefix: 'temp/result'
16
+ file_ext: .csv
17
+ formatter:
18
+ type: csv
19
+ delimiter: ';'
20
+ header_line: false
21
+ charset: UTF-8
22
+ newline: CRLF
@@ -0,0 +1,24 @@
1
+ in:
2
+ type: filesplit
3
+ path: 'data/test.csv'
4
+ path_prefix: 'data/sub1'
5
+ parser:
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ type: csv
9
+ delimiter: ','
10
+ quote: ''
11
+ columns:
12
+ - {name: id, type: long}
13
+ - {name: name, type: string}
14
+ - {name: value, type: long}
15
+ out:
16
+ type: file
17
+ path_prefix: 'temp/result'
18
+ file_ext: .csv
19
+ formatter:
20
+ type: csv
21
+ delimiter: ';'
22
+ header_line: false
23
+ charset: UTF-8
24
+ newline: CRLF
@@ -1,25 +1,25 @@
1
- in:
2
- type: filesplit
3
- path: 'data/test-header.csv'
4
- header_line: true
5
- parser:
6
- charset: UTF-8
7
- newline: CRLF
8
- type: csv
9
- header_line: true
10
- delimiter: ','
11
- quote: ''
12
- columns:
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: value, type: long}
16
- out:
17
- type: file
18
- path_prefix: 'temp/result'
19
- file_ext: .csv
20
- formatter:
21
- type: csv
22
- delimiter: ';'
23
- header_line: false
24
- charset: UTF-8
1
+ in:
2
+ type: filesplit
3
+ path: 'data/test-header.csv'
4
+ header_line: true
5
+ parser:
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ type: csv
9
+ header_line: true
10
+ delimiter: ','
11
+ quote: ''
12
+ columns:
13
+ - {name: id, type: long}
14
+ - {name: name, type: string}
15
+ - {name: value, type: long}
16
+ out:
17
+ type: file
18
+ path_prefix: 'temp/result'
19
+ file_ext: .csv
20
+ formatter:
21
+ type: csv
22
+ delimiter: ';'
23
+ header_line: false
24
+ charset: UTF-8
25
25
  newline: CRLF
@@ -1,25 +1,25 @@
1
- in:
2
- type: filesplit
3
- path: 'data/test-only-header.csv'
4
- header_line: true
5
- parser:
6
- charset: UTF-8
7
- newline: CRLF
8
- type: csv
9
- header_line: true
10
- delimiter: ','
11
- quote: ''
12
- columns:
13
- - {name: id, type: long}
14
- - {name: name, type: string}
15
- - {name: value, type: long}
16
- out:
17
- type: file
18
- path_prefix: 'temp/result'
19
- file_ext: .csv
20
- formatter:
21
- type: csv
22
- delimiter: ';'
23
- header_line: false
24
- charset: UTF-8
1
+ in:
2
+ type: filesplit
3
+ path: 'data/test-only-header.csv'
4
+ header_line: true
5
+ parser:
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ type: csv
9
+ header_line: true
10
+ delimiter: ','
11
+ quote: ''
12
+ columns:
13
+ - {name: id, type: long}
14
+ - {name: name, type: string}
15
+ - {name: value, type: long}
16
+ out:
17
+ type: file
18
+ path_prefix: 'temp/result'
19
+ file_ext: .csv
20
+ formatter:
21
+ type: csv
22
+ delimiter: ';'
23
+ header_line: false
24
+ charset: UTF-8
25
25
  newline: CRLF
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: filesplit
3
+ path_prefix: 'data/sub1'
4
+ parser:
5
+ charset: UTF-8
6
+ newline: CRLF
7
+ type: csv
8
+ delimiter: ','
9
+ quote: ''
10
+ columns:
11
+ - {name: id, type: long}
12
+ - {name: name, type: string}
13
+ - {name: value, type: long}
14
+ out:
15
+ type: file
16
+ path_prefix: 'temp/result'
17
+ file_ext: .csv
18
+ formatter:
19
+ type: csv
20
+ delimiter: ','
21
+ header_line: false
22
+ charset: UTF-8
23
+ newline: CRLF
@@ -0,0 +1,23 @@
1
+ in:
2
+ type: filesplit
3
+ path_prefix: 'data/sub2/test'
4
+ parser:
5
+ charset: UTF-8
6
+ newline: CRLF
7
+ type: csv
8
+ delimiter: ','
9
+ quote: ''
10
+ columns:
11
+ - {name: id, type: long}
12
+ - {name: name, type: string}
13
+ - {name: value, type: long}
14
+ out:
15
+ type: file
16
+ path_prefix: 'temp/result'
17
+ file_ext: .csv
18
+ formatter:
19
+ type: csv
20
+ delimiter: ','
21
+ header_line: false
22
+ charset: UTF-8
23
+ newline: CRLF
@@ -1,24 +1,24 @@
1
- in:
2
- type: filesplit
3
- path: 'data/test.csv'
4
- tasks: 3
5
- parser:
6
- charset: UTF-8
7
- newline: CRLF
8
- type: csv
9
- delimiter: ','
10
- quote: ''
11
- columns:
12
- - {name: id, type: long}
13
- - {name: name, type: string}
14
- - {name: value, type: long}
15
- out:
16
- type: file
17
- path_prefix: 'temp/result'
18
- file_ext: .csv
19
- formatter:
20
- type: csv
21
- delimiter: ';'
22
- header_line: false
23
- charset: UTF-8
1
+ in:
2
+ type: filesplit
3
+ path: 'data/test.csv'
4
+ tasks: 3
5
+ parser:
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ type: csv
9
+ delimiter: ','
10
+ quote: ''
11
+ columns:
12
+ - {name: id, type: long}
13
+ - {name: name, type: string}
14
+ - {name: value, type: long}
15
+ out:
16
+ type: file
17
+ path_prefix: 'temp/result'
18
+ file_ext: .csv
19
+ formatter:
20
+ type: csv
21
+ delimiter: ';'
22
+ header_line: false
23
+ charset: UTF-8
24
24
  newline: CRLF
@@ -1,23 +1,23 @@
1
- in:
2
- type: filesplit
3
- path: 'data/test.csv'
4
- parser:
5
- charset: UTF-8
6
- newline: CRLF
7
- type: csv
8
- delimiter: ','
9
- quote: ''
10
- columns:
11
- - {name: id, type: long}
12
- - {name: name, type: string}
13
- - {name: value, type: long}
14
- out:
15
- type: file
16
- path_prefix: 'temp/result'
17
- file_ext: .csv
18
- formatter:
19
- type: csv
20
- delimiter: ';'
21
- header_line: false
22
- charset: UTF-8
1
+ in:
2
+ type: filesplit
3
+ path: 'data/test.csv'
4
+ parser:
5
+ charset: UTF-8
6
+ newline: CRLF
7
+ type: csv
8
+ delimiter: ','
9
+ quote: ''
10
+ columns:
11
+ - {name: id, type: long}
12
+ - {name: name, type: string}
13
+ - {name: value, type: long}
14
+ out:
15
+ type: file
16
+ path_prefix: 'temp/result'
17
+ file_ext: .csv
18
+ formatter:
19
+ type: csv
20
+ delimiter: ';'
21
+ header_line: false
22
+ charset: UTF-8
23
23
  newline: CRLF
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-filesplit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hitoshi Tanaka
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-31 00:00:00.000000000 Z
11
+ date: 2017-09-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -18,6 +18,7 @@ extra_rdoc_files: []
18
18
  files:
19
19
  - README.md
20
20
  - build.gradle
21
+ - classpath/embulk-input-filesplit-0.1.4.jar
21
22
  - lib/embulk/input/filesplit.rb
22
23
  - src/main/java/org/embulk/input/filesplit/LocalFileSplitInputPlugin.java
23
24
  - src/main/java/org/embulk/input/filesplit/PartialFile.java
@@ -27,16 +28,24 @@ files:
27
28
  - src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java
28
29
  - src/test/java/org/embulk/input/filesplit/PartialFileInputStreamTest.java
29
30
  - src/test/resources/data/empty.csv
31
+ - src/test/resources/data/sub1/test1.csv
32
+ - src/test/resources/data/sub1/test2.csv
33
+ - src/test/resources/data/sub2/test1.csv
34
+ - src/test/resources/data/sub2/test2.csv
35
+ - src/test/resources/data/sub2/x.csv
30
36
  - src/test/resources/data/test-header.csv
31
37
  - src/test/resources/data/test-only-header.csv
32
38
  - src/test/resources/data/test-semicolon.csv
33
39
  - src/test/resources/data/test.csv
34
40
  - src/test/resources/resource.txt
41
+ - src/test/resources/yml/test-error1.yml
42
+ - src/test/resources/yml/test-error2.yml
35
43
  - src/test/resources/yml/test-header.yml
36
44
  - src/test/resources/yml/test-only-header.yml
45
+ - src/test/resources/yml/test-path_prefix-directory.yml
46
+ - src/test/resources/yml/test-path_prefix-files.yml
37
47
  - src/test/resources/yml/test-tasks.yml
38
48
  - src/test/resources/yml/test.yml
39
- - classpath/embulk-input-filesplit-0.1.3.jar
40
49
  homepage: https://github.com/hito4t/embulk-input-filesplit
41
50
  licenses:
42
51
  - Apache 2.0
@@ -47,17 +56,17 @@ require_paths:
47
56
  - lib
48
57
  required_ruby_version: !ruby/object:Gem::Requirement
49
58
  requirements:
50
- - - '>='
59
+ - - ">="
51
60
  - !ruby/object:Gem::Version
52
61
  version: '0'
53
62
  required_rubygems_version: !ruby/object:Gem::Requirement
54
63
  requirements:
55
- - - '>='
64
+ - - ">="
56
65
  - !ruby/object:Gem::Version
57
66
  version: '0'
58
67
  requirements: []
59
68
  rubyforge_project:
60
- rubygems_version: 2.1.9
69
+ rubygems_version: 2.4.8
61
70
  signing_key:
62
71
  specification_version: 4
63
72
  summary: Embulk plugin for splitting input file