embulk-input-hdfs 0.1.8 → 0.1.9

Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -1
  3. data/.travis.yml +9 -0
  4. data/README.md +6 -3
  5. data/build.gradle +33 -6
  6. data/classpath/commons-collections-3.2.2.jar +0 -0
  7. data/classpath/embulk-input-hdfs-0.1.9.jar +0 -0
  8. data/classpath/{hadoop-annotations-2.6.0.jar → hadoop-annotations-2.6.3.jar} +0 -0
  9. data/classpath/hadoop-auth-2.6.3.jar +0 -0
  10. data/classpath/hadoop-client-2.6.3.jar +0 -0
  11. data/classpath/{hadoop-common-2.6.0.jar → hadoop-common-2.6.3.jar} +0 -0
  12. data/classpath/{hadoop-hdfs-2.6.0.jar → hadoop-hdfs-2.6.3.jar} +0 -0
  13. data/classpath/hadoop-mapreduce-client-app-2.6.3.jar +0 -0
  14. data/classpath/{hadoop-mapreduce-client-common-2.6.0.jar → hadoop-mapreduce-client-common-2.6.3.jar} +0 -0
  15. data/classpath/{hadoop-mapreduce-client-core-2.6.0.jar → hadoop-mapreduce-client-core-2.6.3.jar} +0 -0
  16. data/classpath/hadoop-mapreduce-client-jobclient-2.6.3.jar +0 -0
  17. data/classpath/hadoop-mapreduce-client-shuffle-2.6.3.jar +0 -0
  18. data/classpath/hadoop-yarn-api-2.6.3.jar +0 -0
  19. data/classpath/hadoop-yarn-client-2.6.3.jar +0 -0
  20. data/classpath/{hadoop-yarn-common-2.6.0.jar → hadoop-yarn-common-2.6.3.jar} +0 -0
  21. data/classpath/hadoop-yarn-server-common-2.6.3.jar +0 -0
  22. data/classpath/hadoop-yarn-server-nodemanager-2.6.3.jar +0 -0
  23. data/config/checkstyle/checkstyle.xml +128 -0
  24. data/config/checkstyle/default.xml +108 -0
  25. data/example/config.yml +35 -0
  26. data/example/data.csv +5 -0
  27. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  28. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  29. data/src/main/java/org/embulk/input/hdfs/HdfsFileInputPlugin.java +91 -13
  30. data/src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java +2 -1
  31. data/src/main/java/org/embulk/input/hdfs/HdfsPartialFile.java +3 -4
  32. data/src/main/java/org/embulk/input/hdfs/HdfsPartialFileInputStream.java +20 -11
  33. data/src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java +227 -0
  34. data/src/test/resources/sample_01.csv +5 -0
  35. data/src/test/resources/sample_02.csv +5 -0
  36. metadata +27 -20
  37. data/classpath/commons-collections-3.2.1.jar +0 -0
  38. data/classpath/embulk-input-hdfs-0.1.8.jar +0 -0
  39. data/classpath/hadoop-auth-2.6.0.jar +0 -0
  40. data/classpath/hadoop-client-2.6.0.jar +0 -0
  41. data/classpath/hadoop-mapreduce-client-app-2.6.0.jar +0 -0
  42. data/classpath/hadoop-mapreduce-client-jobclient-2.6.0.jar +0 -0
  43. data/classpath/hadoop-mapreduce-client-shuffle-2.6.0.jar +0 -0
  44. data/classpath/hadoop-yarn-api-2.6.0.jar +0 -0
  45. data/classpath/hadoop-yarn-client-2.6.0.jar +0 -0
  46. data/classpath/hadoop-yarn-server-common-2.6.0.jar +0 -0
  47. data/classpath/hadoop-yarn-server-nodemanager-2.6.0.jar +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c428003a976a3148f1b59e7dd54c7ec870ed3bce
-  data.tar.gz: a2941dce02f97452b54938bf73b5d1dceba5f4c5
+  metadata.gz: ce120e7049f33e30dd23af9f8b7bcedc1a246457
+  data.tar.gz: a2dc70fee60be2ab535df3549e99304e751a7b7a
 SHA512:
-  metadata.gz: 02ca4fc8c3c82571296eb8da1d3a61533d0017d49c2712886e0872fbebf58ae8656412c61e3b6f1d6aaf71078ad1e886029a91907df92e236b2c56d3cbbb6083
-  data.tar.gz: f218db27f48822f33427ac0ebcccf5c58c41cdec92486cf9ceb1ddb240f5332744b8d65daf6a672e18488aa675c597ef56d34c994b7c04fe218a5e9c3428c91d
+  metadata.gz: a37baf6f948dff41f694457dc9ea9ea9270e41473642114d4dc7a569c61550471b9dbc440478c638fe56ba79956f043097e2129302d3ae12511bdc9d33cef994
+  data.tar.gz: 16922c84dcdb9715cb1b0377886b36192acdda31a037352e18df83895f33b09a9f275cd02b9662f02ee411725a6dae65950cfc256c707f639312810839018037
data/.gitignore CHANGED
@@ -6,6 +6,9 @@
 /classpath/
 build/
 .idea
+/.settings/
+/.metadata/
+.classpath
+.project
 *.iml
 .ruby-version
-
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
+language: java
+jdk:
+  - openjdk7
+  - oraclejdk7
+  - oraclejdk8
+script:
+  - ./gradlew test
+after_success:
+  - ./gradlew jacocoTestReport coveralls
data/README.md CHANGED
@@ -1,4 +1,6 @@
 # Hdfs file input plugin for Embulk
+[![Build Status](https://travis-ci.org/civitaspo/embulk-input-hdfs.svg)](https://travis-ci.org/civitaspo/embulk-input-hdfs)
+[![Coverage Status](https://coveralls.io/repos/civitaspo/embulk-input-hdfs/badge.svg?branch=master&service=github)](https://coveralls.io/github/civitaspo/embulk-input-hdfs?branch=master)
 
 Read files on Hdfs.
 
@@ -16,6 +18,7 @@ Read files on Hdfs.
 - **rewind_seconds** When you use Date format in input_path property, the format is executed by using the time which is Now minus this property.
 - **partition** when this is true, partition input files and increase task count. (default: `true`)
 - **num_partitions** number of partitions. (default: `Runtime.getRuntime().availableProcessors()`)
+- **skip_header_lines** Skip this number of lines first. Set 1 if the file has header line. (default: `0`)
 
 ## Example
 
@@ -23,8 +26,8 @@ Read files on Hdfs.
 in:
   type: hdfs
   config_files:
-    - /opt/analytics/etc/hadoop/conf/core-site.xml
-    - /opt/analytics/etc/hadoop/conf/hdfs-site.xml
+    - /etc/hadoop/conf/core-site.xml
+    - /etc/hadoop/conf/hdfs-site.xml
   config:
     fs.defaultFS: 'hdfs://hadoop-nn1:8020'
     dfs.replication: 1
@@ -106,4 +109,4 @@ $ ./gradlew gem
 ```
 $ ./gradlew classpath
 $ bundle exec embulk run -I lib example.yml
-```
+```
data/build.gradle CHANGED
@@ -2,6 +2,9 @@ plugins {
     id "com.jfrog.bintray" version "1.1"
     id "com.github.jruby-gradle.base" version "0.1.5"
     id "java"
+    id "checkstyle"
+    id "com.github.kt3k.coveralls" version "2.4.0"
+    id "jacoco"
 }
 import com.github.jrubygradle.JRubyExec
 repositories {
@@ -12,18 +15,19 @@ configurations {
     provided
 }
 
-version = "0.1.8"
+version = "0.1.9"
 
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
 
 dependencies {
-    compile "org.embulk:embulk-core:0.7.0"
-    provided "org.embulk:embulk-core:0.7.0"
+    compile "org.embulk:embulk-core:0.8.+"
+    provided "org.embulk:embulk-core:0.8.+"
     // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
-    compile 'org.apache.hadoop:hadoop-client:2.6.0'
-    compile 'com.google.guava:guava:15.0'
+    compile 'org.apache.hadoop:hadoop-client:2.6.+'
     testCompile "junit:junit:4.+"
+    testCompile "org.embulk:embulk-core:0.8.+:tests"
+    testCompile "org.embulk:embulk-standards:0.8.+"
 }
 
 task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -33,6 +37,29 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
 }
 clean { delete "classpath" }
 
+jacocoTestReport {
+    reports {
+        xml.enabled = true // coveralls plugin depends on xml format report
+        html.enabled = true
+    }
+}
+checkstyle {
+    configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
+    toolVersion = '6.14.1'
+}
+checkstyleMain {
+    configFile = file("${project.rootDir}/config/checkstyle/default.xml")
+    ignoreFailures = true
+}
+checkstyleTest {
+    configFile = file("${project.rootDir}/config/checkstyle/default.xml")
+    ignoreFailures = true
+}
+task checkstyle(type: Checkstyle) {
+    classpath = sourceSets.main.output + sourceSets.test.output
+    source = sourceSets.main.allJava + sourceSets.test.allJava
+}
+
 task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
     jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
     script "${project.name}.gemspec"
@@ -57,7 +84,7 @@ task gemspec {
     Gem::Specification.new do |spec|
         spec.name = "${project.name}"
         spec.version = "${project.version}"
-        spec.authors = ["takahiro.nakayama"]
+        spec.authors = ["Civitaspo"]
         spec.summary = %[Hdfs file input plugin for Embulk]
         spec.description = %[Reads files stored on Hdfs.]
         spec.email = ["civitaspo@gmail.com"]
data/config/checkstyle/checkstyle.xml ADDED
@@ -0,0 +1,128 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE module PUBLIC
+        "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
+        "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
+<module name="Checker">
+    <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
+    <module name="FileTabCharacter"/>
+    <module name="NewlineAtEndOfFile">
+        <property name="lineSeparator" value="lf"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\r"/>
+        <property name="message" value="Line contains carriage return"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value=" \n"/>
+        <property name="message" value="Line has trailing whitespace"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\{\n\n"/>
+        <property name="message" value="Blank line after opening brace"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\n\n\s*\}"/>
+        <property name="message" value="Blank line before closing brace"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\n\n\n"/>
+        <property name="message" value="Multiple consecutive blank lines"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\n\n\Z"/>
+        <property name="message" value="Blank line before end of file"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="Preconditions\.checkNotNull"/>
+        <property name="message" value="Use of checkNotNull"/>
+    </module>
+
+    <module name="TreeWalker">
+        <module name="EmptyBlock">
+            <property name="option" value="text"/>
+            <property name="tokens" value="
+                LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
+                LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
+        </module>
+        <module name="EmptyStatement"/>
+        <module name="EmptyForInitializerPad"/>
+        <module name="EmptyForIteratorPad">
+            <property name="option" value="space"/>
+        </module>
+        <module name="MethodParamPad">
+            <property name="allowLineBreaks" value="true"/>
+            <property name="option" value="nospace"/>
+        </module>
+        <module name="ParenPad"/>
+        <module name="TypecastParenPad"/>
+        <module name="NeedBraces"/>
+        <module name="LeftCurly">
+            <property name="option" value="nl"/>
+            <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
+        </module>
+        <module name="LeftCurly">
+            <property name="option" value="eol"/>
+            <property name="tokens" value="
+                LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
+                LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
+        </module>
+        <module name="RightCurly">
+            <property name="option" value="alone"/>
+        </module>
+        <module name="GenericWhitespace"/>
+        <module name="WhitespaceAfter"/>
+        <module name="NoWhitespaceBefore"/>
+
+        <module name="UpperEll"/>
+        <module name="DefaultComesLast"/>
+        <module name="ArrayTypeStyle"/>
+        <module name="MultipleVariableDeclarations"/>
+        <module name="ModifierOrder"/>
+        <module name="OneStatementPerLine"/>
+        <module name="StringLiteralEquality"/>
+        <module name="MutableException"/>
+        <module name="EqualsHashCode"/>
+        <module name="InnerAssignment"/>
+        <module name="InterfaceIsType"/>
+        <module name="HideUtilityClassConstructor"/>
+
+        <module name="MemberName"/>
+        <module name="LocalVariableName"/>
+        <module name="LocalFinalVariableName"/>
+        <module name="TypeName"/>
+        <module name="PackageName"/>
+        <module name="ParameterName"/>
+        <module name="StaticVariableName"/>
+        <module name="ClassTypeParameterName">
+            <property name="format" value="^[A-Z][0-9]?$"/>
+        </module>
+        <module name="MethodTypeParameterName">
+            <property name="format" value="^[A-Z][0-9]?$"/>
+        </module>
+
+        <module name="AvoidStarImport"/>
+        <module name="RedundantImport"/>
+        <module name="UnusedImports"/>
+        <module name="ImportOrder">
+            <property name="groups" value="*,javax,java"/>
+            <property name="separated" value="true"/>
+            <property name="option" value="bottom"/>
+            <property name="sortStaticImportsAlphabetically" value="true"/>
+        </module>
+
+        <module name="WhitespaceAround">
+            <property name="allowEmptyConstructors" value="true"/>
+            <property name="allowEmptyMethods" value="true"/>
+            <property name="ignoreEnhancedForColon" value="false"/>
+            <property name="tokens" value="
+                ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
+                BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
+                LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
+                LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
+                LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
+                LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
+                PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
+                STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
+        </module>
+    </module>
+</module>
data/config/checkstyle/default.xml ADDED
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE module PUBLIC
+        "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
+        "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
+<!--
+    This is a subset of ./checkstyle.xml which allows some loose styles
+-->
+<module name="Checker">
+    <module name="FileTabCharacter"/>
+    <module name="NewlineAtEndOfFile">
+        <property name="lineSeparator" value="lf"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\r"/>
+        <property name="message" value="Line contains carriage return"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value=" \n"/>
+        <property name="message" value="Line has trailing whitespace"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\n\n\n"/>
+        <property name="message" value="Multiple consecutive blank lines"/>
+    </module>
+    <module name="RegexpMultiline">
+        <property name="format" value="\n\n\Z"/>
+        <property name="message" value="Blank line before end of file"/>
+    </module>
+
+    <module name="TreeWalker">
+        <module name="EmptyBlock">
+            <property name="option" value="text"/>
+            <property name="tokens" value="
+                LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
+                LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
+        </module>
+        <module name="EmptyStatement"/>
+        <module name="EmptyForInitializerPad"/>
+        <module name="EmptyForIteratorPad">
+            <property name="option" value="space"/>
+        </module>
+        <module name="MethodParamPad">
+            <property name="allowLineBreaks" value="true"/>
+            <property name="option" value="nospace"/>
+        </module>
+        <module name="ParenPad"/>
+        <module name="TypecastParenPad"/>
+        <module name="NeedBraces"/>
+        <module name="LeftCurly">
+            <property name="option" value="nl"/>
+            <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
+        </module>
+        <module name="LeftCurly">
+            <property name="option" value="eol"/>
+            <property name="tokens" value="
+                LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
+                LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
+        </module>
+        <module name="RightCurly">
+            <property name="option" value="alone"/>
+        </module>
+        <module name="GenericWhitespace"/>
+        <module name="WhitespaceAfter"/>
+        <module name="NoWhitespaceBefore"/>
+
+        <module name="UpperEll"/>
+        <module name="DefaultComesLast"/>
+        <module name="ArrayTypeStyle"/>
+        <module name="MultipleVariableDeclarations"/>
+        <module name="ModifierOrder"/>
+        <module name="OneStatementPerLine"/>
+        <module name="StringLiteralEquality"/>
+        <module name="MutableException"/>
+        <module name="EqualsHashCode"/>
+        <module name="InnerAssignment"/>
+        <module name="InterfaceIsType"/>
+        <module name="HideUtilityClassConstructor"/>
+
+        <module name="MemberName"/>
+        <module name="LocalVariableName"/>
+        <module name="LocalFinalVariableName"/>
+        <module name="TypeName"/>
+        <module name="PackageName"/>
+        <module name="ParameterName"/>
+        <module name="StaticVariableName"/>
+        <module name="ClassTypeParameterName">
+            <property name="format" value="^[A-Z][0-9]?$"/>
+        </module>
+        <module name="MethodTypeParameterName">
+            <property name="format" value="^[A-Z][0-9]?$"/>
+        </module>
+
+        <module name="WhitespaceAround">
+            <property name="allowEmptyConstructors" value="true"/>
+            <property name="allowEmptyMethods" value="true"/>
+            <property name="ignoreEnhancedForColon" value="false"/>
+            <property name="tokens" value="
+                ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
+                BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
+                LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
+                LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
+                LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
+                LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
+                PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
+                STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
+        </module>
+    </module>
+</module>
data/example/config.yml ADDED
@@ -0,0 +1,35 @@
+hdfs_example: &hdfs_example
+  config_files:
+    - /etc/hadoop/conf/core-site.xml
+    - /etc/hadoop/conf/hdfs-site.xml
+  config:
+    fs.defaultFS: 'hdfs://hadoop-nn1:8020'
+    fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
+    fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
+
+local_fs_example: &local_fs_example
+  config:
+    fs.defaultFS: 'file:///'
+    fs.hdfs.impl: 'org.apache.hadoop.fs.LocalFileSystem'
+    fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
+
+in:
+  type: hdfs
+  <<: *local_fs_example
+  path: example/data.csv
+  parser:
+    charset: UTF-8
+    newline: CRLF
+    type: csv
+    delimiter: ','
+    quote: '"'
+    header_line: true
+    columns:
+      - {name: id, type: long}
+      - {name: account, type: long}
+      - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
+      - {name: purchase, type: timestamp, format: '%Y%m%d'}
+      - {name: comment, type: string}
+
+out:
+  type: stdout
data/example/data.csv ADDED
@@ -0,0 +1,5 @@
+id,account,time,purchase,comment
+1,32864,2015-01-27 19:23:49,20150127,embulk
+2,14824,2015-01-27 19:01:23,20150127,embulk jruby
+3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
+4,11270,2015-01-29 11:54:36,20150129,NULL
data/gradle/wrapper/gradle-wrapper.properties CHANGED
@@ -1,6 +1,6 @@
-#Tue Aug 11 00:26:20 PDT 2015
+#Wed Jan 13 12:41:02 JST 2016
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
data/src/main/java/org/embulk/input/hdfs/HdfsFileInputPlugin.java CHANGED
@@ -24,18 +24,26 @@ import org.jruby.embed.ScriptingContainer;
 import org.slf4j.Logger;
 
 import javax.annotation.Nullable;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.SequenceInputStream;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
-public class HdfsFileInputPlugin implements FileInputPlugin
+public class HdfsFileInputPlugin
+        implements FileInputPlugin
 {
     private static final Logger logger = Exec.getLogger(HdfsFileInputPlugin.class);
+    private static FileSystem fs;
 
-    public interface PluginTask extends Task
+    public interface PluginTask
+            extends Task
     {
         @Config("config_files")
         @ConfigDefault("[]")
@@ -60,7 +68,12 @@ public class HdfsFileInputPlugin implements FileInputPlugin
         @ConfigDefault("-1") // Default: Runtime.getRuntime().availableProcessors()
         public long getApproximateNumPartitions();
 
+        @Config("skip_header_lines") // Skip this number of lines first. Set 1 if the file has header line.
+        @ConfigDefault("0") // The reason why the parameter is configured is that this plugin splits files.
+        public int getSkipHeaderLines();
+
         public List<HdfsPartialFile> getFiles();
+
         public void setFiles(List<HdfsPartialFile> hdfsFiles);
 
         @ConfigInject
@@ -81,8 +94,8 @@ public class HdfsFileInputPlugin implements FileInputPlugin
                 throw new PathNotFoundException(pathString);
             }
 
+            logger.debug("embulk-input-hdfs: Loading target files: {}", originalFileList);
             task.setFiles(allocateHdfsFilesToTasks(task, getFs(task), originalFileList));
-            logger.info("embulk-input-hdfs: Loading target files: {}", originalFileList);
         }
         catch (IOException e) {
             logger.error(e.getMessage());
@@ -104,8 +117,8 @@ public class HdfsFileInputPlugin implements FileInputPlugin
 
     @Override
     public ConfigDiff resume(TaskSource taskSource,
-                             int taskCount,
-                             FileInputPlugin.Control control)
+            int taskCount,
+            FileInputPlugin.Control control)
     {
         control.run(taskSource, taskCount);
 
@@ -127,8 +140,8 @@ public class HdfsFileInputPlugin implements FileInputPlugin
 
     @Override
     public void cleanup(TaskSource taskSource,
-                        int taskCount,
-                        List<TaskReport> successTaskReports)
+            int taskCount,
+            List<TaskReport> successTaskReports)
     {
     }
 
@@ -138,15 +151,22 @@ public class HdfsFileInputPlugin implements FileInputPlugin
         final PluginTask task = taskSource.loadTask(PluginTask.class);
 
         InputStream input;
+        final HdfsPartialFile file = task.getFiles().get(taskIndex);
         try {
-            input = openInputStream(task, task.getFiles().get(taskIndex));
+            if (file.getStart() > 0 && task.getSkipHeaderLines() > 0) {
+                input = new SequenceInputStream(getHeadersInputStream(task, file), openInputStream(task, file));
+            }
+            else {
+                input = openInputStream(task, file);
+            }
         }
         catch (IOException e) {
            logger.error(e.getMessage());
            throw new RuntimeException(e);
        }
 
-        return new InputStreamTransactionalFileInput(task.getBufferAllocator(), input) {
+        return new InputStreamTransactionalFileInput(task.getBufferAllocator(), input)
+        {
            @Override
            public void abort()
            { }
@@ -159,6 +179,42 @@ public class HdfsFileInputPlugin implements FileInputPlugin
        };
    }
 
+    private InputStream getHeadersInputStream(PluginTask task, HdfsPartialFile partialFile)
+            throws IOException
+    {
+        FileSystem fs = getFs(task);
+        ByteArrayOutputStream header = new ByteArrayOutputStream();
+        int skippedHeaders = 0;
+
+        try (BufferedInputStream in = new BufferedInputStream(fs.open(new Path(partialFile.getPath())))) {
+            while (true) {
+                int c = in.read();
+                if (c < 0) {
+                    break;
+                }
+
+                header.write(c);
+
+                if (c == '\n') {
+                    skippedHeaders++;
+                }
+                else if (c == '\r') {
+                    int c2 = in.read();
+                    if (c2 == '\n') {
+                        header.write(c2);
+                    }
+                    skippedHeaders++;
+                }
+
+                if (skippedHeaders >= task.getSkipHeaderLines()) {
+                    break;
+                }
+            }
+        }
+        header.close();
+        return new ByteArrayInputStream(header.toByteArray());
+    }
+
     private static HdfsPartialFileInputStream openInputStream(PluginTask task, HdfsPartialFile partialFile)
             throws IOException
     {
@@ -168,6 +224,18 @@ public class HdfsFileInputPlugin implements FileInputPlugin
     }
 
     private static FileSystem getFs(final PluginTask task)
+            throws IOException
+    {
+        if (fs == null) {
+            setFs(task);
+            return fs;
+        }
+        else {
+            return fs;
+        }
+    }
+
+    private static FileSystem setFs(final PluginTask task)
             throws IOException
     {
         Configuration configuration = new Configuration();
@@ -177,18 +245,25 @@ public class HdfsFileInputPlugin implements FileInputPlugin
             configuration.addResource(file.toURI().toURL());
         }
 
-        for (Map.Entry<String, String> entry: task.getConfig().entrySet()) {
+        for (Map.Entry<String, String> entry : task.getConfig().entrySet()) {
             configuration.set(entry.getKey(), entry.getValue());
         }
 
-        return FileSystem.get(configuration);
+        // For debug
+        for (Map.Entry<String, String> entry : configuration) {
+            logger.trace("{}: {}", entry.getKey(), entry.getValue());
+        }
+        logger.debug("Resource Files: {}", configuration);
+
+        fs = FileSystem.get(configuration);
+        return fs;
     }
 
-    private String strftime(final String raw, final int rewind_seconds)
+    private String strftime(final String raw, final int rewindSeconds)
     {
         ScriptingContainer jruby = new ScriptingContainer();
         Object resolved = jruby.runScriptlet(
-            String.format("(Time.now - %s).strftime('%s')", String.valueOf(rewind_seconds), raw));
+            String.format("(Time.now - %s).strftime('%s')", String.valueOf(rewindSeconds), raw));
         return resolved.toString();
     }
 
@@ -255,6 +330,9 @@ public class HdfsFileInputPlugin implements FileInputPlugin
         long approximateNumPartitions =
                 (task.getApproximateNumPartitions() <= 0) ? Runtime.getRuntime().availableProcessors() : task.getApproximateNumPartitions();
         long partitionSizeByOneTask = totalFileLength / approximateNumPartitions;
+        if (partitionSizeByOneTask <= 0) {
+            partitionSizeByOneTask = 1;
+        }
 
         List<HdfsPartialFile> hdfsPartialFiles = new ArrayList<>();
         for (Path path : pathList) {
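
The `skip_header_lines` change above is the core of this release: when a task's byte range starts mid-file, `getHeadersInputStream` re-reads the header lines from the start of the file and `SequenceInputStream` prepends them to the partial stream. A minimal, self-contained sketch of that technique (plain `java.io`, no Hadoop; the class and the `headerOf` helper are illustrative, not the plugin's API):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.nio.charset.StandardCharsets;

public class HeaderPrependSketch
{
    // Collect the first skipHeaderLines lines (handling \n and \r\n) into a buffer.
    static InputStream headerOf(InputStream in, int skipHeaderLines)
            throws IOException
    {
        ByteArrayOutputStream header = new ByteArrayOutputStream();
        int skipped = 0;
        int c;
        while (skipped < skipHeaderLines && (c = in.read()) >= 0) {
            header.write(c);
            if (c == '\n') {
                skipped++;
            }
            else if (c == '\r') {
                int c2 = in.read();
                if (c2 == '\n') {
                    header.write(c2);
                }
                skipped++;
            }
        }
        return new ByteArrayInputStream(header.toByteArray());
    }

    public static void main(String[] args)
            throws IOException
    {
        byte[] file = "id,comment\n1,embulk\n2,jruby\n".getBytes(StandardCharsets.UTF_8);
        // Pretend this task was assigned the byte range starting at the third line (offset 20).
        InputStream partial = new ByteArrayInputStream(file, 20, file.length - 20);
        InputStream withHeader = new SequenceInputStream(
                headerOf(new ByteArrayInputStream(file), 1), partial);

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int b;
        while ((b = withHeader.read()) >= 0) {
            out.write(b);
        }
        // Prints "id,comment" then "2,jruby": the split sees the header again,
        // so a CSV parser with skip_header_lines: 1 works on every split.
        System.out.println(out.toString("UTF-8"));
    }
}
```

This is also why the plugin only prepends headers when `file.getStart() > 0`: the first split already contains the real header.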
data/src/main/java/org/embulk/input/hdfs/HdfsFilePartitioner.java CHANGED
@@ -23,7 +23,8 @@ public class HdfsFilePartitioner
         this.numPartitions = numPartitions;
     }
 
-    public List<HdfsPartialFile> getHdfsPartialFiles() throws IOException
+    public List<HdfsPartialFile> getHdfsPartialFiles()
+            throws IOException
     {
         List<HdfsPartialFile> hdfsPartialFiles = new ArrayList<>();
         long size = fs.getFileStatus(path).getLen();
  long size = fs.getFileStatus(path).getLen();
@@ -1,7 +1,5 @@
1
1
  package org.embulk.input.hdfs;
2
2
 
3
- import org.apache.hadoop.fs.Path;
4
-
5
3
  /**
6
4
  * Created by takahiro.nakayama on 8/20/15.
7
5
  */
@@ -20,7 +18,9 @@ public class HdfsPartialFile
20
18
  }
21
19
 
22
20
  // see: http://stackoverflow.com/questions/7625783/jsonmappingexception-no-suitable-constructor-found-for-type-simple-type-class
23
- public HdfsPartialFile() { }
21
+ public HdfsPartialFile()
22
+ {
23
+ }
24
24
 
25
25
  public String getPath()
26
26
  {
@@ -36,5 +36,4 @@ public class HdfsPartialFile
36
36
  {
37
37
  return end;
38
38
  }
39
-
40
39
  }
data/src/main/java/org/embulk/input/hdfs/HdfsPartialFileInputStream.java CHANGED
@@ -6,7 +6,8 @@ import java.io.InputStream;
 import java.io.PushbackInputStream;
 
 // ref. https://github.com/hito4t/embulk-input-filesplit/blob/master/src/main/java/org/embulk/input/filesplit/PartialFileInputStream.java
-public class HdfsPartialFileInputStream extends InputStream
+public class HdfsPartialFileInputStream
+        extends InputStream
 {
     private final PushbackInputStream original;
     private long start;
@@ -23,13 +24,15 @@ public class HdfsPartialFileInputStream extends InputStream
     }
 
     @Override
-    public int read(byte[] b) throws IOException
+    public int read(byte[] b)
+            throws IOException
     {
         return read(b, 0, b.length);
     }
 
     @Override
-    public int read(byte[] b, int off, int len) throws IOException
+    public int read(byte[] b, int off, int len)
+            throws IOException
     {
         initializeIfNeeded();
 
@@ -45,7 +48,7 @@ public class HdfsPartialFileInputStream extends InputStream
 
         current += read;
         if (current >= end) {
-            for (int i = Math.max((int)(end - 1 - current + read), 0); i < read; i++) {
+            for (int i = Math.max((int) (end - 1 - current + read), 0); i < read; i++) {
                 if (b[off + i] == '\n') {
                     eof = true;
                     return i + 1;
@@ -65,7 +68,8 @@ public class HdfsPartialFileInputStream extends InputStream
     }
 
     @Override
-    public int read() throws IOException
+    public int read()
+            throws IOException
     {
         initializeIfNeeded();
 
@@ -91,7 +95,8 @@ public class HdfsPartialFileInputStream extends InputStream
     }
 
     @Override
-    public long skip(long n) throws IOException
+    public long skip(long n)
+            throws IOException
     {
         throw new IOException("Skip not supported.");
         /*
@@ -102,18 +107,21 @@ public class HdfsPartialFileInputStream extends InputStream
     }
 
     @Override
-    public int available() throws IOException
+    public int available()
+            throws IOException
     {
         return 0;
     }
 
     @Override
-    public void close() throws IOException
+    public void close()
+            throws IOException
     {
         original.close();
    }
 
-    private void initializeIfNeeded() throws IOException
+    private void initializeIfNeeded()
+            throws IOException
    {
        if (current >= start) {
            return;
@@ -144,7 +152,8 @@ public class HdfsPartialFileInputStream extends InputStream
    }
    }
 
-    private int prefetch() throws IOException
+    private int prefetch()
+            throws IOException
    {
        int c = original.read();
        if (c >= 0) {
@@ -152,4 +161,4 @@ public class HdfsPartialFileInputStream extends InputStream
    }
    return c;
 }
-}
+}
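
`HdfsPartialFileInputStream` enforces the record-boundary rule that makes those byte ranges safe: a split starting mid-file discards bytes up to and including the next `\n`, and a split keeps reading past its nominal end until the current line is finished, so each line is emitted by exactly one task. A rough sketch of that idea over an in-memory byte array (illustrative helper, not the class's exact algorithm; assumes `\n`-terminated records):

```java
import java.nio.charset.StandardCharsets;

public class SplitBoundarySketch
{
    // Return the records owned by the byte range [start, end):
    // advance to the first line that begins at or after `start`,
    // then read through the newline that terminates the line containing `end - 1`.
    static String ownedLines(byte[] data, int start, int end)
    {
        int from = start;
        if (start > 0) { // mid-file split: discard the partial first line
            while (from < data.length && data[from - 1] != '\n') {
                from++;
            }
        }
        int to = end;
        while (to < data.length && data[to - 1] != '\n') { // finish the last line
            to++;
        }
        return new String(data, from, Math.max(to - from, 0), StandardCharsets.UTF_8);
    }

    public static void main(String[] args)
    {
        byte[] data = "aa\nbbbb\ncc\n".getBytes(StandardCharsets.UTF_8);
        // Ranges [0,5) and [5,11) cut "bbbb" mid-line, but each record is
        // emitted exactly once: the first task finishes it, the second skips it.
        System.out.print(ownedLines(data, 0, 5));  // "aa\nbbbb\n"
        System.out.print(ownedLines(data, 5, 11)); // "cc\n"
    }
}
```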
data/src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java CHANGED
@@ -1,5 +1,232 @@
 package org.embulk.input.hdfs;
 
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.Path;
+import org.embulk.EmbulkTestRuntime;
+import org.embulk.config.ConfigException;
+import org.embulk.config.ConfigSource;
+import org.embulk.config.TaskReport;
+import org.embulk.config.TaskSource;
+import org.embulk.input.hdfs.HdfsFileInputPlugin.PluginTask;
+import org.embulk.spi.Exec;
+import org.embulk.spi.FileInputPlugin;
+import org.embulk.spi.FileInputRunner;
+import org.embulk.spi.InputPlugin;
+import org.embulk.spi.Schema;
+import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
+import org.embulk.spi.util.Pages;
+import org.embulk.standards.CsvParserPlugin;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.slf4j.Logger;
+
+import javax.annotation.Nullable;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
 public class TestHdfsFileInputPlugin
 {
+    @Rule
+    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
+
+    @Rule
+    public ExpectedException exception = ExpectedException.none();
+
+    private Logger logger = runtime.getExec().getLogger(TestHdfsFileInputPlugin.class);
+    private HdfsFileInputPlugin plugin;
+    private FileInputRunner runner;
+    private MockPageOutput output;
+    private Path path;
+
+    @Before
+    public void createResources()
+    {
+        plugin = new HdfsFileInputPlugin();
+        runner = new FileInputRunner(runtime.getInstance(HdfsFileInputPlugin.class));
+        output = new MockPageOutput();
+        path = new Path(new File(getClass().getResource("/sample_01.csv").getPath()).getParent());
+    }
+
+    @Test
+    public void testDefaultValues()
+    {
+        ConfigSource config = Exec.newConfigSource()
+                .set("path", path.toString());
+        PluginTask task = config.loadConfig(PluginTask.class);
+        assertEquals(path.toString(), task.getPath());
+        assertEquals(Lists.newArrayList(), task.getConfigFiles());
+        assertEquals(Maps.newHashMap(), task.getConfig());
+        assertEquals(true, task.getPartition());
+        assertEquals(0, task.getRewindSeconds());
+        assertEquals(-1, task.getApproximateNumPartitions());
+    }
+
+    @Test(expected = ConfigException.class)
+    public void testRequiredValues()
+    {
+        ConfigSource config = Exec.newConfigSource();
+        PluginTask task = config.loadConfig(PluginTask.class);
+    }
+
+    @Test
+    public void testFileList()
+    {
+        ConfigSource config = getConfigWithDefaultValues();
+        config.set("num_partitions", 1);
+        plugin.transaction(config, new FileInputPlugin.Control()
+        {
+            @Override
+            public List<TaskReport> run(TaskSource taskSource, int taskCount)
+            {
+                PluginTask task = taskSource.loadTask(PluginTask.class);
+                List<String> fileList = Lists.transform(Lists.newArrayList(new File(path.toString()).list()), new Function<String, String>()
+                {
+                    @Nullable
+                    @Override
+                    public String apply(@Nullable String input)
+                    {
+                        return new File(path.toString() + "/" + input).toURI().toString();
+                    }
+                });
+
+                List<String> resultFList = Lists.transform(task.getFiles(), new Function<HdfsPartialFile, String>()
+                {
+                    @Nullable
+                    @Override
+                    public String apply(@Nullable HdfsPartialFile input)
+                    {
+                        assert input != null;
+                        return input.getPath();
+                    }
+                });
+                assertEquals(fileList, resultFList);
+                return emptyTaskReports(taskCount);
+            }
+        });
+    }
+
+    @Test
+    public void testHdfsFileInputByOpen()
+    {
+        ConfigSource config = getConfigWithDefaultValues();
+        config.set("num_partitions", 10);
+        runner.transaction(config, new Control());
+        assertRecords(config, output);
+    }
+
+    @Test
+    public void testHdfsFileInputByOpenWithoutPartition()
+    {
+        ConfigSource config = getConfigWithDefaultValues();
+        config.set("partition", false);
+        runner.transaction(config, new Control());
+        assertRecords(config, output);
+    }
+
+    private class Control
+            implements InputPlugin.Control
+    {
+        @Override
+        public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
+        {
+            List<TaskReport> reports = new ArrayList<>();
+            for (int i = 0; i < taskCount; i++) {
+                reports.add(runner.run(taskSource, schema, i, output));
+            }
+            return reports;
+        }
+    }
+
+    private ConfigSource getConfigWithDefaultValues()
+    {
+        return Exec.newConfigSource()
+                .set("path", path.toString())
+                .set("config", hdfsLocalFSConfig())
+                .set("skip_header_lines", 1)
+                .set("parser", parserConfig(schemaConfig()));
+    }
+
+    static List<TaskReport> emptyTaskReports(int taskCount)
+    {
+        ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
+        for (int i = 0; i < taskCount; i++) {
+            reports.add(Exec.newTaskReport());
+        }
+        return reports.build();
+    }
+
+    private ImmutableMap<String, Object> hdfsLocalFSConfig()
+    {
+        ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
+        builder.put("fs.hdfs.impl", "org.apache.hadoop.fs.LocalFileSystem");
+        builder.put("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
+        builder.put("fs.defaultFS", "file:///");
+        return builder.build();
+    }
+
+    private ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
+    {
+        ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
+        builder.put("type", "csv");
+        builder.put("newline", "CRLF");
+        builder.put("delimiter", ",");
+        builder.put("quote", "\"");
+        builder.put("escape", "\"");
+        builder.put("trim_if_not_quoted", false);
+        builder.put("skip_header_lines", 1);
+        builder.put("allow_extra_columns", false);
+        builder.put("allow_optional_columns", false);
+        builder.put("columns", schemaConfig);
+        return builder.build();
+    }
+
+    private ImmutableList<Object> schemaConfig()
+    {
+        ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
+        builder.add(ImmutableMap.of("name", "id", "type", "long"));
+        builder.add(ImmutableMap.of("name", "account", "type", "long"));
+        builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
+        builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d"));
+        builder.add(ImmutableMap.of("name", "comment", "type", "string"));
+        return builder.build();
+    }
+
+    private void assertRecords(ConfigSource config, MockPageOutput output)
+    {
+        List<Object[]> records = getRecords(config, output);
+        assertEquals(8, records.size());
+        {
+            Object[] record = records.get(0);
+            assertEquals(1L, record[0]);
+            assertEquals(32864L, record[1]);
+            assertEquals("2015-01-27 19:23:49 UTC", record[2].toString());
+            assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
+            assertEquals("embulk", record[4]);
+        }
+
+        {
+            Object[] record = records.get(1);
+            assertEquals(2L, record[0]);
+            assertEquals(14824L, record[1]);
+            assertEquals("2015-01-27 19:01:23 UTC", record[2].toString());
+            assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
+            assertEquals("embulk jruby", record[4]);
+        }
+    }
+
+    private List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
+    {
+        Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
+        return Pages.toObjects(schema, output.pages);
+    }
 }
data/src/test/resources/sample_01.csv ADDED
@@ -0,0 +1,5 @@
+id,account,time,purchase,comment
+1,32864,2015-01-27 19:23:49,20150127,embulk
+2,14824,2015-01-27 19:01:23,20150127,embulk jruby
+3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
+4,11270,2015-01-29 11:54:36,20150129,NULL
data/src/test/resources/sample_02.csv ADDED
@@ -0,0 +1,5 @@
+id,account,time,purchase,comment
+1,32864,2015-01-27 19:23:49,20150127,embulk
+2,14824,2015-01-27 19:01:23,20150127,embulk jruby
+3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
+4,11270,2015-01-29 11:54:36,20150129,NULL
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-input-hdfs
 version: !ruby/object:Gem::Version
-  version: 0.1.8
+  version: 0.1.9
 platform: ruby
 authors:
-- takahiro.nakayama
+- Civitaspo
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-12-22 00:00:00.000000000 Z
+date: 2016-02-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -46,9 +46,14 @@ extensions: []
 extra_rdoc_files: []
 files:
 - .gitignore
+- .travis.yml
 - LICENSE.txt
 - README.md
 - build.gradle
+- config/checkstyle/checkstyle.xml
+- config/checkstyle/default.xml
+- example/config.yml
+- example/data.csv
 - gradle/wrapper/gradle-wrapper.jar
 - gradle/wrapper/gradle-wrapper.properties
 - gradlew
@@ -59,6 +64,8 @@ files:
 - src/main/java/org/embulk/input/hdfs/HdfsPartialFile.java
 - src/main/java/org/embulk/input/hdfs/HdfsPartialFileInputStream.java
 - src/test/java/org/embulk/input/hdfs/TestHdfsFileInputPlugin.java
+- src/test/resources/sample_01.csv
+- src/test/resources/sample_02.csv
 - classpath/activation-1.1.jar
 - classpath/apacheds-i18n-2.0.0-M15.jar
 - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
@@ -69,7 +76,7 @@ files:
 - classpath/commons-beanutils-1.7.0.jar
 - classpath/commons-cli-1.2.jar
 - classpath/commons-codec-1.6.jar
-- classpath/commons-collections-3.2.1.jar
+- classpath/commons-collections-3.2.2.jar
 - classpath/commons-compress-1.4.1.jar
 - classpath/commons-configuration-1.6.jar
 - classpath/commons-digester-1.8.jar
@@ -82,23 +89,23 @@ files:
 - classpath/curator-client-2.6.0.jar
 - classpath/curator-framework-2.6.0.jar
 - classpath/curator-recipes-2.6.0.jar
-- classpath/embulk-input-hdfs-0.1.8.jar
+- classpath/embulk-input-hdfs-0.1.9.jar
 - classpath/gson-2.2.4.jar
-- classpath/hadoop-annotations-2.6.0.jar
-- classpath/hadoop-auth-2.6.0.jar
-- classpath/hadoop-client-2.6.0.jar
-- classpath/hadoop-common-2.6.0.jar
-- classpath/hadoop-hdfs-2.6.0.jar
-- classpath/hadoop-mapreduce-client-app-2.6.0.jar
-- classpath/hadoop-mapreduce-client-common-2.6.0.jar
-- classpath/hadoop-mapreduce-client-core-2.6.0.jar
-- classpath/hadoop-mapreduce-client-jobclient-2.6.0.jar
-- classpath/hadoop-mapreduce-client-shuffle-2.6.0.jar
-- classpath/hadoop-yarn-api-2.6.0.jar
-- classpath/hadoop-yarn-client-2.6.0.jar
-- classpath/hadoop-yarn-common-2.6.0.jar
-- classpath/hadoop-yarn-server-common-2.6.0.jar
-- classpath/hadoop-yarn-server-nodemanager-2.6.0.jar
+- classpath/hadoop-annotations-2.6.3.jar
+- classpath/hadoop-auth-2.6.3.jar
+- classpath/hadoop-client-2.6.3.jar
+- classpath/hadoop-common-2.6.3.jar
+- classpath/hadoop-hdfs-2.6.3.jar
+- classpath/hadoop-mapreduce-client-app-2.6.3.jar
+- classpath/hadoop-mapreduce-client-common-2.6.3.jar
+- classpath/hadoop-mapreduce-client-core-2.6.3.jar
+- classpath/hadoop-mapreduce-client-jobclient-2.6.3.jar
+- classpath/hadoop-mapreduce-client-shuffle-2.6.3.jar
+- classpath/hadoop-yarn-api-2.6.3.jar
+- classpath/hadoop-yarn-client-2.6.3.jar
+- classpath/hadoop-yarn-common-2.6.3.jar
+- classpath/hadoop-yarn-server-common-2.6.3.jar
+- classpath/hadoop-yarn-server-nodemanager-2.6.3.jar
 - classpath/htrace-core-3.0.4.jar
 - classpath/httpclient-4.2.5.jar
 - classpath/httpcore-4.2.4.jar