embulk-parser-fluentd_out_file 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +7 -0
- data/CHANGELOG.md +3 -0
- data/README.md +47 -0
- data/build.gradle +97 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/embulk-parser-fluentd_out_file.gemspec +18 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/guess/fluentd_out_file.rb +97 -0
- data/lib/embulk/parser/fluentd_out_file.rb +3 -0
- data/src/main/java/org/embulk/parser/fluentd_out_file/FluentdOutFileParserPlugin.java +178 -0
- data/src/test/java/org/embulk/parser/fluentd_out_file/TestFluentdOutFileParserPlugin.java +185 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 48966ba24342b8320ac3159b13b4af1fe6615caa
|
4
|
+
data.tar.gz: 69cef7b3f095cc9a5daa0aa09fce75695393debe
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3a6c1bae339f8243bc8a65f15e2dfb8ebbf2072d7e5219ac8a08379c00cf0f5ab510598bff5365f5126f24e4bf2fca017b8ccda8e2f493bae7b441987c21a559
|
7
|
+
data.tar.gz: 33d84958e409618ae4278bbef8c49c7dcff1bec6ef3ac1f1a7306add3c501e448879aad9cd346f7cc45932de245abea8ee1d3814d36093cb2861252f2c7d1f8d
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Fluentd Out File parser plugin for Embulk
|
2
|
+
|
3
|
+
This plugin parses fluentd's out_file formatted files.
|
4
|
+
http://docs.fluentd.org/articles/out_file
|
5
|
+
|
6
|
+
## Overview
|
7
|
+
|
8
|
+
* **Plugin type**: parser
|
9
|
+
* **Guess supported**: yes
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
- **delimiter**: Delimiter character such as \t (string, optional, default: `"\t"`)
|
14
|
+
- **columns**: Column definitions; at most three columns, each of type `timestamp`, `string` or `json` (array, required)
|
15
|
+
|
16
|
+
## Example
|
17
|
+
|
18
|
+
```yaml
|
19
|
+
in:
|
20
|
+
type: any file input plugin type
|
21
|
+
parser:
|
22
|
+
type: fluentd_out_file
|
23
|
+
delimiter: "\t"
|
24
|
+
columns:
|
25
|
+
- {name: time, type: timestamp, format: '%Y-%m-%dT%H:%M:%S%:z'}
|
26
|
+
- {name: tag, type: string}
|
27
|
+
- {name: record, type: json}
|
28
|
+
```
|
29
|
+
|
30
|
+
Because this plugin supports guessing, you don't have to write the `parser:` section in the configuration file yourself. After writing the `in:` section, you can let Embulk guess the `parser:` section using these commands:
|
31
|
+
|
32
|
+
```
|
33
|
+
$ embulk gem install embulk-parser-fluentd_out_file
|
34
|
+
$ embulk guess -g fluentd_out_file config.yml -o guessed.yml
|
35
|
+
```
|
36
|
+
|
37
|
+
## Build
|
38
|
+
|
39
|
+
```
|
40
|
+
$ ./gradlew gem # -t to watch change of files and rebuild continuously
|
41
|
+
```
|
42
|
+
|
43
|
+
## Test
|
44
|
+
|
45
|
+
```
|
46
|
+
$ ./gradlew clean test jacocoTestReport
|
47
|
+
```
|
data/build.gradle
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
plugins {
|
2
|
+
id "com.jfrog.bintray" version "1.1"
|
3
|
+
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
|
+
id "java"
|
5
|
+
id "checkstyle"
|
6
|
+
id "jacoco"
|
7
|
+
}
|
8
|
+
import com.github.jrubygradle.JRubyExec
|
9
|
+
repositories {
|
10
|
+
mavenCentral()
|
11
|
+
jcenter()
|
12
|
+
}
|
13
|
+
configurations {
|
14
|
+
provided
|
15
|
+
}
|
16
|
+
|
17
|
+
version = "0.1.0"
|
18
|
+
|
19
|
+
compileJava.options.encoding = 'UTF-8' // source encoding
|
20
|
+
sourceCompatibility = 1.7
|
21
|
+
targetCompatibility = 1.7
|
22
|
+
|
23
|
+
dependencies {
|
24
|
+
compile "org.embulk:embulk-core:0.8.8"
|
25
|
+
provided "org.embulk:embulk-core:0.8.8"
|
26
|
+
|
27
|
+
testCompile "junit:junit:4.+"
|
28
|
+
testCompile "org.embulk:embulk-core:0.8.8:tests"
|
29
|
+
testCompile "org.embulk:embulk-standards:0.8.8"
|
30
|
+
}
|
31
|
+
|
32
|
+
task classpath(type: Copy, dependsOn: ["jar"]) {
|
33
|
+
doFirst { file("classpath").deleteDir() }
|
34
|
+
from (configurations.runtime - configurations.provided + files(jar.archivePath))
|
35
|
+
into "classpath"
|
36
|
+
}
|
37
|
+
clean { delete "classpath" }
|
38
|
+
|
39
|
+
checkstyle {
|
40
|
+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
|
41
|
+
toolVersion = '6.14.1'
|
42
|
+
}
|
43
|
+
checkstyleMain {
|
44
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
45
|
+
ignoreFailures = true
|
46
|
+
}
|
47
|
+
checkstyleTest {
|
48
|
+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
49
|
+
ignoreFailures = true
|
50
|
+
}
|
51
|
+
task checkstyle(type: Checkstyle) {
|
52
|
+
classpath = sourceSets.main.output + sourceSets.test.output
|
53
|
+
source = sourceSets.main.allJava + sourceSets.test.allJava
|
54
|
+
}
|
55
|
+
|
56
|
+
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
57
|
+
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
58
|
+
script "${project.name}.gemspec"
|
59
|
+
doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
|
60
|
+
}
|
61
|
+
|
62
|
+
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
63
|
+
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
|
64
|
+
script "pkg/${project.name}-${project.version}.gem"
|
65
|
+
}
|
66
|
+
|
67
|
+
task "package"(dependsOn: ["gemspec", "classpath"]) << {
|
68
|
+
println "> Build succeeded."
|
69
|
+
println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
|
70
|
+
}
|
71
|
+
|
72
|
+
task gemspec {
|
73
|
+
ext.gemspecFile = file("${project.name}.gemspec")
|
74
|
+
inputs.file "build.gradle"
|
75
|
+
outputs.file gemspecFile
|
76
|
+
doLast { gemspecFile.write($/
|
77
|
+
Gem::Specification.new do |spec|
|
78
|
+
spec.name = "${project.name}"
|
79
|
+
spec.version = "${project.version}"
|
80
|
+
spec.authors = ["Muga Nishizawa"]
|
81
|
+
spec.summary = %[Fluentd Out File parser plugin for Embulk]
|
82
|
+
spec.description = %[Parses Fluentd Out File files read by other file input plugins.]
|
83
|
+
spec.email = ["muga.nishizawa@gmail.com"]
|
84
|
+
spec.licenses = ["Apache 2.0"]
|
85
|
+
spec.homepage = "https://github.com/muga/embulk-parser-fluentd_out_file"
|
86
|
+
|
87
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
88
|
+
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
89
|
+
spec.require_paths = ["lib"]
|
90
|
+
|
91
|
+
spec.add_development_dependency 'bundler', ['~> 1.0']
|
92
|
+
spec.add_development_dependency 'rake', ['>= 10.0']
|
93
|
+
end
|
94
|
+
/$)
|
95
|
+
}
|
96
|
+
}
|
97
|
+
clean { delete "${project.name}.gemspec" }
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<module name="Checker">
|
6
|
+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
|
7
|
+
<module name="FileTabCharacter"/>
|
8
|
+
<module name="NewlineAtEndOfFile">
|
9
|
+
<property name="lineSeparator" value="lf"/>
|
10
|
+
</module>
|
11
|
+
<module name="RegexpMultiline">
|
12
|
+
<property name="format" value="\r"/>
|
13
|
+
<property name="message" value="Line contains carriage return"/>
|
14
|
+
</module>
|
15
|
+
<module name="RegexpMultiline">
|
16
|
+
<property name="format" value=" \n"/>
|
17
|
+
<property name="message" value="Line has trailing whitespace"/>
|
18
|
+
</module>
|
19
|
+
<module name="RegexpMultiline">
|
20
|
+
<property name="format" value="\{\n\n"/>
|
21
|
+
<property name="message" value="Blank line after opening brace"/>
|
22
|
+
</module>
|
23
|
+
<module name="RegexpMultiline">
|
24
|
+
<property name="format" value="\n\n\s*\}"/>
|
25
|
+
<property name="message" value="Blank line before closing brace"/>
|
26
|
+
</module>
|
27
|
+
<module name="RegexpMultiline">
|
28
|
+
<property name="format" value="\n\n\n"/>
|
29
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
30
|
+
</module>
|
31
|
+
<module name="RegexpMultiline">
|
32
|
+
<property name="format" value="\n\n\Z"/>
|
33
|
+
<property name="message" value="Blank line before end of file"/>
|
34
|
+
</module>
|
35
|
+
<module name="RegexpMultiline">
|
36
|
+
<property name="format" value="Preconditions\.checkNotNull"/>
|
37
|
+
<property name="message" value="Use of checkNotNull"/>
|
38
|
+
</module>
|
39
|
+
|
40
|
+
<module name="TreeWalker">
|
41
|
+
<module name="EmptyBlock">
|
42
|
+
<property name="option" value="text"/>
|
43
|
+
<property name="tokens" value="
|
44
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
45
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
46
|
+
</module>
|
47
|
+
<module name="EmptyStatement"/>
|
48
|
+
<module name="EmptyForInitializerPad"/>
|
49
|
+
<module name="EmptyForIteratorPad">
|
50
|
+
<property name="option" value="space"/>
|
51
|
+
</module>
|
52
|
+
<module name="MethodParamPad">
|
53
|
+
<property name="allowLineBreaks" value="true"/>
|
54
|
+
<property name="option" value="nospace"/>
|
55
|
+
</module>
|
56
|
+
<module name="ParenPad"/>
|
57
|
+
<module name="TypecastParenPad"/>
|
58
|
+
<module name="NeedBraces"/>
|
59
|
+
<module name="LeftCurly">
|
60
|
+
<property name="option" value="nl"/>
|
61
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
62
|
+
</module>
|
63
|
+
<module name="LeftCurly">
|
64
|
+
<property name="option" value="eol"/>
|
65
|
+
<property name="tokens" value="
|
66
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
67
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
68
|
+
</module>
|
69
|
+
<module name="RightCurly">
|
70
|
+
<property name="option" value="alone"/>
|
71
|
+
</module>
|
72
|
+
<module name="GenericWhitespace"/>
|
73
|
+
<module name="WhitespaceAfter"/>
|
74
|
+
<module name="NoWhitespaceBefore"/>
|
75
|
+
|
76
|
+
<module name="UpperEll"/>
|
77
|
+
<module name="DefaultComesLast"/>
|
78
|
+
<module name="ArrayTypeStyle"/>
|
79
|
+
<module name="MultipleVariableDeclarations"/>
|
80
|
+
<module name="ModifierOrder"/>
|
81
|
+
<module name="OneStatementPerLine"/>
|
82
|
+
<module name="StringLiteralEquality"/>
|
83
|
+
<module name="MutableException"/>
|
84
|
+
<module name="EqualsHashCode"/>
|
85
|
+
<module name="InnerAssignment"/>
|
86
|
+
<module name="InterfaceIsType"/>
|
87
|
+
<module name="HideUtilityClassConstructor"/>
|
88
|
+
|
89
|
+
<module name="MemberName"/>
|
90
|
+
<module name="LocalVariableName"/>
|
91
|
+
<module name="LocalFinalVariableName"/>
|
92
|
+
<module name="TypeName"/>
|
93
|
+
<module name="PackageName"/>
|
94
|
+
<module name="ParameterName"/>
|
95
|
+
<module name="StaticVariableName"/>
|
96
|
+
<module name="ClassTypeParameterName">
|
97
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
98
|
+
</module>
|
99
|
+
<module name="MethodTypeParameterName">
|
100
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
101
|
+
</module>
|
102
|
+
|
103
|
+
<module name="AvoidStarImport"/>
|
104
|
+
<module name="RedundantImport"/>
|
105
|
+
<module name="UnusedImports"/>
|
106
|
+
<module name="ImportOrder">
|
107
|
+
<property name="groups" value="*,javax,java"/>
|
108
|
+
<property name="separated" value="true"/>
|
109
|
+
<property name="option" value="bottom"/>
|
110
|
+
<property name="sortStaticImportsAlphabetically" value="true"/>
|
111
|
+
</module>
|
112
|
+
|
113
|
+
<module name="WhitespaceAround">
|
114
|
+
<property name="allowEmptyConstructors" value="true"/>
|
115
|
+
<property name="allowEmptyMethods" value="true"/>
|
116
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
117
|
+
<property name="tokens" value="
|
118
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
119
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
120
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
121
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
122
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
123
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
124
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
125
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
126
|
+
</module>
|
127
|
+
</module>
|
128
|
+
</module>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
Gem::Specification.new do |spec|
|
3
|
+
spec.name = "embulk-parser-fluentd_out_file"
|
4
|
+
spec.version = "0.1.0"
|
5
|
+
spec.authors = ["Muga Nishizawa"]
|
6
|
+
spec.summary = %[Fluentd Out File parser plugin for Embulk]
|
7
|
+
spec.description = %[Parses Fluentd Out File files read by other file input plugins.]
|
8
|
+
spec.email = ["muga.nishizawa@gmail.com"]
|
9
|
+
spec.licenses = ["Apache 2.0"]
|
10
|
+
spec.homepage = "https://github.com/muga/embulk-parser-fluentd_out_file"
|
11
|
+
|
12
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
13
|
+
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
14
|
+
spec.require_paths = ["lib"]
|
15
|
+
|
16
|
+
spec.add_development_dependency 'bundler', ['~> 1.0']
|
17
|
+
spec.add_development_dependency 'rake', ['>= 10.0']
|
18
|
+
end
|
Binary file
|
data/gradlew
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
##############################################################################
|
4
|
+
##
|
5
|
+
## Gradle start up script for UN*X
|
6
|
+
##
|
7
|
+
##############################################################################
|
8
|
+
|
9
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
10
|
+
DEFAULT_JVM_OPTS=""
|
11
|
+
|
12
|
+
APP_NAME="Gradle"
|
13
|
+
APP_BASE_NAME=`basename "$0"`
|
14
|
+
|
15
|
+
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
|
+
MAX_FD="maximum"
|
17
|
+
|
18
|
+
warn ( ) {
|
19
|
+
echo "$*"
|
20
|
+
}
|
21
|
+
|
22
|
+
die ( ) {
|
23
|
+
echo
|
24
|
+
echo "$*"
|
25
|
+
echo
|
26
|
+
exit 1
|
27
|
+
}
|
28
|
+
|
29
|
+
# OS specific support (must be 'true' or 'false').
|
30
|
+
cygwin=false
|
31
|
+
msys=false
|
32
|
+
darwin=false
|
33
|
+
case "`uname`" in
|
34
|
+
CYGWIN* )
|
35
|
+
cygwin=true
|
36
|
+
;;
|
37
|
+
Darwin* )
|
38
|
+
darwin=true
|
39
|
+
;;
|
40
|
+
MINGW* )
|
41
|
+
msys=true
|
42
|
+
;;
|
43
|
+
esac
|
44
|
+
|
45
|
+
# Attempt to set APP_HOME
|
46
|
+
# Resolve links: $0 may be a link
|
47
|
+
PRG="$0"
|
48
|
+
# Need this for relative symlinks.
|
49
|
+
while [ -h "$PRG" ] ; do
|
50
|
+
ls=`ls -ld "$PRG"`
|
51
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
52
|
+
if expr "$link" : '/.*' > /dev/null; then
|
53
|
+
PRG="$link"
|
54
|
+
else
|
55
|
+
PRG=`dirname "$PRG"`"/$link"
|
56
|
+
fi
|
57
|
+
done
|
58
|
+
SAVED="`pwd`"
|
59
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
60
|
+
APP_HOME="`pwd -P`"
|
61
|
+
cd "$SAVED" >/dev/null
|
62
|
+
|
63
|
+
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
64
|
+
|
65
|
+
# Determine the Java command to use to start the JVM.
|
66
|
+
if [ -n "$JAVA_HOME" ] ; then
|
67
|
+
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
68
|
+
# IBM's JDK on AIX uses strange locations for the executables
|
69
|
+
JAVACMD="$JAVA_HOME/jre/sh/java"
|
70
|
+
else
|
71
|
+
JAVACMD="$JAVA_HOME/bin/java"
|
72
|
+
fi
|
73
|
+
if [ ! -x "$JAVACMD" ] ; then
|
74
|
+
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
75
|
+
|
76
|
+
Please set the JAVA_HOME variable in your environment to match the
|
77
|
+
location of your Java installation."
|
78
|
+
fi
|
79
|
+
else
|
80
|
+
JAVACMD="java"
|
81
|
+
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
82
|
+
|
83
|
+
Please set the JAVA_HOME variable in your environment to match the
|
84
|
+
location of your Java installation."
|
85
|
+
fi
|
86
|
+
|
87
|
+
# Increase the maximum file descriptors if we can.
|
88
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
89
|
+
MAX_FD_LIMIT=`ulimit -H -n`
|
90
|
+
if [ $? -eq 0 ] ; then
|
91
|
+
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
92
|
+
MAX_FD="$MAX_FD_LIMIT"
|
93
|
+
fi
|
94
|
+
ulimit -n $MAX_FD
|
95
|
+
if [ $? -ne 0 ] ; then
|
96
|
+
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
97
|
+
fi
|
98
|
+
else
|
99
|
+
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
100
|
+
fi
|
101
|
+
fi
|
102
|
+
|
103
|
+
# For Darwin, add options to specify how the application appears in the dock
|
104
|
+
if $darwin; then
|
105
|
+
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
106
|
+
fi
|
107
|
+
|
108
|
+
# For Cygwin, switch paths to Windows format before running java
|
109
|
+
if $cygwin ; then
|
110
|
+
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
111
|
+
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
112
|
+
JAVACMD=`cygpath --unix "$JAVACMD"`
|
113
|
+
|
114
|
+
# We build the pattern for arguments to be converted via cygpath
|
115
|
+
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
116
|
+
SEP=""
|
117
|
+
for dir in $ROOTDIRSRAW ; do
|
118
|
+
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
119
|
+
SEP="|"
|
120
|
+
done
|
121
|
+
OURCYGPATTERN="(^($ROOTDIRS))"
|
122
|
+
# Add a user-defined pattern to the cygpath arguments
|
123
|
+
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
124
|
+
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
125
|
+
fi
|
126
|
+
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
127
|
+
i=0
|
128
|
+
for arg in "$@" ; do
|
129
|
+
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
130
|
+
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
131
|
+
|
132
|
+
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
133
|
+
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
134
|
+
else
|
135
|
+
eval `echo args$i`="\"$arg\""
|
136
|
+
fi
|
137
|
+
i=$((i+1))
|
138
|
+
done
|
139
|
+
case $i in
|
140
|
+
(0) set -- ;;
|
141
|
+
(1) set -- "$args0" ;;
|
142
|
+
(2) set -- "$args0" "$args1" ;;
|
143
|
+
(3) set -- "$args0" "$args1" "$args2" ;;
|
144
|
+
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
145
|
+
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
146
|
+
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
147
|
+
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
148
|
+
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
149
|
+
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
150
|
+
esac
|
151
|
+
fi
|
152
|
+
|
153
|
+
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
|
154
|
+
function splitJvmOpts() {
|
155
|
+
JVM_OPTS=("$@")
|
156
|
+
}
|
157
|
+
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
158
|
+
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
159
|
+
|
160
|
+
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
data/gradlew.bat
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
@if "%DEBUG%" == "" @echo off
|
2
|
+
@rem ##########################################################################
|
3
|
+
@rem
|
4
|
+
@rem Gradle startup script for Windows
|
5
|
+
@rem
|
6
|
+
@rem ##########################################################################
|
7
|
+
|
8
|
+
@rem Set local scope for the variables with windows NT shell
|
9
|
+
if "%OS%"=="Windows_NT" setlocal
|
10
|
+
|
11
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
+
set DEFAULT_JVM_OPTS=
|
13
|
+
|
14
|
+
set DIRNAME=%~dp0
|
15
|
+
if "%DIRNAME%" == "" set DIRNAME=.
|
16
|
+
set APP_BASE_NAME=%~n0
|
17
|
+
set APP_HOME=%DIRNAME%
|
18
|
+
|
19
|
+
@rem Find java.exe
|
20
|
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
+
|
22
|
+
set JAVA_EXE=java.exe
|
23
|
+
%JAVA_EXE% -version >NUL 2>&1
|
24
|
+
if "%ERRORLEVEL%" == "0" goto init
|
25
|
+
|
26
|
+
echo.
|
27
|
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
+
echo.
|
29
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
+
echo location of your Java installation.
|
31
|
+
|
32
|
+
goto fail
|
33
|
+
|
34
|
+
:findJavaFromJavaHome
|
35
|
+
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
+
|
38
|
+
if exist "%JAVA_EXE%" goto init
|
39
|
+
|
40
|
+
echo.
|
41
|
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
+
echo.
|
43
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
+
echo location of your Java installation.
|
45
|
+
|
46
|
+
goto fail
|
47
|
+
|
48
|
+
:init
|
49
|
+
@rem Get command-line arguments, handling Windowz variants
|
50
|
+
|
51
|
+
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
+
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
|
+
|
54
|
+
:win9xME_args
|
55
|
+
@rem Slurp the command line arguments.
|
56
|
+
set CMD_LINE_ARGS=
|
57
|
+
set _SKIP=2
|
58
|
+
|
59
|
+
:win9xME_args_slurp
|
60
|
+
if "x%~1" == "x" goto execute
|
61
|
+
|
62
|
+
set CMD_LINE_ARGS=%*
|
63
|
+
goto execute
|
64
|
+
|
65
|
+
:4NT_args
|
66
|
+
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
+
set CMD_LINE_ARGS=%$
|
68
|
+
|
69
|
+
:execute
|
70
|
+
@rem Setup the command line
|
71
|
+
|
72
|
+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
73
|
+
|
74
|
+
@rem Execute Gradle
|
75
|
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
76
|
+
|
77
|
+
:end
|
78
|
+
@rem End local scope for the variables with windows NT shell
|
79
|
+
if "%ERRORLEVEL%"=="0" goto mainEnd
|
80
|
+
|
81
|
+
:fail
|
82
|
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
83
|
+
rem the _cmd.exe /c_ return code!
|
84
|
+
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
85
|
+
exit /b 1
|
86
|
+
|
87
|
+
:mainEnd
|
88
|
+
if "%OS%"=="Windows_NT" endlocal
|
89
|
+
|
90
|
+
:omega
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Guess
|
3
|
+
require 'embulk/guess/schema_guess'
|
4
|
+
|
5
|
+
class FluentdOutFileGuessPlugin < LineGuessPlugin
|
6
|
+
Plugin.register_guess("fluentd_out_file", self)
|
7
|
+
|
8
|
+
DELIMITER_CANDIDATES = [
|
9
|
+
"\t", ",", "|"
|
10
|
+
]
|
11
|
+
|
12
|
+
# Guesses the +parser:+ section for fluentd out_file formatted input.
#
# Returns an empty hash when a different parser type is already configured,
# when no delimiter can be determined, when more than three columns are
# detected, or when a detected column type is not timestamp/string/json.
# Otherwise returns {"parser" => ...} carrying the parser type, the
# delimiter and the guessed column list.
def guess_lines(config, sample_lines)
  configured_type = config.fetch("parser", {}).fetch("type", "fluentd_out_file")
  return {} unless configured_type == "fluentd_out_file"

  parser_config = config["parser"] || {}

  # guess delimiter (an explicitly configured delimiter is used as-is)
  delim =
    if parser_config["type"] == "fluentd_out_file" && parser_config["delimiter"]
      parser_config["delimiter"]
    else
      guess_delimiter(sample_lines)
    end
  # not fluentd_out_file file
  return {} unless delim

  parser_guessed = DataSource.new.merge(parser_config).merge({"type" => "fluentd_out_file", "delimiter" => delim})

  # guess schema
  sample_records = sample_lines.map {|line| line.split(delim) }
  column_types = SchemaGuess.types_from_array_records(sample_records)
  # not fluentd_out_file file
  return {} if column_types.size > 3

  columns = []
  column_types.each do |type|
    column =
      if type.is_a?(SchemaGuess::TimestampTypeMatch)
        {"name" => "time", "type" => type, "format" => type.format}
      elsif type == "string"
        {"name" => "tag", "type" => type}
      elsif type == "json"
        {"name" => "record", "type" => type}
      end
    # not fluentd_out_file file
    return {} unless column
    columns << column
  end
  parser_guessed["columns"] = columns

  {"parser" => parser_guessed}
end
|
54
|
+
|
55
|
+
private # ported from csv_guess.rb temporarily
|
56
|
+
|
57
|
+
# Picks the most plausible delimiter among DELIMITER_CANDIDATES.
#
# Each candidate is scored as (total occurrences across the sample lines)
# divided by the standard deviation of its per-line counts, so a character
# that appears often and evenly scores highest. Returns the best candidate
# when its score exceeds 1, otherwise nil.
def guess_delimiter(sample_lines)
  scored = DELIMITER_CANDIDATES.map do |candidate|
    per_line = sample_lines.map {|line| line.count(candidate) }
    occurrences = array_sum(per_line)
    # a candidate that never appears cannot be the delimiter
    next [nil, 0] unless occurrences > 0

    spread = array_standard_deviation(per_line)
    # avoid dividing by zero when every line has the same count
    spread = 0.000000001 if spread == 0.0
    [candidate, occurrences / spread]
  end

  best, score = *scored.sort_by {|_, w| w }.last
  return nil if best.nil?
  score > 1 ? best : nil
end
|
78
|
+
|
79
|
+
# Returns the sum of the elements of +array+ (0 for an empty array).
def array_sum(array)
  array.reduce(0) {|sum, n| sum + n }
end
|
82
|
+
|
83
|
+
# Returns the arithmetic mean of +array+ as a Float.
# An empty array yields NaN (0.0 divided by 0).
def array_avg(array)
  total = array.reduce(0.0) {|acc, n| acc + n }
  total / array.size
end
|
86
|
+
|
87
|
+
# Returns the variance of +array+: the mean of the squared deviations
# from array_avg(array), dividing by the number of elements.
def array_variance(array)
  mean = array_avg(array)
  squared_deviations = array.reduce(0.0) {|acc, n| acc + (n - mean) ** 2 }
  squared_deviations / array.size
end
|
91
|
+
|
92
|
+
# Returns the standard deviation of +array+, i.e. the square root of
# array_variance(array).
def array_standard_deviation(array)
  variance = array_variance(array)
  Math.sqrt(variance)
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
package org.embulk.parser.fluentd_out_file;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.ConfigSource;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.config.TaskSource;
|
8
|
+
import org.embulk.spi.Column;
|
9
|
+
import org.embulk.spi.DataException;
|
10
|
+
import org.embulk.spi.Exec;
|
11
|
+
import org.embulk.spi.PageBuilder;
|
12
|
+
import org.embulk.spi.ParserPlugin;
|
13
|
+
import org.embulk.spi.FileInput;
|
14
|
+
import org.embulk.spi.PageOutput;
|
15
|
+
import org.embulk.spi.Schema;
|
16
|
+
import org.embulk.spi.SchemaConfig;
|
17
|
+
import org.embulk.spi.SchemaConfigException;
|
18
|
+
import org.embulk.spi.json.JsonParseException;
|
19
|
+
import org.embulk.spi.json.JsonParser;
|
20
|
+
import org.embulk.spi.time.Timestamp;
|
21
|
+
import org.embulk.spi.time.TimestampParseException;
|
22
|
+
import org.embulk.spi.time.TimestampParser;
|
23
|
+
import org.embulk.spi.type.Types;
|
24
|
+
import org.embulk.spi.util.LineDecoder;
|
25
|
+
import org.embulk.spi.util.Timestamps;
|
26
|
+
import org.msgpack.value.Value;
|
27
|
+
import org.slf4j.Logger;
|
28
|
+
|
29
|
+
public class FluentdOutFileParserPlugin
|
30
|
+
implements ParserPlugin
|
31
|
+
{
|
32
|
+
// @see http://docs.fluentd.org/articles/out_file#format
|
33
|
+
|
34
|
+
public interface PluginTask
|
35
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
36
|
+
{
|
37
|
+
@Config("delimiter")
|
38
|
+
@ConfigDefault("\"\\t\"")
|
39
|
+
char getDelimiterChar();
|
40
|
+
|
41
|
+
@Config("columns")
|
42
|
+
SchemaConfig getSchemaConfig();
|
43
|
+
}
|
44
|
+
|
45
|
+
private final Logger log;
|
46
|
+
|
47
|
+
public FluentdOutFileParserPlugin()
|
48
|
+
{
|
49
|
+
log = Exec.getLogger(FluentdOutFileParserPlugin.class);
|
50
|
+
}
|
51
|
+
|
52
|
+
@Override
|
53
|
+
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
54
|
+
{
|
55
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
56
|
+
|
57
|
+
Schema schema = task.getSchemaConfig().toSchema();
|
58
|
+
validateSchema(schema);
|
59
|
+
|
60
|
+
control.run(task.dump(), schema);
|
61
|
+
}
|
62
|
+
|
63
|
+
private void validateSchema(Schema schema)
|
64
|
+
{
|
65
|
+
if (schema.getColumnCount() > 3) {
|
66
|
+
throw new SchemaConfigException("The size of columns must not be greater than 3: " + schema.getColumnCount());
|
67
|
+
}
|
68
|
+
|
69
|
+
for (Column column : schema.getColumns()) {
|
70
|
+
if (!column.getType().equals(Types.TIMESTAMP) &&
|
71
|
+
!column.getType().equals(Types.STRING) &&
|
72
|
+
!column.getType().equals(Types.JSON)) {
|
73
|
+
throw new SchemaConfigException("columns must not include 'long' and 'double' types.");
|
74
|
+
}
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
@Override
|
79
|
+
public void run(TaskSource taskSource, Schema schema,
|
80
|
+
FileInput input, PageOutput output)
|
81
|
+
{
|
82
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
83
|
+
final char delimiter = task.getDelimiterChar();
|
84
|
+
|
85
|
+
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
|
86
|
+
final JsonParser jsonParser = new JsonParser();
|
87
|
+
|
88
|
+
long lineNumber;
|
89
|
+
int linePos;
|
90
|
+
String line;
|
91
|
+
int columnIndex;
|
92
|
+
|
93
|
+
try (final PageBuilder pageBuilder = newPageBuilder(schema, output);
|
94
|
+
final LineDecoder decoder = new LineDecoder(input, task)) {
|
95
|
+
while (decoder.nextFile()) {
|
96
|
+
lineNumber = 0;
|
97
|
+
|
98
|
+
while ((line = decoder.poll()) != null) {
|
99
|
+
lineNumber++;
|
100
|
+
linePos = 0;
|
101
|
+
columnIndex = 0;
|
102
|
+
|
103
|
+
try {
|
104
|
+
// parse time
|
105
|
+
if (isTimestampType(schema.getColumn(columnIndex))) {
|
106
|
+
Column column = schema.getColumn(columnIndex);
|
107
|
+
|
108
|
+
int i = indexOf(delimiter, linePos, line);
|
109
|
+
Timestamp timestamp = timestampParsers[column.getIndex()].parse(line.substring(linePos, i));
|
110
|
+
pageBuilder.setTimestamp(column, timestamp);
|
111
|
+
|
112
|
+
linePos = i + 1;
|
113
|
+
columnIndex += 1;
|
114
|
+
}
|
115
|
+
|
116
|
+
// parse tag
|
117
|
+
if (isStringType(schema.getColumn(columnIndex))) {
|
118
|
+
Column column = schema.getColumn(columnIndex);
|
119
|
+
|
120
|
+
int i = indexOf(delimiter, linePos, line);
|
121
|
+
pageBuilder.setString(column, line.substring(linePos, i));
|
122
|
+
|
123
|
+
linePos = i + 1;
|
124
|
+
columnIndex += 1;
|
125
|
+
}
|
126
|
+
|
127
|
+
// parse record
|
128
|
+
Column column = schema.getColumn(columnIndex);
|
129
|
+
Value value = jsonParser.parse(line.substring(linePos));
|
130
|
+
|
131
|
+
pageBuilder.setJson(column, value);
|
132
|
+
|
133
|
+
pageBuilder.addRecord();
|
134
|
+
}
|
135
|
+
catch (FluentdOutFileRecordValidateException | StringIndexOutOfBoundsException |
|
136
|
+
TimestampParseException | JsonParseException e) {
|
137
|
+
log.warn(String.format("Skipped record %d (%s): %s", lineNumber, e.getMessage(), line));
|
138
|
+
}
|
139
|
+
}
|
140
|
+
}
|
141
|
+
|
142
|
+
pageBuilder.finish();
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
private PageBuilder newPageBuilder(Schema schema, PageOutput output)
|
147
|
+
{
|
148
|
+
return new PageBuilder(Exec.getBufferAllocator(), schema, output);
|
149
|
+
}
|
150
|
+
|
151
|
+
private static boolean isTimestampType(Column column)
|
152
|
+
{
|
153
|
+
return column.getType().equals(Types.TIMESTAMP);
|
154
|
+
}
|
155
|
+
|
156
|
+
private static boolean isStringType(Column column)
|
157
|
+
{
|
158
|
+
return column.getType().equals(Types.STRING);
|
159
|
+
}
|
160
|
+
|
161
|
+
private static int indexOf(char c, int fromPos, String inLine)
|
162
|
+
{
|
163
|
+
int i = inLine.indexOf(c, fromPos);
|
164
|
+
if (i < 0) {
|
165
|
+
throw new FluentdOutFileRecordValidateException("Too few columns");
|
166
|
+
}
|
167
|
+
return i;
|
168
|
+
}
|
169
|
+
|
170
|
+
static class FluentdOutFileRecordValidateException
|
171
|
+
extends DataException
|
172
|
+
{
|
173
|
+
FluentdOutFileRecordValidateException(String message)
|
174
|
+
{
|
175
|
+
super(message);
|
176
|
+
}
|
177
|
+
}
|
178
|
+
}
|
@@ -0,0 +1,185 @@
|
|
1
|
+
package org.embulk.parser.fluentd_out_file;
|
2
|
+
|
3
|
+
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.ImmutableMap;
|
5
|
+
import com.google.common.collect.Lists;
|
6
|
+
import org.embulk.EmbulkTestRuntime;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.config.TaskSource;
|
10
|
+
import org.embulk.parser.fluentd_out_file.FluentdOutFileParserPlugin.PluginTask;
|
11
|
+
import org.embulk.spi.ColumnConfig;
|
12
|
+
import org.embulk.spi.FileInput;
|
13
|
+
import org.embulk.spi.FileInputRunner;
|
14
|
+
import org.embulk.spi.ParserPlugin;
|
15
|
+
import org.embulk.spi.Schema;
|
16
|
+
import org.embulk.spi.SchemaConfig;
|
17
|
+
import org.embulk.spi.SchemaConfigException;
|
18
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
19
|
+
import org.embulk.spi.time.Timestamp;
|
20
|
+
import org.embulk.spi.type.Type;
|
21
|
+
import org.embulk.spi.type.Types;
|
22
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
23
|
+
import org.embulk.spi.util.Pages;
|
24
|
+
import org.embulk.standards.LocalFileInputPlugin;
|
25
|
+
import org.junit.Before;
|
26
|
+
import org.junit.Rule;
|
27
|
+
import org.junit.Test;
|
28
|
+
|
29
|
+
import java.io.ByteArrayInputStream;
|
30
|
+
import java.io.IOException;
|
31
|
+
import java.io.InputStream;
|
32
|
+
import java.util.List;
|
33
|
+
|
34
|
+
import static org.junit.Assert.assertEquals;
|
35
|
+
import static org.junit.Assert.assertTrue;
|
36
|
+
import static org.junit.Assert.fail;
|
37
|
+
import static org.msgpack.value.ValueFactory.newMap;
|
38
|
+
import static org.msgpack.value.ValueFactory.newString;
|
39
|
+
|
40
|
+
public class TestFluentdOutFileParserPlugin
|
41
|
+
{
|
42
|
+
@Rule
|
43
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
44
|
+
|
45
|
+
private ConfigSource config;
|
46
|
+
private FluentdOutFileParserPlugin plugin;
|
47
|
+
private FileInputRunner runner;
|
48
|
+
private MockPageOutput output;
|
49
|
+
|
50
|
+
@Before
|
51
|
+
public void createResources()
|
52
|
+
{
|
53
|
+
config = config().set("type", "fluentd_out_file");
|
54
|
+
plugin = new FluentdOutFileParserPlugin();
|
55
|
+
runner = new FileInputRunner(new LocalFileInputPlugin());
|
56
|
+
output = new MockPageOutput();
|
57
|
+
}
|
58
|
+
|
59
|
+
@Test
|
60
|
+
public void checkDefaultValues()
|
61
|
+
{
|
62
|
+
ConfigSource config = this.config.deepCopy()
|
63
|
+
.set("columns", ImmutableList.of(
|
64
|
+
ImmutableMap.of("name", "date_code", "type", "string"))
|
65
|
+
);
|
66
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
67
|
+
assertEquals('\t', task.getDelimiterChar());
|
68
|
+
}
|
69
|
+
|
70
|
+
@Test(expected = ConfigException.class)
|
71
|
+
public void checkColumnsRequired()
|
72
|
+
{
|
73
|
+
ConfigSource config = this.config.deepCopy();
|
74
|
+
config.loadConfig(PluginTask.class);
|
75
|
+
}
|
76
|
+
|
77
|
+
@Test
|
78
|
+
public void checkSchemaValidation()
|
79
|
+
{
|
80
|
+
{ // columns size must not be greater than 3.
|
81
|
+
ConfigSource config = this.config.deepCopy()
|
82
|
+
.set("columns", ImmutableList.of(
|
83
|
+
ImmutableMap.of("name", "_c0", "type", "string"),
|
84
|
+
ImmutableMap.of("name", "_c1", "type", "string"),
|
85
|
+
ImmutableMap.of("name", "_c2", "type", "string"),
|
86
|
+
ImmutableMap.of("name", "_c3", "type", "string"))
|
87
|
+
);
|
88
|
+
try {
|
89
|
+
plugin.transaction(config, null);
|
90
|
+
fail();
|
91
|
+
}
|
92
|
+
catch (Throwable t) {
|
93
|
+
assertTrue(t instanceof SchemaConfigException);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
{ // columns must not include 'long' and 'double' typed columns.
|
97
|
+
ConfigSource config = this.config.deepCopy()
|
98
|
+
.set("columns", ImmutableList.of(
|
99
|
+
ImmutableMap.of("name", "_c0", "type", "long"),
|
100
|
+
ImmutableMap.of("name", "_c1", "type", "double"))
|
101
|
+
);
|
102
|
+
try {
|
103
|
+
plugin.transaction(config, null);
|
104
|
+
fail();
|
105
|
+
}
|
106
|
+
catch (Throwable t) {
|
107
|
+
assertTrue(t instanceof SchemaConfigException);
|
108
|
+
}
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
@Test
|
113
|
+
public void checkTransaction()
|
114
|
+
throws Exception
|
115
|
+
{
|
116
|
+
SchemaConfig schema = schema(
|
117
|
+
column("time", Types.TIMESTAMP, config().set("format", "%Y-%m-%dT%H:%M:%S")),
|
118
|
+
column("tag", Types.STRING),
|
119
|
+
column("record", Types.JSON));
|
120
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
121
|
+
|
122
|
+
transaction(config, fileInput(
|
123
|
+
"2014-06-08T23:59:40\tfile.server.logs\t{\"field1\":\"value1\",\"field2\":\"value2\"}"
|
124
|
+
));
|
125
|
+
|
126
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
127
|
+
assertEquals(1, records.size());
|
128
|
+
for (Object[] record : records) {
|
129
|
+
assertEquals(Timestamp.ofEpochSecond(1402271980L), record[0]); // 2014-06-08T23:59:40UTC
|
130
|
+
assertEquals("file.server.logs", record[1]);
|
131
|
+
assertEquals(newMap(newString("field1"), newString("value1"), newString("field2"), newString("value2")), record[2]);
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
private FileInput fileInput(String... lines)
|
136
|
+
throws Exception
|
137
|
+
{
|
138
|
+
StringBuilder sb = new StringBuilder();
|
139
|
+
for (String line : lines) {
|
140
|
+
sb.append(line).append("\n");
|
141
|
+
}
|
142
|
+
|
143
|
+
ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes());
|
144
|
+
return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
|
145
|
+
}
|
146
|
+
|
147
|
+
private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
|
148
|
+
throws IOException
|
149
|
+
{
|
150
|
+
return new InputStreamFileInput.IteratorProvider(
|
151
|
+
ImmutableList.copyOf(inputStreams));
|
152
|
+
}
|
153
|
+
|
154
|
+
private ConfigSource config()
|
155
|
+
{
|
156
|
+
return runtime.getExec().newConfigSource();
|
157
|
+
}
|
158
|
+
|
159
|
+
private SchemaConfig schema(ColumnConfig... columns)
|
160
|
+
{
|
161
|
+
return new SchemaConfig(Lists.newArrayList(columns));
|
162
|
+
}
|
163
|
+
|
164
|
+
private ColumnConfig column(String name, Type type)
|
165
|
+
{
|
166
|
+
return column(name, type, config());
|
167
|
+
}
|
168
|
+
|
169
|
+
private ColumnConfig column(String name, Type type, ConfigSource config)
|
170
|
+
{
|
171
|
+
return new ColumnConfig(name, type, config);
|
172
|
+
}
|
173
|
+
|
174
|
+
private void transaction(ConfigSource config, final FileInput input)
|
175
|
+
{
|
176
|
+
plugin.transaction(config, new ParserPlugin.Control()
|
177
|
+
{
|
178
|
+
@Override
|
179
|
+
public void run(TaskSource taskSource, Schema schema)
|
180
|
+
{
|
181
|
+
plugin.run(taskSource, schema, input, output);
|
182
|
+
}
|
183
|
+
});
|
184
|
+
}
|
185
|
+
}
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-parser-fluentd_out_file
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Muga Nishizawa
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ~>
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '1.0'
|
19
|
+
name: bundler
|
20
|
+
prerelease: false
|
21
|
+
type: :development
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '10.0'
|
33
|
+
name: rake
|
34
|
+
prerelease: false
|
35
|
+
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description: Parses Fluentd Out File files read by other file input plugins.
|
42
|
+
email:
|
43
|
+
- muga.nishizawa@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- .travis.yml
|
50
|
+
- CHANGELOG.md
|
51
|
+
- README.md
|
52
|
+
- build.gradle
|
53
|
+
- config/checkstyle/checkstyle.xml
|
54
|
+
- config/checkstyle/default.xml
|
55
|
+
- embulk-parser-fluentd_out_file.gemspec
|
56
|
+
- gradle/wrapper/gradle-wrapper.jar
|
57
|
+
- gradle/wrapper/gradle-wrapper.properties
|
58
|
+
- gradlew
|
59
|
+
- gradlew.bat
|
60
|
+
- lib/embulk/guess/fluentd_out_file.rb
|
61
|
+
- lib/embulk/parser/fluentd_out_file.rb
|
62
|
+
- src/main/java/org/embulk/parser/fluentd_out_file/FluentdOutFileParserPlugin.java
|
63
|
+
- src/test/java/org/embulk/parser/fluentd_out_file/TestFluentdOutFileParserPlugin.java
|
64
|
+
- classpath/embulk-parser-fluentd_out_file-0.1.0.jar
|
65
|
+
homepage: https://github.com/muga/embulk-parser-fluentd_out_file
|
66
|
+
licenses:
|
67
|
+
- Apache 2.0
|
68
|
+
metadata: {}
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 2.1.9
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: Fluentd Out File parser plugin for Embulk
|
89
|
+
test_files: []
|