embulk-parser-xml2 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b527063f7a1921ce25a2d0b0468a570ddf8e1d2f
4
+ data.tar.gz: d5032e70c5f61ea28645cea41dcdb344acac8786
5
+ SHA512:
6
+ metadata.gz: c5a2406d80c9d5e95c576128dbe5d0398b38b649e2dd2bed22e5e3700c18a7763ec7567172921a2bc305077e6674af4196d70a57dbb80b9e64f692daadb59f47
7
+ data.tar.gz: 245911c41c615e50850bb20d531fbad79dd7f957340058f004dfd38e2500f592aa3b2786c61ec4d04b063f2cf726092ed297414528305f7c7aec56d7e275f5f5
@@ -0,0 +1,13 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ /.settings/
10
+ /.metadata/
11
+ .classpath
12
+ .project
13
+ /bin/
@@ -0,0 +1,23 @@
1
+ http://opensource.org/licenses/MIT
2
+ MIT License
3
+
4
+ Copyright (c) 2016 Yahoo Japan Corporation
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining
7
+ a copy of this software and associated documentation files (the
8
+ "Software"), to deal in the Software without restriction, including
9
+ without limitation the rights to use, copy, modify, merge, publish,
10
+ distribute, sublicense, and/or sell copies of the Software, and to
11
+ permit persons to whom the Software is furnished to do so, subject to
12
+ the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,67 @@
1
+ # Xml2 parser plugin for Embulk
2
+
3
+ Embulk parser plugin for parsing xml data. this plugin uses SAX parser, so you can parse very huge XML data with this plugin. also, support parsing sub-element under the root element which you specified. so you can parse and expand data more flexibly.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+ * **Guess supported**: no
9
+
10
+ ## Configuration
11
+
12
+ - **type**: specify this plugin as `"xml2"` (string, required)
13
+ - **root**: root element to start fetching each entries (integer, required)
14
+ - **schema**: specify the attribute of table and data type (required)
15
+
16
+ ## Example
17
+
18
+ ```yaml
19
+ parser:
20
+ type: xml2
21
+ root: mediawiki/page
22
+ schema:
23
+ - { name: id, type: long }
24
+ - { name: title, type: string }
25
+ - { name: revision/timestamp, type: timestamp, format: '%Y-%m-%dT%H:%M:%SZ' }
26
+ - { name: revision/text, type: string }
27
+ ```
28
+
29
+ Then you can fetch entries from the following xml (wikipedia archive xml format.) :
30
+ ```xml
31
+ <mediawiki>
32
+ <page>
33
+ <id>1</id>
34
+ <title>title 1</title>
35
+ <revision>
36
+ <timestamp>2004-04-30T14:46:00Z</timestamp>
37
+ <text>body text</text>
38
+ </revision>
39
+ </page>
40
+ <page>
41
+ <id>2</id>
42
+ <title>title 2</title>
43
+ <revision>
44
+ <timestamp>2004-04-30T14:46:00Z</timestamp>
45
+ <text>body text</text>
46
+ </revision>
47
+ </page>
48
+ </mediawiki>
49
+ ```
50
+
51
+ ## Build
52
+
53
+ ```
54
+ $ ./gradlew gem
55
+ ```
56
+
57
+ ## How to send Pull Request
58
+
59
+ If you would like to send a patch or Pull Request to this repository, please agree with our CLA before that. Please check following steps.
60
+
61
+ 1. You send Pull Request to our Yahoo! JAPAN OSS.
62
+ 2. We send you CLA to get agreement from you.
63
+ - Yahoo! JAPAN CLA https://gist.github.com/ydnjp/3095832f100d5c3d2592
64
+ 3. You agree with the CLA.
65
+ 4. We review your Pull Request and merge it.
66
+
67
+
@@ -0,0 +1,97 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ id "checkstyle"
6
+ id "eclipse"
7
+ }
8
+ import com.github.jrubygradle.JRubyExec
9
+ repositories {
10
+ mavenCentral()
11
+ jcenter()
12
+ }
13
+ configurations {
14
+ provided
15
+ }
16
+
17
+ version = "0.1.0"
18
+
19
+ sourceCompatibility = 1.7
20
+ targetCompatibility = 1.7
21
+
22
+ dependencies {
23
+ compile "org.embulk:embulk-core:0.8.3"
24
+ provided "org.embulk:embulk-core:0.8.3"
25
+ // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
26
+ testCompile "junit:junit:4.+"
27
+ testCompile "org.embulk:embulk-core:0.8.3:tests"
28
+ testCompile "org.embulk:embulk-standards:0.8.3"
29
+ }
30
+
31
+ task classpath(type: Copy, dependsOn: ["jar"]) {
32
+ doFirst { file("classpath").deleteDir() }
33
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
34
+ into "classpath"
35
+ }
36
+ clean { delete "classpath" }
37
+
38
+ checkstyle {
39
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
40
+ toolVersion = '6.14.1'
41
+ }
42
+ checkstyleMain {
43
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
44
+ ignoreFailures = true
45
+ }
46
+ checkstyleTest {
47
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
48
+ ignoreFailures = true
49
+ }
50
+ task checkstyle(type: Checkstyle) {
51
+ classpath = sourceSets.main.output + sourceSets.test.output
52
+ source = sourceSets.main.allJava + sourceSets.test.allJava
53
+ }
54
+
55
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
56
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
57
+ script "${project.name}.gemspec"
58
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
59
+ }
60
+
61
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
62
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
63
+ script "pkg/${project.name}-${project.version}.gem"
64
+ }
65
+
66
+ task "package"(dependsOn: ["gemspec", "classpath"]) << {
67
+ println "> Build succeeded."
68
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
69
+ }
70
+
71
+ task gemspec {
72
+ ext.gemspecFile = file("${project.name}.gemspec")
73
+ inputs.file "build.gradle"
74
+ outputs.file gemspecFile
75
+ doLast { gemspecFile.write($/
76
+ Gem::Specification.new do |spec|
77
+ spec.name = "${project.name}"
78
+ spec.version = "${project.version}"
79
+ spec.authors = ["yoshi0309"]
80
+ spec.summary = %[Xml2 parser plugin for Embulk]
81
+ spec.description = %[Parses Xml2 files read by other file input plugins.]
82
+ spec.email = ["takumyos@yahoo-corp.jp"]
83
+ spec.licenses = ["MIT"]
84
+ # TODO set this: spec.homepage = "https://github.com/takumyos/embulk-parser-xml2"
85
+
86
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
87
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
88
+ spec.require_paths = ["lib"]
89
+
90
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
91
+ spec.add_development_dependency 'bundler', ['~> 1.0']
92
+ spec.add_development_dependency 'rake', ['>= 10.0']
93
+ end
94
+ /$)
95
+ }
96
+ }
97
+ clean { delete "${project.name}.gemspec" }
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>