embulk-filter-csv_lookup 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 85cf9cea8405105d0aff2c41fc72ead054fb49b93258351317bc0dbda5e9af4b
4
+ data.tar.gz: b02342642fe2f35bcfb8f3abd1a329a36642b6b51ebdf45ff2b91615b81e3a77
5
+ SHA512:
6
+ metadata.gz: 62ba16d8c62ab45d4a83c07f057aca51a67f4232e58742066d3c2d6833c8d6f2d93ffe66c6e47efbf86cbd92e1a94ea175a4dc682b011762d1b8da220d9ab248
7
+ data.tar.gz: b06f69d7eb83ab313b4ad87940b5145d21d62725b7b206695bd195cbb413c3b22032216019cd9a8a698b2427a5444f31b975ddb39c4030cd30787f9cb19372d8
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ /.settings/
10
+ /.metadata/
11
+ .classpath
12
+ .project
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 InfoObjects Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,116 @@
1
+ # CSV lookup filter plugin for Embulk
2
+
3
+ An Embulk filter plugin for Lookup Transformation with CSV
4
+
5
+ ## Configuration
6
+
7
+ - **csv_lookup**: Required attributes for the CSV lookup filter plugin:
8
+ - **mapping_from**: (Names of the table 1 columns to be matched against the table 2 columns) (required)
9
+ - **Name of column-1**: column name-1 from input file
10
+ - **Name of column-2**: column name-2 from input file etc ...
11
+ - **mapping_to**: (Names of the table 2 columns to be matched against the table 1 columns) (required)
12
+ - **Name of column-1**: column name-1 from the lookup CSV (table 2)
13
+ - **Name of column-2**: column name-2 from the lookup CSV (table 2)
14
+ - **new_columns**: (New generated column names) (required)
15
+ - **Name-1,Type-1**: Any Name, Type of the name (name: country_name, type: string)
16
+ - **Name-2,Type-2**: Any Name, Type of the name (name: country_address, type: string) etc ...
17
+ ## Example - columns
18
+
19
+ Input1 for table 1 is as follows :-
20
+
21
+ ```
22
+ year country_code country_name literacy_rate
23
+
24
+ 1990 1 India 80%
25
+ 1993 2 USA 83%
26
+ 1997 3 JAPAN
27
+ 1999 4 China 72%
28
+ 2000 5 Ukraine 68%
29
+ 2002 6 Italy 79%
30
+ 2004 7 UK 75%
31
+ 2011 8 NULL 42%
32
+ ```
33
+
34
+ Input2 for table 2 is as follows :-
35
+
36
+ ```
37
+ id country_population country_address country_GDP
38
+
39
+ 1 11.3 India 1.67
40
+ 2 18.2 USA 16.72
41
+ 3 30 JAPAN 5.00
42
+ 4 4 China 9.33
43
+ 5 57 Ukraine 1.08
44
+ 6 63 Italy 2.068
45
+ 7 17 UK 2.49
46
+ 8 28 UAE 1.18
47
+
48
+
49
+ Note: country_population is expressed in billions and country_GDP is expressed in trillions of USD
50
+ ```
51
+
52
+ As shown in the YAML below, the columns listed in mapping_from are matched against the columns listed in mapping_to
53
+ i.e.:
54
+
55
+
56
+ country_code : id
57
+ country_name : country_address
58
+
59
+ After successful mapping, an Output.csv file containing the input columns plus the columns listed in new_columns will be generated
60
+
61
+ Output File generated :-
62
+
63
+ ```
64
+ year country_code country_name literacy_rate country_GDP country_population
65
+
66
+ 1990 1 India 80% 1.67 11.3
67
+ 1993 2 USA 83% 16.72 18.2
68
+ 1997 3 JAPAN 5.00 30
69
+ 1999 4 China 72% 9.33 4
70
+ 2000 5 Ukraine 68% 1.08 57
71
+ 2002 6 Italy 79% 2.068 63
72
+ 2004 7 UK 75% 2.49 17
73
+ 2011 8 NULL 42%
74
+ ```
75
+
76
+ ```yaml
77
+ - type: csv_lookup
78
+ mapping_from:
79
+ - country_code
80
+ - country_name
81
+ mapping_to:
82
+ - id
83
+ - country_address
84
+ new_columns:
85
+ - { name: country_GDP, type: string }
86
+ - { name: country_population, type: string }
87
+ ```
88
+
89
+ Notes:
90
+ 1. The columns in mapping_from must be listed in the same order as they appear in the input file.
91
+
92
+ ## Development
93
+
94
+ Run example:
95
+
96
+ ```
97
+ $ ./gradlew package
98
+ $ embulk run -I ./lib seed.yml
99
+ ```
100
+
101
+ Deployment Steps:
102
+
103
+ ```
104
+ Install ruby in your machine
105
+ $ gem install gemcutter (For windows OS)
106
+
107
+ $ ./gradlew gemPush
108
+ $ gem build NameOfYourPlugins (example: embulk-filter-csv_lookup)
109
+ $ gem push embulk-filter-csv_lookup-0.1.1.gem (You will get this name after running the above command)
110
+ ```
111
+
112
+ Release gem:
113
+
114
+ ```
115
+ $ ./gradlew gemPush
116
+ ```
data/build.gradle ADDED
@@ -0,0 +1,98 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "1.5.0"
4
+ id "java"
5
+ id "checkstyle"
6
+ }
7
+ import com.github.jrubygradle.JRubyExec
8
+ repositories {
9
+ mavenCentral()
10
+ jcenter()
11
+ }
12
+ configurations {
13
+ provided
14
+ }
15
+
16
+ version = "0.1.1"
17
+
18
+ sourceCompatibility = 1.8
19
+ targetCompatibility = 1.8
20
+
21
+ dependencies {
22
+ compile 'com.opencsv:opencsv:5.6'
23
+ compile "org.embulk:embulk-core:0.9.8"
24
+ provided "org.embulk:embulk-core:0.9.8"
25
+ // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
26
+ testCompile "junit:junit:4.+"
27
+ }
28
+
29
+ task classpath(type: Copy, dependsOn: ["jar"]) {
30
+ doFirst { file("classpath").deleteDir() }
31
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
32
+ into "classpath"
33
+ }
34
+ clean { delete "classpath" }
35
+
36
+ checkstyle {
37
+ configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
38
+ toolVersion = '6.14.1'
39
+ }
40
+ checkstyleMain {
41
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
42
+ ignoreFailures = true
43
+ }
44
+ checkstyleTest {
45
+ configFile = file("${project.rootDir}/config/checkstyle/default.xml")
46
+ ignoreFailures = true
47
+ }
48
+ task checkstyle(type: Checkstyle) {
49
+ classpath = sourceSets.main.output + sourceSets.test.output
50
+ source = sourceSets.main.allJava + sourceSets.test.allJava
51
+ }
52
+
53
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
54
+ jrubyArgs "-S"
55
+ script "gem"
56
+ scriptArgs "build", "${project.name}.gemspec"
57
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
58
+ }
59
+
60
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
61
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
62
+ script "pkg/${project.name}-${project.version}.gem"
63
+ }
64
+
65
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
66
+ doLast {
67
+ println "> Build succeeded."
68
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
69
+ }
70
+ }
71
+
72
+ task gemspec {
73
+ ext.gemspecFile = file("${project.name}.gemspec")
74
+ inputs.file "build.gradle"
75
+ outputs.file gemspecFile
76
+ doLast { gemspecFile.write($/
77
+ Gem::Specification.new do |spec|
78
+ spec.name = "${project.name}"
79
+ spec.version = "${project.version}"
80
+ spec.authors = ["Infoobjects Inc."]
81
+ spec.summary = %[An Embulk filter plugin for Lookup Transformation with CSV]
82
+ spec.description = %[Csv Lookup Filter]
83
+ spec.email = ["utkarsh@infoobjects.com"]
84
+ spec.licenses = ["MIT"]
85
+ spec.homepage = "https://github.com/InfoObjects/embulk-filter-csv_lookup"
86
+
87
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
88
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
89
+ spec.require_paths = ["lib"]
90
+
91
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
92
+ spec.add_development_dependency 'bundler', ['~> 1.0']
93
+ spec.add_development_dependency 'rake', ['~> 12.0']
94
+ end
95
+ /$)
96
+ }
97
+ }
98
+ clean { delete "${project.name}.gemspec" }
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,128 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <module name="Checker">
6
+ <!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7
+ <module name="FileTabCharacter"/>
8
+ <module name="NewlineAtEndOfFile">
9
+ <property name="lineSeparator" value="lf"/>
10
+ </module>
11
+ <module name="RegexpMultiline">
12
+ <property name="format" value="\r"/>
13
+ <property name="message" value="Line contains carriage return"/>
14
+ </module>
15
+ <module name="RegexpMultiline">
16
+ <property name="format" value=" \n"/>
17
+ <property name="message" value="Line has trailing whitespace"/>
18
+ </module>
19
+ <module name="RegexpMultiline">
20
+ <property name="format" value="\{\n\n"/>
21
+ <property name="message" value="Blank line after opening brace"/>
22
+ </module>
23
+ <module name="RegexpMultiline">
24
+ <property name="format" value="\n\n\s*\}"/>
25
+ <property name="message" value="Blank line before closing brace"/>
26
+ </module>
27
+ <module name="RegexpMultiline">
28
+ <property name="format" value="\n\n\n"/>
29
+ <property name="message" value="Multiple consecutive blank lines"/>
30
+ </module>
31
+ <module name="RegexpMultiline">
32
+ <property name="format" value="\n\n\Z"/>
33
+ <property name="message" value="Blank line before end of file"/>
34
+ </module>
35
+ <module name="RegexpMultiline">
36
+ <property name="format" value="Preconditions\.checkNotNull"/>
37
+ <property name="message" value="Use of checkNotNull"/>
38
+ </module>
39
+
40
+ <module name="TreeWalker">
41
+ <module name="EmptyBlock">
42
+ <property name="option" value="text"/>
43
+ <property name="tokens" value="
44
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46
+ </module>
47
+ <module name="EmptyStatement"/>
48
+ <module name="EmptyForInitializerPad"/>
49
+ <module name="EmptyForIteratorPad">
50
+ <property name="option" value="space"/>
51
+ </module>
52
+ <module name="MethodParamPad">
53
+ <property name="allowLineBreaks" value="true"/>
54
+ <property name="option" value="nospace"/>
55
+ </module>
56
+ <module name="ParenPad"/>
57
+ <module name="TypecastParenPad"/>
58
+ <module name="NeedBraces"/>
59
+ <module name="LeftCurly">
60
+ <property name="option" value="nl"/>
61
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62
+ </module>
63
+ <module name="LeftCurly">
64
+ <property name="option" value="eol"/>
65
+ <property name="tokens" value="
66
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68
+ </module>
69
+ <module name="RightCurly">
70
+ <property name="option" value="alone"/>
71
+ </module>
72
+ <module name="GenericWhitespace"/>
73
+ <module name="WhitespaceAfter"/>
74
+ <module name="NoWhitespaceBefore"/>
75
+
76
+ <module name="UpperEll"/>
77
+ <module name="DefaultComesLast"/>
78
+ <module name="ArrayTypeStyle"/>
79
+ <module name="MultipleVariableDeclarations"/>
80
+ <module name="ModifierOrder"/>
81
+ <module name="OneStatementPerLine"/>
82
+ <module name="StringLiteralEquality"/>
83
+ <module name="MutableException"/>
84
+ <module name="EqualsHashCode"/>
85
+ <module name="InnerAssignment"/>
86
+ <module name="InterfaceIsType"/>
87
+ <module name="HideUtilityClassConstructor"/>
88
+
89
+ <module name="MemberName"/>
90
+ <module name="LocalVariableName"/>
91
+ <module name="LocalFinalVariableName"/>
92
+ <module name="TypeName"/>
93
+ <module name="PackageName"/>
94
+ <module name="ParameterName"/>
95
+ <module name="StaticVariableName"/>
96
+ <module name="ClassTypeParameterName">
97
+ <property name="format" value="^[A-Z][0-9]?$"/>
98
+ </module>
99
+ <module name="MethodTypeParameterName">
100
+ <property name="format" value="^[A-Z][0-9]?$"/>
101
+ </module>
102
+
103
+ <module name="AvoidStarImport"/>
104
+ <module name="RedundantImport"/>
105
+ <module name="UnusedImports"/>
106
+ <module name="ImportOrder">
107
+ <property name="groups" value="*,javax,java"/>
108
+ <property name="separated" value="true"/>
109
+ <property name="option" value="bottom"/>
110
+ <property name="sortStaticImportsAlphabetically" value="true"/>
111
+ </module>
112
+
113
+ <module name="WhitespaceAround">
114
+ <property name="allowEmptyConstructors" value="true"/>
115
+ <property name="allowEmptyMethods" value="true"/>
116
+ <property name="ignoreEnhancedForColon" value="false"/>
117
+ <property name="tokens" value="
118
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126
+ </module>
127
+ </module>
128
+ </module>
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -0,0 +1,54 @@
1
+ exec:
2
+ max_threads: 1
3
+ min_output_tasks: 1
4
+ in:
5
+ type: file
6
+ path_prefix: /home/infoobjects/Downloads/sample/calendarFloat.csv
7
+ parser:
8
+ type: csv
9
+ columns:
10
+ - {name: dim_calendar_key, type: long}
11
+ - {name: year_number, type: long}
12
+ - {name: quarter_number, type: long }
13
+ - {name: attr_1, type: string }
14
+ filters:
15
+ - type: csv_lookup
16
+ mappingColumn_Key: country_key
17
+ mappingColumn_Value:
18
+ - { name: country_name, type: string }
19
+ new_columns:
20
+ - { name: country_name, type: string }
21
+ - { name: country_address, type: string }
22
+ mapping_from:
23
+ - quarter_number
24
+ - attr_1
25
+ mapping_to:
26
+ - id
27
+ - country_code
28
+ - type: csv_lookup
29
+ mappingColumn_Key: country_key
30
+ mappingColumn_Value:
31
+ - { name: country_name, type: string }
32
+ new_columns:
33
+ - { name: country_code,type: double }
34
+ mapping_from:
35
+ - quarter_number
36
+ - attr_1
37
+ mapping_to:
38
+ - id
39
+ - country_code
40
+ out:
41
+ type: file
42
+ path_prefix: /home/infoobjects/GetFiles/output.csv
43
+ file_ext: csv
44
+ formatter:
45
+ type: csv
46
+ delimiter: "\t"
47
+ newline: CRLF
48
+ newline_in_field: LF
49
+ charset: UTF-8
50
+ quote_policy: MINIMAL
51
+ quote: '"'
52
+ escape: "\\"
53
+ null_string: "\\N"
54
+ default_timezone: 'UTC'
Binary file
@@ -0,0 +1,5 @@
1
+ distributionBase=GRADLE_USER_HOME
2
+ distributionPath=wrapper/dists
3
+ zipStoreBase=GRADLE_USER_HOME
4
+ zipStorePath=wrapper/dists
5
+ distributionUrl=https\://services.gradle.org/distributions/gradle-5.4.1-all.zip
data/gradlew ADDED
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env sh
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Attempt to set APP_HOME
10
+ # Resolve links: $0 may be a link
11
+ PRG="$0"
12
+ # Need this for relative symlinks.
13
+ while [ -h "$PRG" ] ; do
14
+ ls=`ls -ld "$PRG"`
15
+ link=`expr "$ls" : '.*-> \(.*\)$'`
16
+ if expr "$link" : '/.*' > /dev/null; then
17
+ PRG="$link"
18
+ else
19
+ PRG=`dirname "$PRG"`"/$link"
20
+ fi
21
+ done
22
+ SAVED="`pwd`"
23
+ cd "`dirname \"$PRG\"`/" >/dev/null
24
+ APP_HOME="`pwd -P`"
25
+ cd "$SAVED" >/dev/null
26
+
27
+ APP_NAME="Gradle"
28
+ APP_BASE_NAME=`basename "$0"`
29
+
30
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31
+ DEFAULT_JVM_OPTS=""
32
+
33
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
34
+ MAX_FD="maximum"
35
+
36
+ warn () {
37
+ echo "$*"
38
+ }
39
+
40
+ die () {
41
+ echo
42
+ echo "$*"
43
+ echo
44
+ exit 1
45
+ }
46
+
47
+ # OS specific support (must be 'true' or 'false').
48
+ cygwin=false
49
+ msys=false
50
+ darwin=false
51
+ nonstop=false
52
+ case "`uname`" in
53
+ CYGWIN* )
54
+ cygwin=true
55
+ ;;
56
+ Darwin* )
57
+ darwin=true
58
+ ;;
59
+ MINGW* )
60
+ msys=true
61
+ ;;
62
+ NONSTOP* )
63
+ nonstop=true
64
+ ;;
65
+ esac
66
+
67
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
68
+
69
+ # Determine the Java command to use to start the JVM.
70
+ if [ -n "$JAVA_HOME" ] ; then
71
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
72
+ # IBM's JDK on AIX uses strange locations for the executables
73
+ JAVACMD="$JAVA_HOME/jre/sh/java"
74
+ else
75
+ JAVACMD="$JAVA_HOME/bin/java"
76
+ fi
77
+ if [ ! -x "$JAVACMD" ] ; then
78
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
79
+
80
+ Please set the JAVA_HOME variable in your environment to match the
81
+ location of your Java installation."
82
+ fi
83
+ else
84
+ JAVACMD="java"
85
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
86
+
87
+ Please set the JAVA_HOME variable in your environment to match the
88
+ location of your Java installation."
89
+ fi
90
+
91
+ # Increase the maximum file descriptors if we can.
92
+ if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
93
+ MAX_FD_LIMIT=`ulimit -H -n`
94
+ if [ $? -eq 0 ] ; then
95
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
96
+ MAX_FD="$MAX_FD_LIMIT"
97
+ fi
98
+ ulimit -n $MAX_FD
99
+ if [ $? -ne 0 ] ; then
100
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
101
+ fi
102
+ else
103
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104
+ fi
105
+ fi
106
+
107
+ # For Darwin, add options to specify how the application appears in the dock
108
+ if $darwin; then
109
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110
+ fi
111
+
112
+ # For Cygwin, switch paths to Windows format before running java
113
+ if $cygwin ; then
114
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Escape application args
158
+ save () {
159
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160
+ echo " "
161
+ }
162
+ APP_ARGS=$(save "$@")
163
+
164
+ # Collect all arguments for the java command, following the shell quoting and substitution rules
165
+ eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166
+
167
+ # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168
+ if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169
+ cd "$(dirname "$0")"
170
+ fi
171
+
172
+ exec "$JAVACMD" "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,84 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ set DIRNAME=%~dp0
12
+ if "%DIRNAME%" == "" set DIRNAME=.
13
+ set APP_BASE_NAME=%~n0
14
+ set APP_HOME=%DIRNAME%
15
+
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+
53
+ :win9xME_args
54
+ @rem Slurp the command line arguments.
55
+ set CMD_LINE_ARGS=
56
+ set _SKIP=2
57
+
58
+ :win9xME_args_slurp
59
+ if "x%~1" == "x" goto execute
60
+
61
+ set CMD_LINE_ARGS=%*
62
+
63
+ :execute
64
+ @rem Setup the command line
65
+
66
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67
+
68
+ @rem Execute Gradle
69
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70
+
71
+ :end
72
+ @rem End local scope for the variables with windows NT shell
73
+ if "%ERRORLEVEL%"=="0" goto mainEnd
74
+
75
+ :fail
76
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77
+ rem the _cmd.exe /c_ return code!
78
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79
+ exit /b 1
80
+
81
+ :mainEnd
82
+ if "%OS%"=="Windows_NT" endlocal
83
+
84
+ :omega
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_filter(
2
+ "csv_lookup", "org.embulk.filter.csv_lookup.CsvLookupFilterPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,353 @@
1
+ package org.embulk.filter.csv_lookup;
2
+
3
+ import com.google.common.base.Optional;
4
+
5
+ import com.google.common.collect.ImmutableList;
6
+ import com.opencsv.CSVReader;
7
+ import com.opencsv.exceptions.CsvValidationException;
8
+ import org.embulk.config.Config;
9
+ import org.embulk.config.ConfigDefault;
10
+ import org.embulk.config.ConfigDiff;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.config.Task;
13
+ import org.embulk.config.TaskSource;
14
+ import org.embulk.spi.*;
15
+ import org.embulk.spi.time.Timestamp;
16
+ import org.embulk.spi.type.Types;
17
+
18
+ import java.io.BufferedReader;
19
+ import java.io.FileReader;
20
+ import java.io.IOException;
21
+ import java.sql.SQLException;
22
+ import java.time.Instant;
23
+ import java.util.*;
24
+
25
+ /**
26
+  * Embulk filter that appends columns looked up from a CSV file to every record.
27
+  *
28
+  * <p>The values of the {@code mapping_from} input columns are joined with ","
29
+  * and compared with the same join of the {@code mapping_to} columns of each
30
+  * lookup row. On a match the {@code new_columns} values of that row are
31
+  * appended to the record; otherwise type-specific placeholders are written.
32
+  */
33
+ public class CsvLookupFilterPlugin
34
+         implements FilterPlugin
35
+ {
36
+     public interface PluginTask
37
+             extends Task
38
+     {
39
+         /** Input columns whose values form the lookup key. */
40
+         @Config("mapping_from")
41
+         public List<String> getMappingFrom();
42
+
43
+         /** Lookup-file header names paired position by position with mapping_from. */
44
+         @Config("mapping_to")
45
+         public List<String> getMappingTo();
46
+
47
+         /** Columns appended to the output; each name must be a lookup-file header. */
48
+         @Config("new_columns")
49
+         public SchemaConfig getNewColumns();
50
+
51
+         /**
52
+          * Path of the CSV lookup file. The default is the path that used to be
53
+          * hard-coded, so configurations without this option behave as before.
54
+          */
55
+         @Config("path_of_lookup_file")
56
+         @ConfigDefault("\"/home/infoobjects/Downloads/sample/countryKey_countryName.csv\"")
57
+         public String getPathOfLookupFile();
58
+     }
59
+
60
+     /**
61
+      * Validates the mapping lists and declares the output schema: every input
62
+      * column followed by the configured new columns.
63
+      *
64
+      * @throws RuntimeException if mapping_from and mapping_to differ in length
65
+      */
66
+     @Override
67
+     public void transaction(ConfigSource config, Schema inputSchema,
68
+             FilterPlugin.Control control)
69
+     {
70
+         PluginTask task = config.loadConfig(PluginTask.class);
71
+
72
+         if (task.getMappingFrom().size() != task.getMappingTo().size()) {
73
+             throw new RuntimeException("Number of mapping_from columns must be exactly equals to number of mapping_to columns");
74
+         }
75
+
76
+         ImmutableList.Builder<Column> builder = ImmutableList.builder();
77
+         int i = 0;
78
+         for (Column inputColumn : inputSchema.getColumns()) {
79
+             builder.add(new Column(i++, inputColumn.getName(), inputColumn.getType()));
80
+         }
81
+         for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
82
+             builder.add(columnConfig.toColumn(i++));
83
+         }
84
+
85
+         control.run(task.dump(), new Schema(builder.build()));
86
+     }
87
+
88
+     /**
89
+      * Loads the lookup table and returns the output that performs the lookup.
90
+      *
91
+      * @throws RuntimeException if the lookup file cannot be read or parsed
92
+      */
93
+     @Override
94
+     public PageOutput open(TaskSource taskSource, Schema inputSchema,
95
+             Schema outputSchema, PageOutput output)
96
+     {
97
+         PluginTask task = taskSource.loadTask(PluginTask.class);
98
+         Map<String, List<String>> map;
99
+         try {
100
+             map = getKeyValueMap(task);
101
+         } catch (IOException | CsvValidationException e) {
102
+             throw new RuntimeException(e);
103
+         }
104
+         PageReader pageReader = new PageReader(inputSchema);
105
+         return new MyOutput(pageReader, inputSchema, outputSchema, output, task, map);
106
+     }
107
+
108
+     /**
109
+      * Reads the lookup file into a map from the comma-joined mapping_to values
110
+      * of a row to that row's new_columns values. The first record is the header;
111
+      * a later row replaces an earlier row that has the same key.
112
+      *
113
+      * @throws RuntimeException if a configured column is missing from the header
114
+      *         or a row is too short to contain every configured column
115
+      */
116
+     private Map<String, List<String>> getKeyValueMap(PluginTask task) throws IOException, CsvValidationException {
117
+         Map<String, List<String>> map = new LinkedHashMap<>();
118
+
119
+         List<String> newColumns = new ArrayList<>();
120
+         for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
121
+             newColumns.add(columnConfig.getName());
122
+         }
123
+
124
+         // A single reader parses header and rows alike and is always closed.
125
+         try (CSVReader csvReader = new CSVReader(new BufferedReader(new FileReader(task.getPathOfLookupFile())))) {
126
+             // Header name -> position; a repeated name keeps its last position.
127
+             Map<String, Integer> headerIndex = new LinkedHashMap<>();
128
+             String[] header = csvReader.readNext();
129
+             if (header != null) {
130
+                 for (int s = 0; s < header.length; s++) {
131
+                     headerIndex.put(header[s], s);
132
+                 }
133
+             }
134
+
135
+             List<Integer> keyPositions = resolvePositions(headerIndex, task.getMappingTo(), "Target Columns Not Found!!");
136
+             List<Integer> valuePositions = resolvePositions(headerIndex, newColumns, "New Columns field Not Found!!");
137
+
138
+             int requiredWidth = 0;
139
+             for (int position : keyPositions) {
140
+                 requiredWidth = Math.max(requiredWidth, position + 1);
141
+             }
142
+             for (int position : valuePositions) {
143
+                 requiredWidth = Math.max(requiredWidth, position + 1);
144
+             }
145
+
146
+             String[] nextLine;
147
+             long recordNumber = 1; // the header was record 1
148
+             while ((nextLine = csvReader.readNext()) != null) {
149
+                 recordNumber++;
150
+                 if (nextLine.length < requiredWidth) {
151
+                     throw new RuntimeException("Lookup file record " + recordNumber + " has " + nextLine.length
152
+                             + " fields but at least " + requiredWidth + " are required");
153
+                 }
154
+
155
+                 List<String> keyParts = new ArrayList<>();
156
+                 for (int position : keyPositions) {
157
+                     keyParts.add(nextLine[position]);
158
+                 }
159
+                 List<String> values = new ArrayList<>();
160
+                 for (int position : valuePositions) {
161
+                     values.add(nextLine[position]);
162
+                 }
163
+                 map.put(String.join(",", keyParts), values);
164
+             }
165
+         }
166
+         return map;
167
+     }
168
+
169
+     /** Maps each name to its header position, failing with errorMessage when one is absent. */
170
+     private static List<Integer> resolvePositions(Map<String, Integer> headerIndex, List<String> names, String errorMessage) {
171
+         List<Integer> positions = new ArrayList<>();
172
+         for (String name : names) {
173
+             Integer position = headerIndex.get(name);
174
+             if (position == null) {
175
+                 throw new RuntimeException(errorMessage);
176
+             }
177
+             positions.add(position);
178
+         }
179
+         return positions;
180
+     }
181
+
182
+     /** Copies every record to the output and appends the looked-up columns. */
183
+     public static class MyOutput implements PageOutput {
184
+         private PageReader reader;
185
+         private PageBuilder builder;
186
+         private PluginTask task;
187
+         private Schema inputSchema;
188
+         private Map<String, List<String>> keyValuePair;
189
+
190
+         public MyOutput(PageReader pageReader, Schema inputSchema, Schema outputSchema, PageOutput pageOutput, PluginTask task, Map<String, List<String>> keyValuePair) {
191
+             this.reader = pageReader;
192
+             this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput);
193
+             this.task = task;
194
+             this.inputSchema = inputSchema;
195
+             this.keyValuePair = keyValuePair;
196
+         }
197
+
198
+         /**
199
+          * Processes one page. Key columns are collected while the input columns
200
+          * are walked in schema order, so mapping_from has to list them in that
201
+          * order; names are compared case-insensitively and a null key column
202
+          * contributes an empty string.
203
+          */
204
+         @Override
205
+         public void add(Page page) {
206
+             reader.setPage(page);
207
+             List<ColumnConfig> newColumnConfigs = task.getNewColumns().getColumns();
208
+             List<String> inputColumns = task.getMappingFrom();
209
+
210
+             while (reader.nextRecord()) {
211
+                 int colNum = 0;
212
+                 int keyIndex = 0;
213
+                 List<String> searchingKeyData = new ArrayList<>();
214
+
215
+                 for (Column column : inputSchema.getColumns()) {
216
+                     String text = add_builder(colNum++, column);
217
+                     // The bounds check also guards null columns that follow the last key column.
218
+                     if (keyIndex < inputColumns.size()
219
+                             && column.getName().equalsIgnoreCase(inputColumns.get(keyIndex))) {
220
+                         searchingKeyData.add(text);
221
+                         keyIndex++;
222
+                     }
223
+                 }
224
+
225
+                 List<String> matchedData = keyValuePair.get(String.join(",", searchingKeyData));
226
+                 boolean isDataMatched = matchedData != null && !matchedData.isEmpty();
227
+                 for (int k = 0; k < newColumnConfigs.size(); k++) {
228
+                     String value = isDataMatched ? matchedData.get(k) : "";
229
+                     add_builder_for_new_column(colNum++, newColumnConfigs.get(k).getType().getName(), value, isDataMatched);
230
+                 }
231
+                 builder.addRecord();
232
+             }
233
+         }
234
+
235
+         @Override
236
+         public void finish() {
237
+             builder.finish();
238
+         }
239
+
240
+         @Override
241
+         public void close() {
242
+             builder.close();
243
+         }
244
+
245
+         /**
246
+          * Copies the current value of an input column to output column colNum.
247
+          *
248
+          * @return the value as key text; "" when the value is null
249
+          */
250
+         private String add_builder(int colNum, Column column) {
251
+             if (reader.isNull(column)) {
252
+                 builder.setNull(colNum);
253
+                 return "";
254
+             }
255
+             if (Types.STRING.equals(column.getType())) {
256
+                 String value = reader.getString(column);
257
+                 builder.setString(colNum, value);
258
+                 return value;
259
+             } else if (Types.BOOLEAN.equals(column.getType())) {
260
+                 boolean value = reader.getBoolean(column);
261
+                 builder.setBoolean(colNum, value);
262
+                 return String.valueOf(value);
263
+             } else if (Types.DOUBLE.equals(column.getType())) {
264
+                 double value = reader.getDouble(column);
265
+                 builder.setDouble(colNum, value);
266
+                 return String.valueOf(value);
267
+             } else if (Types.LONG.equals(column.getType())) {
268
+                 long value = reader.getLong(column);
269
+                 builder.setLong(colNum, value);
270
+                 return String.valueOf(value);
271
+             } else if (Types.TIMESTAMP.equals(column.getType())) {
272
+                 Timestamp value = reader.getTimestamp(column);
273
+                 builder.setTimestamp(colNum, value);
274
+                 return String.valueOf(value);
275
+             } else if (Types.JSON.equals(column.getType())) {
276
+                 // JSON columns used to be left unset in the output record.
277
+                 builder.setJson(colNum, reader.getJson(column));
278
+                 return String.valueOf(reader.getJson(column));
279
+             }
280
+             builder.setNull(colNum);
281
+             return "";
282
+         }
283
+
284
+         /**
285
+          * Writes one appended column. Without a match (or, for non-string
286
+          * types, with an empty lookup value) string becomes "", long 0,
287
+          * double 0.0, and boolean/timestamp null.
288
+          *
289
+          * @throws RuntimeException if the lookup value cannot be parsed as the column type
290
+          */
291
+         private void add_builder_for_new_column(int colNum, String newlyAddedColumnType, String matchedData, Boolean isDataMatched) {
292
+             try {
293
+                 boolean hasValue = isDataMatched && matchedData.length() > 0;
294
+                 if (newlyAddedColumnType.equalsIgnoreCase("string")) {
295
+                     builder.setString(colNum, isDataMatched ? matchedData : "");
296
+                 } else if (newlyAddedColumnType.equalsIgnoreCase("long")) {
297
+                     builder.setLong(colNum, hasValue ? Long.parseLong(matchedData) : 0L);
298
+                 } else if (newlyAddedColumnType.equalsIgnoreCase("double")) {
299
+                     builder.setDouble(colNum, hasValue ? Double.parseDouble(matchedData) : 0.0);
300
+                 } else if (newlyAddedColumnType.equalsIgnoreCase("boolean")) {
301
+                     if (hasValue) {
302
+                         builder.setBoolean(colNum, Boolean.parseBoolean(matchedData));
303
+                     } else {
304
+                         builder.setNull(colNum);
305
+                     }
306
+                 } else if (newlyAddedColumnType.equalsIgnoreCase("timestamp")) {
307
+                     if (hasValue) {
308
+                         Instant instant = java.sql.Timestamp.valueOf(matchedData).toInstant();
309
+                         builder.setTimestamp(colNum, Timestamp.ofInstant(instant));
310
+                     } else {
311
+                         builder.setNull(colNum);
312
+                     }
313
+                 } else {
314
+                     // Any other type (e.g. json) used to be left unset; write an explicit null.
315
+                     builder.setNull(colNum);
316
+                 }
317
+             } catch (Exception e) {
318
+                 throw new RuntimeException("Data type could not be cast due to wrong data or issue in typecasting timestamp", e);
319
+             }
320
+         }
321
+     }
322
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.filter.csv_lookup;
2
+
3
+ public class TestCsvLookupFilterPlugin // Placeholder test class: no test cases are defined yet.
4
+ {
5
+ }
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-csv_lookup
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Infoobjects Inc.
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-09-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '12.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ description: Csv Lookup Filter
42
+ email:
43
+ - utkarsh@infoobjects.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - LICENSE.txt
50
+ - README.md
51
+ - build.gradle
52
+ - classpath/commons-beanutils-1.9.4.jar
53
+ - classpath/commons-collections-3.2.2.jar
54
+ - classpath/commons-collections4-4.4.jar
55
+ - classpath/commons-lang3-3.12.0.jar
56
+ - classpath/commons-logging-1.2.jar
57
+ - classpath/commons-text-1.9.jar
58
+ - classpath/embulk-filter-csv_lookup-0.1.1.jar
59
+ - classpath/opencsv-5.6.jar
60
+ - config/checkstyle/checkstyle.xml
61
+ - config/checkstyle/default.xml
62
+ - example/config.yml
63
+ - gradle/wrapper/gradle-wrapper.jar
64
+ - gradle/wrapper/gradle-wrapper.properties
65
+ - gradlew
66
+ - gradlew.bat
67
+ - lib/embulk/filter/csv_lookup.rb
68
+ - src/main/java/org/embulk/filter/csv_lookup/CsvLookupFilterPlugin.java
69
+ - src/test/java/org/embulk/filter/csv_lookup/TestCsvLookupFilterPlugin.java
70
+ homepage: https://github.com/InfoObjects/embulk-filter-csv_lookup
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubygems_version: 3.2.33
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: An Embulk filter plugin for Lookup Transformation with CSV
93
+ test_files: []