embulk-parser-jdbc-schema-csv 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 775c538cd41b7550efbedec4ec547c21a28808d1
4
+ data.tar.gz: d1d95b87d4be564d71e3dbaf626caab9b80eed77
5
+ SHA512:
6
+ metadata.gz: bfb0d20a9f313288e5a66ac442797c450ee87ddaecabf13e23804677e035ba820b8c1f1c9c93bf6240dc9150ede9b78fc8a18a7f14493c0b2a8e8fff822f242f
7
+ data.tar.gz: 2b14eb314f62912fc7d97535f0634748250b88b7bd412f82f8b85e48cd8492fccf4b468960325751a4ae354b62ce375a5258b98f4fb178f2e1d36da264dd563c
@@ -0,0 +1,39 @@
1
+ # Embulk CSV parser plugin using JDBC to define schema
2
+
3
+ This Embulk plugin extends the CSV parser so that columns are defined from database metadata.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+
9
+ ## Configuration
10
+
11
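+ - **schema**: the database table used to define the columns. Takes the same options as the corresponding embulk-input-jdbc plugin; its `type` option (e.g. `mysql`) is required and selects which input plugin is used to read the table definition.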
12
+
13
+ All other options are the same as for the CSV parser plugin, except `columns`, which is ignored.
14
+
15
+ ### Example
16
+
17
+ ```yaml
18
+ in:
19
+ type: file
20
+ path_prefix: 'data/test.csv'
21
+ parser:
22
+ type: jdbc-schema-csv
23
+ delimiter: ','
24
+ header_line: false
25
+ schema: &OUT
26
+ host: localhost
27
+ user: myuser
28
+ password: ""
29
+ database: my_database
30
+ table: my_table
31
+ mode: insert
32
+ out: *OUT
33
+ ```
34
+
35
+ ### Build
36
+
37
+ ```
38
+ $ gradle gem
39
+ ```
@@ -0,0 +1,71 @@
1
+ plugins {
2
+ id 'com.jfrog.bintray' version '1.1'
3
+ id 'com.github.jruby-gradle.base' version '0.1.5'
4
+ id 'java'
5
+ }
6
+ import com.github.jrubygradle.JRubyExec
7
+
8
+ apply plugin: 'java'
9
+ apply plugin: 'com.github.jruby-gradle.base'
10
+
11
+ [compileJava, compileTestJava]*.options*.encoding = 'UTF-8'
12
+
13
+ project.version = '0.0.1'
14
+
15
+ repositories {
16
+ mavenCentral()
17
+ jcenter()
18
+ flatDir {
19
+ dirs 'depends'
20
+ }
21
+ }
22
+
23
+ configurations {
24
+ provided
25
+ }
26
+
27
+ dependencies {
28
+ compile 'org.embulk:embulk-core:0.6.5'
29
+ compile 'org.embulk:embulk-standards:0.6.5'
30
+ provided 'org.embulk:embulk-core:0.6.5'
31
+ provided 'org.embulk:embulk-standards:0.6.5'
32
+ testCompile 'junit:junit:4.+'
33
+ testCompile name:'embulk-input-jdbc-0.4.0'
34
+ testCompile name:'embulk-input-mysql-0.4.0'
35
+ testCompile name:'mysql-connector-java-5.1.34'
36
+ }
37
+
38
+ task classpath(type: Copy, dependsOn: ["jar"]) {
39
+ doFirst { file('classpath').deleteDir() }
40
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
41
+ into 'classpath'
42
+ }
43
+ clean { delete 'classpath' }
44
+
45
+ task gem(type: JRubyExec, dependsOn: ['build', 'gemspec', 'classpath']) {
46
+ jrubyArgs '-rrubygems/gem_runner', "-eGem::GemRunner.new.run(ARGV)", 'build'
47
+ script "build/gemspec"
48
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
49
+ }
50
+
51
+ task gemspec << {
52
+ file('build').mkdirs();
53
+ file('build/gemspec').write($/
54
+ Gem::Specification.new do |spec|
55
+ spec.name = "${project.name}"
56
+ spec.version = "${project.version}"
57
+ spec.authors = ["Hitoshi Tanaka"]
58
+ spec.homepage = "https://github.com/hito4t/embulk-parser-jdbc-schema-csv"
59
+ spec.summary = "This Embulk plugin extends CSV parser to define columns based on database meta data."
60
+ spec.licenses = ["Apache 2.0"]
61
+ spec.files = `git ls-files`.split("\n").grep(%r"^(?!\.)").grep(%r"^(?!depends/)") + Dir["classpath/*.jar"]
62
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
63
+ spec.require_paths = ["lib"]
64
+ end
65
+ /$)
66
+ }
67
+
68
+
69
// Pushes the gem built by the `gem` task to the gem server.
// The file name is derived from project.name/project.version, the same way the
// `gem` task names the file it moves into pkg/, so the two cannot drift apart.
task gempush << {
    def gemFile = "pkg/${project.name}-${project.version}.gem"
    def process = "gem push ${gemFile}".execute()
    // Forward the command's output; leaving it unread hides errors and can block
    // the child process once its output buffer fills up.
    process.waitForProcessOutput(System.out, System.err)
    def exitCode = process.exitValue()
    if (exitCode != 0) {
        throw new GradleException("'gem push ${gemFile}' failed with exit code ${exitCode}")
    }
}
@@ -0,0 +1,7 @@
1
+ <project name="embulk-parser-jdbc-schema-csv" default="jar" basedir="./">
2
+ <target name="jar" depends="" description="embulk-parser-jdbc-schema-csv.jarファイルを作成します。">
3
+ <mkdir dir="${basedir}/jar" />
4
+ <jar destfile="${basedir}/jar/embulk-parser-jdbc-schema-csv-x.jar" basedir="${basedir}/out/main">
5
+ </jar>
6
+ </target>
7
+ </project>
@@ -0,0 +1,3 @@
1
# Registers the Java class org.embulk.parser.JdbcSchemaCsvParser as the parser
# named "jdbc-schema-csv". The third argument is the "classpath" directory
# resolved four levels up from this file.
classpath_dir = File.expand_path('../../../../classpath', __FILE__)
Embulk::JavaPlugin.register_parser("jdbc-schema-csv", "org.embulk.parser.JdbcSchemaCsvParser", classpath_dir)
@@ -0,0 +1,54 @@
1
+ package org.embulk.parser;
2
+
3
+ import java.lang.reflect.Method;
4
+
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.spi.InputPlugin;
7
+ import org.embulk.spi.Schema;
8
+
9
+ public class JdbcInputPluginHelper
10
+ {
11
+ private final InputPlugin plugin;
12
+ private final Class<?> abstractJdbcInputPluginClass;
13
+
14
+ public JdbcInputPluginHelper(InputPlugin plugin) throws ClassNotFoundException
15
+ {
16
+ this.plugin = plugin;
17
+ abstractJdbcInputPluginClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.AbstractJdbcInputPlugin");
18
+ }
19
+
20
+ public Schema getSchema(ConfigSource config) throws Exception
21
+ {
22
+ Class<?> taskClass = getTaskClass();
23
+ Object task = config.loadConfig(taskClass);
24
+
25
+ try (AutoCloseable connection = newConnection(task)) {
26
+ return setupTask(connection, task);
27
+ }
28
+ }
29
+
30
+ private Class<?> getTaskClass() throws Exception
31
+ {
32
+ Method method = abstractJdbcInputPluginClass.getDeclaredMethod("getTaskClass");
33
+ method.setAccessible(true);
34
+ return (Class<?>)method.invoke(plugin);
35
+ }
36
+
37
+ private AutoCloseable newConnection(Object task) throws Exception
38
+ {
39
+ Class<?> taskClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.AbstractJdbcInputPlugin$PluginTask");
40
+ Method method = abstractJdbcInputPluginClass.getDeclaredMethod("newConnection", taskClass);
41
+ method.setAccessible(true);
42
+ return (AutoCloseable)method.invoke(plugin, task);
43
+ }
44
+
45
+ public Schema setupTask(Object connection, Object task) throws Exception
46
+ {
47
+ Class<?> connectionClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.JdbcInputConnection");
48
+ Class<?> taskClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.AbstractJdbcInputPlugin$PluginTask");
49
+ Method method = abstractJdbcInputPluginClass.getDeclaredMethod("setupTask", connectionClass, taskClass);
50
+ method.setAccessible(true);
51
+ return (Schema)method.invoke(plugin, connection, task);
52
+ }
53
+
54
+ }
@@ -0,0 +1,59 @@
1
+ package org.embulk.parser;
2
+
3
+
4
+ import java.util.List;
5
+
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.TaskSource;
8
+ import org.embulk.plugin.PluginType;
9
+ import org.embulk.spi.Column;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.ExecSession;
12
+ import org.embulk.spi.InputPlugin;
13
+ import org.embulk.spi.Schema;
14
+ import org.embulk.spi.SchemaConfig;
15
+ import org.embulk.standards.CsvParserPlugin;
16
+ import org.slf4j.Logger;
17
+
18
+ public class JdbcSchemaCsvParser extends CsvParserPlugin
19
+ {
20
+ private final Logger logger = Exec.getLogger(JdbcSchemaCsvParser.class);
21
+
22
+ public static interface PluginTask extends CsvParserPlugin.PluginTask
23
+ {
24
+ @Override
25
+ public SchemaConfig getSchemaConfig();
26
+
27
+ }
28
+
29
+ @Override
30
+ public void transaction(ConfigSource config, final Control control)
31
+ {
32
+ // to avoid config error
33
+ config.set("columns", java.util.Collections.emptyList());
34
+ ExecSession session = Exec.session();
35
+ ConfigSource child = config.getNested("schema");
36
+ String type = child.get(String.class, "type");
37
+ InputPlugin input = session.newPlugin(InputPlugin.class, new PluginType(type));
38
+
39
+ try {
40
+ JdbcInputPluginHelper helper = new JdbcInputPluginHelper(input);
41
+ final Schema schemaFromJdbc = helper.getSchema(child);
42
+ for (int i = 0; i < schemaFromJdbc.getColumnCount(); i++) {
43
+ Column column = schemaFromJdbc.getColumn(i);
44
+ logger.info(String.format("column %d : name = %s, type = %s", i + 1, column.getName(), column.getType()));
45
+ }
46
+
47
+ super.transaction(config, new Control() {
48
+ @Override
49
+ public void run(TaskSource taskSource, Schema schema) {
50
+ control.run(taskSource, schemaFromJdbc);
51
+ }
52
+ });
53
+
54
+ } catch (Exception e) {
55
+ throw new RuntimeException(e);
56
+ }
57
+ }
58
+
59
+ }
@@ -0,0 +1,62 @@
1
+ package org.embulk.parser;
2
+
3
+ import static org.junit.Assert.assertEquals;
4
+
5
+ import java.nio.charset.Charset;
6
+ import java.nio.file.FileSystem;
7
+ import java.nio.file.FileSystems;
8
+ import java.nio.file.Files;
9
+ import java.util.List;
10
+
11
+ import org.embulk.input.MySQLInputPlugin;
12
+ import org.embulk.spi.InputPlugin;
13
+ import org.embulk.spi.ParserPlugin;
14
+ import org.embulk.test.EmbulkPluginTester;
15
+ import org.embulk.test.TestExtension;
16
+ import org.junit.Test;
17
+
18
+ public class JdbcSchemaCsvParserTest
19
+ {
20
+
21
+ /*
22
+ Prepare MySQL table:
23
+
24
+ create table embulk_test;
25
+ grant all on embulk_test.* to embulk_user@"%" identified by 'embulk_pass';
26
+
27
+ create table embulk_test.input_test (
28
+ id bigint,
29
+ name char(8),
30
+ value double,
31
+ creation timestamp
32
+ );
33
+ */
34
+
35
+ private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "jdbc-schema-csv", JdbcSchemaCsvParser.class);
36
+ static {
37
+ TestExtension.addPlugin(InputPlugin.class, "mysql", MySQLInputPlugin.class);
38
+ }
39
+
40
+ @Test
41
+ public void testCsv() throws Exception
42
+ {
43
+ test("yml/csv.yml");
44
+ }
45
+
46
+ @Test
47
+ public void testJdbcSchemaCsv() throws Exception
48
+ {
49
+ test("yml/jdbc-csv.yml");
50
+ }
51
+
52
+ private void test(String yml) throws Exception
53
+ {
54
+ tester.run(yml);
55
+
56
+ FileSystem fs = FileSystems.getDefault();
57
+ List<String> lines = Files.readAllLines(fs.getPath("result.000.00.tsv"), Charset.forName("UTF-8"));
58
+ assertEquals(2, lines.size());
59
+ assertEquals("1\ttest\t123.4\t2015-04-27 11:23:45", lines.get(0));
60
+ assertEquals("2\tsample\t-1.0\t2015-12-31 23:59:59", lines.get(1));
61
+ }
62
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.test;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.BufferedWriter;
5
+ import java.io.File;
6
+ import java.io.FileReader;
7
+ import java.io.FileWriter;
8
+ import java.util.regex.Matcher;
9
+ import java.util.regex.Pattern;
10
+
11
+ import org.embulk.command.Runner;
12
+
13
+ public class EmbulkPluginTester {
14
+
15
+ public EmbulkPluginTester(Class<?> iface, String name, Class<?> impl)
16
+ {
17
+ TestExtension.addPlugin(iface, name, impl);
18
+ }
19
+
20
+ public void run(String ymlPath) throws Exception
21
+ {
22
+ Runner runner = new Runner("{}");
23
+ runner.run(convert(ymlPath));
24
+ }
25
+
26
+ private String convert(String yml) throws Exception
27
+ {
28
+ File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
29
+ File ymlPath = new File(EmbulkPluginTester.class.getResource("/" + yml).toURI());
30
+ File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
31
+ Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
32
+ try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
33
+ try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
34
+ String line;
35
+ while ((line = reader.readLine()) != null) {
36
+ Matcher matcher = pathPrefixPattern.matcher(line);
37
+ if (matcher.matches()) {
38
+ int group = 2;
39
+ writer.write(line.substring(0, matcher.start(group)));
40
+ writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
41
+ writer.write(line.substring(matcher.end(group)));
42
+ } else {
43
+ writer.write(line);
44
+ }
45
+ writer.newLine();
46
+ }
47
+ }
48
+ }
49
+ return tempYmlPath.getAbsolutePath();
50
+ }
51
+
52
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.test;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.plugin.InjectedPluginSource;
8
+ import org.embulk.spi.Extension;
9
+
10
+ import com.google.common.collect.ImmutableList;
11
+ import com.google.inject.Binder;
12
+ import com.google.inject.Module;
13
+
14
+
15
+ public class TestExtension implements Extension
16
+ {
17
+ private static class PluginDefinition
18
+ {
19
+ public final Class<?> iface;
20
+ public final String name;
21
+ public final Class<?> impl;
22
+
23
+ public PluginDefinition(Class<?> iface, String name, Class<?> impl)
24
+ {
25
+ this.iface = iface;
26
+ this.name = name;
27
+ this.impl = impl;
28
+ }
29
+ }
30
+
31
+ private static List<PluginDefinition> plugins = new ArrayList<PluginDefinition>();
32
+
33
+ public static void addPlugin(Class<?> iface, String name, Class<?> impl)
34
+ {
35
+ plugins.add(new PluginDefinition(iface, name, impl));
36
+ }
37
+
38
+ @Override
39
+ public List<Module> getModules(ConfigSource configsource) {
40
+ Module module = new Module() {
41
+
42
+ @Override
43
+ public void configure(Binder binder) {
44
+ for (PluginDefinition plugin : plugins) {
45
+ InjectedPluginSource.registerPluginTo(binder, plugin.iface, plugin.name, plugin.impl);
46
+ }
47
+ }
48
+ };
49
+ return ImmutableList.of(module);
50
+ }
51
+
52
+ }
@@ -0,0 +1 @@
1
+ org.embulk.test.TestExtension
@@ -0,0 +1,2 @@
1
+ 1,test,123.4,2015-4-27 11:23:45
2
+ 2,sample,-1,2015-12-31 23:59:59
File without changes
@@ -0,0 +1,20 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: csv
6
+ delimiter: ","
7
+ header_line: false
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: name, type: string}
11
+ - {name: value, type: double}
12
+ - {name: creation, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
13
+ out:
14
+ type: file
15
+ path_prefix: result
16
+ file_ext: .tsv
17
+ formatter:
18
+ type: csv
19
+ delimiter: "\t"
20
+ header_line: false
@@ -0,0 +1,16 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: jdbc-schema-csv
6
+ delimiter: ","
7
+ header_line: false
8
+ schema: &OUT
9
+ type: mysql
10
+ host: localhost
11
+ database: embulk_test
12
+ user: embulk_user
13
+ password: embulk_pass
14
+ table: input_test
15
+ mode: insert
16
+ out: *OUT
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: jdbc-schema-csv
6
+ delimiter: ","
7
+ header_line: false
8
+ schema:
9
+ type: mysql
10
+ host: localhost
11
+ database: embulk_test
12
+ user: embulk_user
13
+ password: embulk_pass
14
+ table: input_test
15
+ out:
16
+ type: mysql
17
+ host: localhost
18
+ database: embulk_test
19
+ user: embulk_user
20
+ password: embulk_pass
21
+ table: input_test
22
+ mode: insert
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: jdbc-schema-csv
6
+ delimiter: ","
7
+ header_line: false
8
+ schema:
9
+ type: mysql
10
+ host: localhost
11
+ database: embulk_test
12
+ user: embulk_user
13
+ password: embulk_pass
14
+ table: input_test
15
+ out:
16
+ type: file
17
+ path_prefix: result
18
+ file_ext: .tsv
19
+ formatter:
20
+ type: csv
21
+ delimiter: "\t"
22
+ header_line: false
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-jdbc-schema-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Hitoshi Tanaka
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-27 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README.md
20
+ - build.gradle
21
+ - build.xml
22
+ - lib/embulk/parser/jdbc-schema-csv.rb
23
+ - src/main/java/org/embulk/parser/JdbcInputPluginHelper.java
24
+ - src/main/java/org/embulk/parser/JdbcSchemaCsvParser.java
25
+ - src/test/java/org/embulk/parser/JdbcSchemaCsvParserTest.java
26
+ - src/test/java/org/embulk/test/EmbulkPluginTester.java
27
+ - src/test/java/org/embulk/test/TestExtension.java
28
+ - src/test/resources/META-INF/services/org.embulk.spi.Extension
29
+ - src/test/resources/data/test.csv
30
+ - src/test/resources/resource.txt
31
+ - src/test/resources/yml/csv.yml
32
+ - src/test/resources/yml/jdbc-csv-jdbc-alias.yml
33
+ - src/test/resources/yml/jdbc-csv-jdbc.yml
34
+ - src/test/resources/yml/jdbc-csv.yml
35
+ - classpath/embulk-parser-jdbc-schema-csv-0.0.1.jar
36
+ homepage: https://github.com/hito4t/embulk-parser-jdbc-schema-csv
37
+ licenses:
38
+ - Apache 2.0
39
+ metadata: {}
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 2.1.9
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: This Embulk plugin extends CSV parser to define columns based on database meta data.
60
+ test_files: []