embulk-parser-jdbc-schema-csv 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 775c538cd41b7550efbedec4ec547c21a28808d1
4
+ data.tar.gz: d1d95b87d4be564d71e3dbaf626caab9b80eed77
5
+ SHA512:
6
+ metadata.gz: bfb0d20a9f313288e5a66ac442797c450ee87ddaecabf13e23804677e035ba820b8c1f1c9c93bf6240dc9150ede9b78fc8a18a7f14493c0b2a8e8fff822f242f
7
+ data.tar.gz: 2b14eb314f62912fc7d97535f0634748250b88b7bd412f82f8b85e48cd8492fccf4b468960325751a4ae354b62ce375a5258b98f4fb178f2e1d36da264dd563c
@@ -0,0 +1,39 @@
1
+ # Embulk CSV parser plugin using JDBC to define schema
2
+
3
+ This Embulk plugin extends the CSV parser so that its columns are defined from database metadata instead of being listed by hand.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+
9
+ ## Configuration
10
+
11
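+ - **schema**: the database table that defines the columns. It takes the same options as an embulk-input-jdbc plugin (`host`, `user`, `password`, `database`, `table`, ...) and must also contain `type`, the name of the JDBC input plugin to use (e.g. `mysql`).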
12
+
13
+ All other options are the same as those of the CSV parser plugin, except `columns`, which is ignored.
14
+
15
+ ### Example
16
+
17
+ ```yaml
18
+ in:
19
+ type: file
20
+ path_prefix: 'data/test.csv'
21
+ parser:
22
+ type: jdbc-schema-csv
23
+ delimiter: ','
24
+ header_line: false
25
+ schema: &OUT
26
+ host: localhost
27
+ user: myuser
28
+ password: ""
29
+ database: my_database
30
+ table: my_table
31
+ mode: insert
32
+ out: *OUT
33
+ ```
34
+
35
+ ### Build
36
+
37
+ ```
38
+ $ ./gradle gem
39
+ ```
@@ -0,0 +1,71 @@
1
// Build script for the embulk-parser-jdbc-schema-csv plugin: compiles the Java
// sources and packages the resulting jar as a Ruby gem.
plugins {
    id 'com.jfrog.bintray' version '1.1'
    id 'com.github.jruby-gradle.base' version '0.1.5'
    id 'java'
}
import com.github.jrubygradle.JRubyExec

apply plugin: 'java'
apply plugin: 'com.github.jruby-gradle.base'

[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'

project.version = '0.0.1'

repositories {
    mavenCentral()
    jcenter()
    // Local jar directory, in addition to the remote repositories above.
    flatDir {
        dirs 'depends'
    }
}

configurations {
    // Dependencies in this configuration are subtracted from the jars that the
    // 'classpath' task copies (see below).
    provided
}

dependencies {
    // Embulk is needed to compile, but is also declared 'provided' so that the
    // 'classpath' task leaves it out of the packaged classpath directory.
    compile 'org.embulk:embulk-core:0.6.5'
    compile 'org.embulk:embulk-standards:0.6.5'
    provided 'org.embulk:embulk-core:0.6.5'
    provided 'org.embulk:embulk-standards:0.6.5'
    testCompile 'junit:junit:4.+'
    // Declared by file name only; presumably resolved from the 'depends' flatDir repository.
    testCompile name:'embulk-input-jdbc-0.4.0'
    testCompile name:'embulk-input-mysql-0.4.0'
    testCompile name:'mysql-connector-java-5.1.34'
}

// Recreates ./classpath with the plugin jar plus every runtime jar that is not 'provided'.
task classpath(type: Copy, dependsOn: ["jar"]) {
    doFirst { file('classpath').deleteDir() }
    from (configurations.runtime - configurations.provided + files(jar.archivePath))
    into 'classpath'
}
clean { delete 'classpath' }

// Runs RubyGems' "build" command on build/gemspec through JRuby, then moves the
// resulting .gem file into ./pkg.
task gem(type: JRubyExec, dependsOn: ['build', 'gemspec', 'classpath']) {
    jrubyArgs '-rrubygems/gem_runner', "-eGem::GemRunner.new.run(ARGV)", 'build'
    script "build/gemspec"
    doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
}

// Writes the gem specification to build/gemspec. The packaged files are the
// git-tracked files (excluding dot-files and depends/) plus the jars in ./classpath.
task gemspec << {
    file('build').mkdirs();
    file('build/gemspec').write($/
Gem::Specification.new do |spec|
  spec.name = "${project.name}"
  spec.version = "${project.version}"
  spec.authors = ["Hitoshi Tanaka"]
  spec.homepage = "https://github.com/hito4t/embulk-parser-jdbc-schema-csv"
  spec.summary = "This Embulk plugin extends CSV parser to define columns based on database meta data."
  spec.licenses = ["Apache 2.0"]
  spec.files = `git ls-files`.split("\n").grep(%r"^(?!\.)").grep(%r"^(?!depends/)") + Dir["classpath/*.jar"]
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
  spec.require_paths = ["lib"]
end
/$)
}


// Pushes the built gem with the "gem" command line tool.
// The process output is forwarded to the build console (an undrained pipe can
// block the child process) and a non-zero exit status fails the build instead
// of being silently ignored.
task gempush << {
    def process = "gem push pkg/embulk-parser-jdbc-schema-csv-${project.version}.gem".execute()
    process.waitForProcessOutput(System.out, System.err)
    if (process.exitValue() != 0) {
        throw new GradleException("gem push failed with exit code ${process.exitValue()}")
    }
}
@@ -0,0 +1,7 @@
1
+ <project name="embulk-parser-jdbc-schema-csv" default="jar" basedir="./">
2
+ <target name="jar" depends="" description="embulk-parser-jdbc-schema-csv.jarファイルを作成します。">
3
+ <mkdir dir="${basedir}/jar" />
4
+ <jar destfile="${basedir}/jar/embulk-parser-jdbc-schema-csv-x.jar" basedir="${basedir}/out/main">
5
+ </jar>
6
+ </target>
7
+ </project>
@@ -0,0 +1,3 @@
1
# Registers the Java class org.embulk.parser.JdbcSchemaCsvParser as the parser
# named "jdbc-schema-csv". The third argument is the "classpath" directory,
# resolved relative to the location of this file.
classpath_dir = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_parser(
  "jdbc-schema-csv",
  "org.embulk.parser.JdbcSchemaCsvParser",
  classpath_dir)
@@ -0,0 +1,54 @@
1
+ package org.embulk.parser;
2
+
3
+ import java.lang.reflect.Method;
4
+
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.spi.InputPlugin;
7
+ import org.embulk.spi.Schema;
8
+
9
+ public class JdbcInputPluginHelper
10
+ {
11
+ private final InputPlugin plugin;
12
+ private final Class<?> abstractJdbcInputPluginClass;
13
+
14
+ public JdbcInputPluginHelper(InputPlugin plugin) throws ClassNotFoundException
15
+ {
16
+ this.plugin = plugin;
17
+ abstractJdbcInputPluginClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.AbstractJdbcInputPlugin");
18
+ }
19
+
20
+ public Schema getSchema(ConfigSource config) throws Exception
21
+ {
22
+ Class<?> taskClass = getTaskClass();
23
+ Object task = config.loadConfig(taskClass);
24
+
25
+ try (AutoCloseable connection = newConnection(task)) {
26
+ return setupTask(connection, task);
27
+ }
28
+ }
29
+
30
+ private Class<?> getTaskClass() throws Exception
31
+ {
32
+ Method method = abstractJdbcInputPluginClass.getDeclaredMethod("getTaskClass");
33
+ method.setAccessible(true);
34
+ return (Class<?>)method.invoke(plugin);
35
+ }
36
+
37
+ private AutoCloseable newConnection(Object task) throws Exception
38
+ {
39
+ Class<?> taskClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.AbstractJdbcInputPlugin$PluginTask");
40
+ Method method = abstractJdbcInputPluginClass.getDeclaredMethod("newConnection", taskClass);
41
+ method.setAccessible(true);
42
+ return (AutoCloseable)method.invoke(plugin, task);
43
+ }
44
+
45
+ public Schema setupTask(Object connection, Object task) throws Exception
46
+ {
47
+ Class<?> connectionClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.JdbcInputConnection");
48
+ Class<?> taskClass = plugin.getClass().getClassLoader().loadClass("org.embulk.input.jdbc.AbstractJdbcInputPlugin$PluginTask");
49
+ Method method = abstractJdbcInputPluginClass.getDeclaredMethod("setupTask", connectionClass, taskClass);
50
+ method.setAccessible(true);
51
+ return (Schema)method.invoke(plugin, connection, task);
52
+ }
53
+
54
+ }
@@ -0,0 +1,59 @@
1
+ package org.embulk.parser;
2
+
3
+
4
+ import java.util.List;
5
+
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.TaskSource;
8
+ import org.embulk.plugin.PluginType;
9
+ import org.embulk.spi.Column;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.ExecSession;
12
+ import org.embulk.spi.InputPlugin;
13
+ import org.embulk.spi.Schema;
14
+ import org.embulk.spi.SchemaConfig;
15
+ import org.embulk.standards.CsvParserPlugin;
16
+ import org.slf4j.Logger;
17
+
18
+ public class JdbcSchemaCsvParser extends CsvParserPlugin
19
+ {
20
+ private final Logger logger = Exec.getLogger(JdbcSchemaCsvParser.class);
21
+
22
+ public static interface PluginTask extends CsvParserPlugin.PluginTask
23
+ {
24
+ @Override
25
+ public SchemaConfig getSchemaConfig();
26
+
27
+ }
28
+
29
+ @Override
30
+ public void transaction(ConfigSource config, final Control control)
31
+ {
32
+ // to avoid config error
33
+ config.set("columns", java.util.Collections.emptyList());
34
+ ExecSession session = Exec.session();
35
+ ConfigSource child = config.getNested("schema");
36
+ String type = child.get(String.class, "type");
37
+ InputPlugin input = session.newPlugin(InputPlugin.class, new PluginType(type));
38
+
39
+ try {
40
+ JdbcInputPluginHelper helper = new JdbcInputPluginHelper(input);
41
+ final Schema schemaFromJdbc = helper.getSchema(child);
42
+ for (int i = 0; i < schemaFromJdbc.getColumnCount(); i++) {
43
+ Column column = schemaFromJdbc.getColumn(i);
44
+ logger.info(String.format("column %d : name = %s, type = %s", i + 1, column.getName(), column.getType()));
45
+ }
46
+
47
+ super.transaction(config, new Control() {
48
+ @Override
49
+ public void run(TaskSource taskSource, Schema schema) {
50
+ control.run(taskSource, schemaFromJdbc);
51
+ }
52
+ });
53
+
54
+ } catch (Exception e) {
55
+ throw new RuntimeException(e);
56
+ }
57
+ }
58
+
59
+ }
@@ -0,0 +1,62 @@
1
+ package org.embulk.parser;
2
+
3
+ import static org.junit.Assert.assertEquals;
4
+
5
+ import java.nio.charset.Charset;
6
+ import java.nio.file.FileSystem;
7
+ import java.nio.file.FileSystems;
8
+ import java.nio.file.Files;
9
+ import java.util.List;
10
+
11
+ import org.embulk.input.MySQLInputPlugin;
12
+ import org.embulk.spi.InputPlugin;
13
+ import org.embulk.spi.ParserPlugin;
14
+ import org.embulk.test.EmbulkPluginTester;
15
+ import org.embulk.test.TestExtension;
16
+ import org.junit.Test;
17
+
18
+ public class JdbcSchemaCsvParserTest
19
+ {
20
+
21
+ /*
22
+ Prepare MySQL table:
23
+
24
+ create table embulk_test;
25
+ grant all on embulk_test.* to embulk_user@"%" identified by 'embulk_pass';
26
+
27
+ create table embulk_test.input_test (
28
+ id bigint,
29
+ name char(8),
30
+ value double,
31
+ creation timestamp
32
+ );
33
+ */
34
+
35
+ private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "jdbc-schema-csv", JdbcSchemaCsvParser.class);
36
+ static {
37
+ TestExtension.addPlugin(InputPlugin.class, "mysql", MySQLInputPlugin.class);
38
+ }
39
+
40
+ @Test
41
+ public void testCsv() throws Exception
42
+ {
43
+ test("yml/csv.yml");
44
+ }
45
+
46
+ @Test
47
+ public void testJdbcSchemaCsv() throws Exception
48
+ {
49
+ test("yml/jdbc-csv.yml");
50
+ }
51
+
52
+ private void test(String yml) throws Exception
53
+ {
54
+ tester.run(yml);
55
+
56
+ FileSystem fs = FileSystems.getDefault();
57
+ List<String> lines = Files.readAllLines(fs.getPath("result.000.00.tsv"), Charset.forName("UTF-8"));
58
+ assertEquals(2, lines.size());
59
+ assertEquals("1\ttest\t123.4\t2015-04-27 11:23:45", lines.get(0));
60
+ assertEquals("2\tsample\t-1.0\t2015-12-31 23:59:59", lines.get(1));
61
+ }
62
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.test;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.BufferedWriter;
5
+ import java.io.File;
6
+ import java.io.FileReader;
7
+ import java.io.FileWriter;
8
+ import java.util.regex.Matcher;
9
+ import java.util.regex.Pattern;
10
+
11
+ import org.embulk.command.Runner;
12
+
13
+ public class EmbulkPluginTester {
14
+
15
+ public EmbulkPluginTester(Class<?> iface, String name, Class<?> impl)
16
+ {
17
+ TestExtension.addPlugin(iface, name, impl);
18
+ }
19
+
20
+ public void run(String ymlPath) throws Exception
21
+ {
22
+ Runner runner = new Runner("{}");
23
+ runner.run(convert(ymlPath));
24
+ }
25
+
26
+ private String convert(String yml) throws Exception
27
+ {
28
+ File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
29
+ File ymlPath = new File(EmbulkPluginTester.class.getResource("/" + yml).toURI());
30
+ File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
31
+ Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
32
+ try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
33
+ try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
34
+ String line;
35
+ while ((line = reader.readLine()) != null) {
36
+ Matcher matcher = pathPrefixPattern.matcher(line);
37
+ if (matcher.matches()) {
38
+ int group = 2;
39
+ writer.write(line.substring(0, matcher.start(group)));
40
+ writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
41
+ writer.write(line.substring(matcher.end(group)));
42
+ } else {
43
+ writer.write(line);
44
+ }
45
+ writer.newLine();
46
+ }
47
+ }
48
+ }
49
+ return tempYmlPath.getAbsolutePath();
50
+ }
51
+
52
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.test;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.plugin.InjectedPluginSource;
8
+ import org.embulk.spi.Extension;
9
+
10
+ import com.google.common.collect.ImmutableList;
11
+ import com.google.inject.Binder;
12
+ import com.google.inject.Module;
13
+
14
+
15
+ public class TestExtension implements Extension
16
+ {
17
+ private static class PluginDefinition
18
+ {
19
+ public final Class<?> iface;
20
+ public final String name;
21
+ public final Class<?> impl;
22
+
23
+ public PluginDefinition(Class<?> iface, String name, Class<?> impl)
24
+ {
25
+ this.iface = iface;
26
+ this.name = name;
27
+ this.impl = impl;
28
+ }
29
+ }
30
+
31
+ private static List<PluginDefinition> plugins = new ArrayList<PluginDefinition>();
32
+
33
+ public static void addPlugin(Class<?> iface, String name, Class<?> impl)
34
+ {
35
+ plugins.add(new PluginDefinition(iface, name, impl));
36
+ }
37
+
38
+ @Override
39
+ public List<Module> getModules(ConfigSource configsource) {
40
+ Module module = new Module() {
41
+
42
+ @Override
43
+ public void configure(Binder binder) {
44
+ for (PluginDefinition plugin : plugins) {
45
+ InjectedPluginSource.registerPluginTo(binder, plugin.iface, plugin.name, plugin.impl);
46
+ }
47
+ }
48
+ };
49
+ return ImmutableList.of(module);
50
+ }
51
+
52
+ }
@@ -0,0 +1 @@
1
+ org.embulk.test.TestExtension
@@ -0,0 +1,2 @@
1
+ 1,test,123.4,2015-4-27 11:23:45
2
+ 2,sample,-1,2015-12-31 23:59:59
File without changes
@@ -0,0 +1,20 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: csv
6
+ delimiter: ","
7
+ header_line: false
8
+ columns:
9
+ - {name: id, type: long}
10
+ - {name: name, type: string}
11
+ - {name: value, type: double}
12
+ - {name: creation, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
13
+ out:
14
+ type: file
15
+ path_prefix: result
16
+ file_ext: .tsv
17
+ formatter:
18
+ type: csv
19
+ delimiter: "\t"
20
+ header_line: false
@@ -0,0 +1,16 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: jdbc-schema-csv
6
+ delimiter: ","
7
+ header_line: false
8
+ schema: &OUT
9
+ type: mysql
10
+ host: localhost
11
+ database: embulk_test
12
+ user: embulk_user
13
+ password: embulk_pass
14
+ table: input_test
15
+ mode: insert
16
+ out: *OUT
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: jdbc-schema-csv
6
+ delimiter: ","
7
+ header_line: false
8
+ schema:
9
+ type: mysql
10
+ host: localhost
11
+ database: embulk_test
12
+ user: embulk_user
13
+ password: embulk_pass
14
+ table: input_test
15
+ out:
16
+ type: mysql
17
+ host: localhost
18
+ database: embulk_test
19
+ user: embulk_user
20
+ password: embulk_pass
21
+ table: input_test
22
+ mode: insert
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/test.csv'
4
+ parser:
5
+ type: jdbc-schema-csv
6
+ delimiter: ","
7
+ header_line: false
8
+ schema:
9
+ type: mysql
10
+ host: localhost
11
+ database: embulk_test
12
+ user: embulk_user
13
+ password: embulk_pass
14
+ table: input_test
15
+ out:
16
+ type: file
17
+ path_prefix: result
18
+ file_ext: .tsv
19
+ formatter:
20
+ type: csv
21
+ delimiter: "\t"
22
+ header_line: false
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-jdbc-schema-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Hitoshi Tanaka
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-27 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README.md
20
+ - build.gradle
21
+ - build.xml
22
+ - lib/embulk/parser/jdbc-schema-csv.rb
23
+ - src/main/java/org/embulk/parser/JdbcInputPluginHelper.java
24
+ - src/main/java/org/embulk/parser/JdbcSchemaCsvParser.java
25
+ - src/test/java/org/embulk/parser/JdbcSchemaCsvParserTest.java
26
+ - src/test/java/org/embulk/test/EmbulkPluginTester.java
27
+ - src/test/java/org/embulk/test/TestExtension.java
28
+ - src/test/resources/META-INF/services/org.embulk.spi.Extension
29
+ - src/test/resources/data/test.csv
30
+ - src/test/resources/resource.txt
31
+ - src/test/resources/yml/csv.yml
32
+ - src/test/resources/yml/jdbc-csv-jdbc-alias.yml
33
+ - src/test/resources/yml/jdbc-csv-jdbc.yml
34
+ - src/test/resources/yml/jdbc-csv.yml
35
+ - classpath/embulk-parser-jdbc-schema-csv-0.0.1.jar
36
+ homepage: https://github.com/hito4t/embulk-parser-jdbc-schema-csv
37
+ licenses:
38
+ - Apache 2.0
39
+ metadata: {}
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 2.1.9
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: This Embulk plugin extends CSV parser to define columns based on database meta data.
60
+ test_files: []