embulk-filter-join_file 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ package org.embulk.filter.join_file;
2
+
3
+ import com.fasterxml.jackson.core.JsonParseException;
4
+ import com.fasterxml.jackson.core.type.TypeReference;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
6
+ import com.google.common.collect.Maps;
7
+ import org.embulk.config.ConfigException;
8
+ import org.embulk.spi.ColumnConfig;
9
+ import org.embulk.spi.Exec;
10
+ import org.slf4j.Logger;
11
+ import sun.reflect.generics.reflectiveObjects.NotImplementedException;
12
+
13
+ import java.io.File;
14
+ import java.io.IOException;
15
+ import java.util.ArrayList;
16
+ import java.util.HashMap;
17
+ import java.util.List;
18
+
19
+ /**
20
+ * Created by takahiro.nakayama on 10/11/15.
21
+ */
22
+ public class TableBuilder
23
+ {
24
+ private final Logger logger = Exec.getLogger(TableBuilder.class);
25
+ private final String filePath;
26
+ private final String fileFormat;
27
+ private final List<ColumnConfig> columns;
28
+ private final String rowKeyName;
29
+ private final String columnPrefix;
30
+
31
+ public TableBuilder(String filePath, String fileFormat, List<ColumnConfig> columns, String rowKeyName, String columnPrefix)
32
+ {
33
+ this.filePath = filePath;
34
+ this.fileFormat = fileFormat;
35
+ this.columns = columns;
36
+ this.rowKeyName = rowKeyName;
37
+ this.columnPrefix = columnPrefix;
38
+ }
39
+
40
+ public HashMap<String, HashMap<String, String>> build()
41
+ throws IOException
42
+ {
43
+ HashMap<String, HashMap<String, String>> table = Maps.newHashMap();
44
+
45
+ for (HashMap<String, String> rawRecord: loadFile()) {
46
+
47
+ HashMap<String, String> record = Maps.newHashMap();
48
+
49
+ for (ColumnConfig column: columns) {
50
+ String columnKey = columnPrefix + column.getName();
51
+ String value = rawRecord.get(column.getName());
52
+
53
+ record.put(columnKey, value);
54
+ }
55
+
56
+ String rowKey = rawRecord.get(rowKeyName);
57
+ table.put(rowKey, record);
58
+ }
59
+
60
+ return table;
61
+ }
62
+
63
+ private List<HashMap<String, String>> loadFile()
64
+ throws IOException
65
+ {
66
+ List<HashMap<String, String>> rawData;
67
+ switch (fileFormat) {
68
+ case "csv":
69
+ logger.error("will support csv format, but not yet.");
70
+ throw new NotImplementedException(); // TODO: will support csv format, but not yet.
71
+ case "tsv":
72
+ logger.error("will support tsv format, but not yet.");
73
+ throw new NotImplementedException(); // TODO: will support tsv format, but not yet.
74
+ case "yaml":
75
+ logger.error("will support yaml format, but not yet.");
76
+ throw new NotImplementedException(); // TODO: will support yaml format, but not yet.
77
+ case "json":
78
+ ObjectMapper mapper = new ObjectMapper();
79
+ rawData = mapper.readValue(new File(filePath), new TypeReference<ArrayList<HashMap<String, String>>>(){});
80
+ break;
81
+ default:
82
+ throw new ConfigException("Unsupported File Format: " + fileFormat);
83
+ }
84
+
85
+ return rawData;
86
+ }
87
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.filter.join_file;
2
+
3
+ public class TestJoinFileFilterPlugin // Placeholder: no fields, methods, or test cases are declared in this class.
4
+ {
5
+ }
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-join_file
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Civitaspo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
+ description: Join File
42
+ email:
43
+ - civitaspo@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - LICENSE.txt
50
+ - README.md
51
+ - build.gradle
52
+ - example/config.yml
53
+ - example/data.csv
54
+ - example/master.json
55
+ - gradle/wrapper/gradle-wrapper.jar
56
+ - gradle/wrapper/gradle-wrapper.properties
57
+ - gradlew
58
+ - gradlew.bat
59
+ - lib/embulk/filter/join_file.rb
60
+ - src/main/java/org/embulk/filter/join_file/FilteredPageOutput.java
61
+ - src/main/java/org/embulk/filter/join_file/JoinFileFilterPlugin.java
62
+ - src/main/java/org/embulk/filter/join_file/TableBuilder.java
63
+ - src/test/java/org/embulk/filter/join_file/TestJoinFileFilterPlugin.java
64
+ - classpath/embulk-filter-join_file-0.0.1.jar
65
+ homepage: https://github.com/civitaspo/embulk-filter-join_file
66
+ licenses:
67
+ - MIT
68
+ metadata: {}
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 2.1.9
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Join File filter plugin for Embulk
89
+ test_files: []