embulk-filter-join_file 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/LICENSE.txt +21 -0
- data/README.md +122 -0
- data/build.gradle +75 -0
- data/example/config.yml +32 -0
- data/example/data.csv +100 -0
- data/example/master.json +765 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/join_file.rb +3 -0
- data/src/main/java/org/embulk/filter/join_file/FilteredPageOutput.java +165 -0
- data/src/main/java/org/embulk/filter/join_file/JoinFileFilterPlugin.java +162 -0
- data/src/main/java/org/embulk/filter/join_file/TableBuilder.java +87 -0
- data/src/test/java/org/embulk/filter/join_file/TestJoinFileFilterPlugin.java +5 -0
- metadata +89 -0
@@ -0,0 +1,87 @@
|
|
1
|
+
package org.embulk.filter.join_file;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.core.JsonParseException;
|
4
|
+
import com.fasterxml.jackson.core.type.TypeReference;
|
5
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
6
|
+
import com.google.common.collect.Maps;
|
7
|
+
import org.embulk.config.ConfigException;
|
8
|
+
import org.embulk.spi.ColumnConfig;
|
9
|
+
import org.embulk.spi.Exec;
|
10
|
+
import org.slf4j.Logger;
|
11
|
+
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
|
12
|
+
|
13
|
+
import java.io.File;
|
14
|
+
import java.io.IOException;
|
15
|
+
import java.util.ArrayList;
|
16
|
+
import java.util.HashMap;
|
17
|
+
import java.util.List;
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Created by takahiro.nakayama on 10/11/15.
|
21
|
+
*/
|
22
|
+
public class TableBuilder
|
23
|
+
{
|
24
|
+
private final Logger logger = Exec.getLogger(TableBuilder.class);
|
25
|
+
private final String filePath;
|
26
|
+
private final String fileFormat;
|
27
|
+
private final List<ColumnConfig> columns;
|
28
|
+
private final String rowKeyName;
|
29
|
+
private final String columnPrefix;
|
30
|
+
|
31
|
+
public TableBuilder(String filePath, String fileFormat, List<ColumnConfig> columns, String rowKeyName, String columnPrefix)
|
32
|
+
{
|
33
|
+
this.filePath = filePath;
|
34
|
+
this.fileFormat = fileFormat;
|
35
|
+
this.columns = columns;
|
36
|
+
this.rowKeyName = rowKeyName;
|
37
|
+
this.columnPrefix = columnPrefix;
|
38
|
+
}
|
39
|
+
|
40
|
+
public HashMap<String, HashMap<String, String>> build()
|
41
|
+
throws IOException
|
42
|
+
{
|
43
|
+
HashMap<String, HashMap<String, String>> table = Maps.newHashMap();
|
44
|
+
|
45
|
+
for (HashMap<String, String> rawRecord: loadFile()) {
|
46
|
+
|
47
|
+
HashMap<String, String> record = Maps.newHashMap();
|
48
|
+
|
49
|
+
for (ColumnConfig column: columns) {
|
50
|
+
String columnKey = columnPrefix + column.getName();
|
51
|
+
String value = rawRecord.get(column.getName());
|
52
|
+
|
53
|
+
record.put(columnKey, value);
|
54
|
+
}
|
55
|
+
|
56
|
+
String rowKey = rawRecord.get(rowKeyName);
|
57
|
+
table.put(rowKey, record);
|
58
|
+
}
|
59
|
+
|
60
|
+
return table;
|
61
|
+
}
|
62
|
+
|
63
|
+
private List<HashMap<String, String>> loadFile()
|
64
|
+
throws IOException
|
65
|
+
{
|
66
|
+
List<HashMap<String, String>> rawData;
|
67
|
+
switch (fileFormat) {
|
68
|
+
case "csv":
|
69
|
+
logger.error("will support csv format, but not yet.");
|
70
|
+
throw new NotImplementedException(); // TODO: will support csv format, but not yet.
|
71
|
+
case "tsv":
|
72
|
+
logger.error("will support tsv format, but not yet.");
|
73
|
+
throw new NotImplementedException(); // TODO: will support tsv format, but not yet.
|
74
|
+
case "yaml":
|
75
|
+
logger.error("will support yaml format, but not yet.");
|
76
|
+
throw new NotImplementedException(); // TODO: will support yaml format, but not yet.
|
77
|
+
case "json":
|
78
|
+
ObjectMapper mapper = new ObjectMapper();
|
79
|
+
rawData = mapper.readValue(new File(filePath), new TypeReference<ArrayList<HashMap<String, String>>>(){});
|
80
|
+
break;
|
81
|
+
default:
|
82
|
+
throw new ConfigException("Unsupported File Format: " + fileFormat);
|
83
|
+
}
|
84
|
+
|
85
|
+
return rawData;
|
86
|
+
}
|
87
|
+
}
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-filter-join_file
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Civitaspo
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
|
+
description: Join File
|
42
|
+
email:
|
43
|
+
- civitaspo@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- LICENSE.txt
|
50
|
+
- README.md
|
51
|
+
- build.gradle
|
52
|
+
- example/config.yml
|
53
|
+
- example/data.csv
|
54
|
+
- example/master.json
|
55
|
+
- gradle/wrapper/gradle-wrapper.jar
|
56
|
+
- gradle/wrapper/gradle-wrapper.properties
|
57
|
+
- gradlew
|
58
|
+
- gradlew.bat
|
59
|
+
- lib/embulk/filter/join_file.rb
|
60
|
+
- src/main/java/org/embulk/filter/join_file/FilteredPageOutput.java
|
61
|
+
- src/main/java/org/embulk/filter/join_file/JoinFileFilterPlugin.java
|
62
|
+
- src/main/java/org/embulk/filter/join_file/TableBuilder.java
|
63
|
+
- src/test/java/org/embulk/filter/join_file/TestJoinFileFilterPlugin.java
|
64
|
+
- classpath/embulk-filter-join_file-0.0.1.jar
|
65
|
+
homepage: https://github.com/civitaspo/embulk-filter-join_file
|
66
|
+
licenses:
|
67
|
+
- MIT
|
68
|
+
metadata: {}
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 2.1.9
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: Join File filter plugin for Embulk
|
89
|
+
test_files: []
|