embulk-filter-hash 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +17 -2
- data/src/main/kotlin/org/embulk/filter/hash/HashFilterPlugin.kt +140 -0
- data/src/test/kotlin/org/embulk/filter/hash/TestHashFilterPlugin.kt +96 -0
- data/src/test/resources/yaml/input_null_column.yml +9 -0
- metadata +8 -5
- data/src/main/java/org/embulk/filter/hash/HashFilterPlugin.java +0 -181
- data/src/test/java/org/embulk/filter/hash/TestHashFilterPlugin.java +0 -90
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0f52e09e73a986928c5c653de319a33aa57beec
|
4
|
+
data.tar.gz: cd8bd547ea9d23c0a844e000a468ea0e897202dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f747472a2553c25acdac402c4087479ae1905623a603fdb16eee22af386598ebc641b3070f56e246cc04bf66818c2923b0634673073eb6d7f7916774c8653b0
|
7
|
+
data.tar.gz: 350eb6742331943747ad15f694698520a10c9cf2d360b9ef13389e386385ff96ad8adb2b4ae634eb2029602324e543916ce9e643e5184df76f48577ba076f79d
|
data/build.gradle
CHANGED
@@ -1,8 +1,20 @@
|
|
1
|
+
buildscript {
|
2
|
+
ext.kotlin_version = '1.0.6'
|
3
|
+
repositories {
|
4
|
+
mavenCentral()
|
5
|
+
}
|
6
|
+
dependencies {
|
7
|
+
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
|
8
|
+
}
|
9
|
+
}
|
10
|
+
|
1
11
|
plugins {
|
2
12
|
id "com.jfrog.bintray" version "1.1"
|
3
13
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
14
|
id "java"
|
5
15
|
}
|
16
|
+
apply plugin: "kotlin"
|
17
|
+
|
6
18
|
import com.github.jrubygradle.JRubyExec
|
7
19
|
repositories {
|
8
20
|
mavenCentral()
|
@@ -13,12 +25,15 @@ configurations {
|
|
13
25
|
provided
|
14
26
|
}
|
15
27
|
|
16
|
-
version = "0.2.0"
|
28
|
+
version = "0.3.0"
|
29
|
+
sourceCompatibility = 1.7
|
30
|
+
targetCompatibility = 1.7
|
17
31
|
|
18
32
|
dependencies {
|
19
33
|
compile "org.embulk:embulk-core:0.8.16"
|
20
34
|
provided "org.embulk:embulk-core:0.8.16"
|
21
|
-
|
35
|
+
compile "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
|
36
|
+
testCompile "com.kamatama41:embulk-test-helpers:0.1.3"
|
22
37
|
// Uncomment when using local embulk-test-helpers (and settings.gradle as well)
|
23
38
|
//testCompile project(':embulk-test-helpers')
|
24
39
|
}
|
data/src/main/kotlin/org/embulk/filter/hash/HashFilterPlugin.kt
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
package org.embulk.filter.hash
|
2
|
+
|
3
|
+
import com.google.common.base.Optional
|
4
|
+
import org.embulk.config.Config
|
5
|
+
import org.embulk.config.ConfigDefault
|
6
|
+
import org.embulk.config.ConfigSource
|
7
|
+
import org.embulk.config.Task
|
8
|
+
import org.embulk.config.TaskSource
|
9
|
+
import org.embulk.spi.Column
|
10
|
+
import org.embulk.spi.DataException
|
11
|
+
import org.embulk.spi.Exec
|
12
|
+
import org.embulk.spi.FilterPlugin
|
13
|
+
import org.embulk.spi.Page
|
14
|
+
import org.embulk.spi.PageBuilder
|
15
|
+
import org.embulk.spi.PageOutput
|
16
|
+
import org.embulk.spi.PageReader
|
17
|
+
import org.embulk.spi.Schema
|
18
|
+
import org.embulk.spi.type.Types
|
19
|
+
import java.security.MessageDigest
|
20
|
+
|
21
|
+
/**
 * Embulk filter plugin that replaces the values of the configured columns with a
 * lower-case hexadecimal message digest of the value's `toString()` representation.
 *
 * Hashed columns are always emitted with type STRING and can optionally be renamed via
 * `new_name`; every other column is copied to the output unchanged. Null values stay null.
 */
class HashFilterPlugin : FilterPlugin {

    /** Top-level plugin configuration. */
    interface PluginTask : Task {
        /** Columns to hash, read from the `columns` config key. */
        @Config("columns")
        fun getColumns(): List<HashColumn>
    }

    /** Configuration of a single column to hash. */
    interface HashColumn : Task {
        /** Name of the input column whose values are hashed. */
        @Config("name")
        fun getName(): String

        /** Algorithm name handed to [MessageDigest.getInstance]; defaults to SHA-256. */
        @Config("algorithm")
        @ConfigDefault("\"SHA-256\"")
        fun getAlgorithm(): Optional<String>

        /** Name of the output column; the input column name is kept when absent. */
        @Config("new_name")
        @ConfigDefault("null")
        fun getNewName(): Optional<String>
    }

    /**
     * Builds the output schema: columns configured for hashing become STRING columns
     * (renamed when `new_name` is set), all other columns keep their name and type.
     *
     * @throws org.embulk.config.ConfigException if a column that will be hashed names an
     *         algorithm the JVM does not provide
     */
    override fun transaction(config: ConfigSource, inputSchema: Schema, control: FilterPlugin.Control) {
        val task = config.loadConfig(PluginTask::class.java)
        val hashColumnMap = convertHashColumnListToMap(task.getColumns())

        val builder = Schema.builder()
        for (column in inputSchema.columns) {
            val hashColumn = hashColumnMap[column.name]
            if (hashColumn != null) {
                // Reject an unusable algorithm here instead of on the first hashed record.
                validateAlgorithm(hashColumn)
                builder.add(hashColumn.getNewName().or(column.name), Types.STRING)
            } else {
                builder.add(column.name, column.type)
            }
        }
        control.run(task.dump(), builder.build())
    }

    /**
     * Returns a [PageOutput] that reads every record of the incoming pages, hashes the
     * configured columns, copies the remaining ones and forwards the result to [output].
     */
    override fun open(taskSource: TaskSource, inputSchema: Schema,
                      outputSchema: Schema, output: PageOutput): PageOutput {
        val task = taskSource.loadTask(PluginTask::class.java)
        val hashColumnMap = convertHashColumnListToMap(task.getColumns())
        val outputColumnMap = convertColumnListToMap(outputSchema.columns)

        return object : PageOutput {
            private val reader = PageReader(inputSchema)
            private val builder = PageBuilder(Exec.getBufferAllocator(), outputSchema, output)

            override fun add(page: Page) {
                reader.setPage(page)
                while (reader.nextRecord()) {
                    setValue()
                    builder.addRecord()
                }
            }

            /** Fills the current output record from the current input record. */
            private fun setValue() {
                for (inputColumn in inputSchema.columns) {
                    if (reader.isNull(inputColumn)) {
                        builder.setNull(inputColumn)
                        continue
                    }

                    val hashColumn = hashColumnMap[inputColumn.name]
                    if (hashColumn == null) {
                        copyValue(inputColumn)
                    } else {
                        // Only the digest is written: the output schema declares this
                        // column as STRING, so the original typed value must not be
                        // stored in its slot first.
                        writeHash(inputColumn, hashColumn)
                    }
                }
            }

            /** Copies the value of a column that is not hashed, keeping its type. */
            private fun copyValue(column: Column) {
                when (column.type) {
                    Types.STRING -> builder.setString(column, reader.getString(column))
                    Types.BOOLEAN -> builder.setBoolean(column, reader.getBoolean(column))
                    Types.DOUBLE -> builder.setDouble(column, reader.getDouble(column))
                    Types.LONG -> builder.setLong(column, reader.getLong(column))
                    Types.TIMESTAMP -> builder.setTimestamp(column, reader.getTimestamp(column))
                    Types.JSON -> builder.setJson(column, reader.getJson(column))
                    else -> throw DataException("Unexpected type:" + column.type)
                }
            }

            /** Reads the current value of [column] using the getter matching its type. */
            private fun readValue(column: Column): Any = when (column.type) {
                Types.STRING -> reader.getString(column)
                Types.BOOLEAN -> reader.getBoolean(column)
                Types.DOUBLE -> reader.getDouble(column)
                Types.LONG -> reader.getLong(column)
                Types.TIMESTAMP -> reader.getTimestamp(column)
                Types.JSON -> reader.getJson(column)
                else -> throw DataException("Unexpected type:" + column.type)
            }

            /** Hashes the current value of [inputColumn] and stores it in the output column. */
            private fun writeHash(inputColumn: Column, hashColumn: HashColumn) {
                val outputName = hashColumn.getNewName().or(inputColumn.name)
                val outputColumn = checkNotNull(outputColumnMap[outputName]) {
                    "Output schema has no column named '$outputName'"
                }
                val hashedValue = generateHash(readValue(inputColumn).toString(), hashColumn.getAlgorithm().get())
                builder.setString(outputColumn, hashedValue)
            }

            /**
             * Returns the digest of the UTF-8 bytes of [value] as lower-case hex,
             * two characters per byte.
             */
            private fun generateHash(value: String, algorithm: String): String {
                val digest = MessageDigest.getInstance(algorithm).digest(value.toByteArray(Charsets.UTF_8))
                return digest.joinToString("") { "%02x".format(it) }
            }

            override fun finish() {
                builder.finish()
            }

            override fun close() {
                builder.close()
            }
        }
    }

    /** Throws a config error when the algorithm of [hashColumn] is not available. */
    private fun validateAlgorithm(hashColumn: HashColumn) {
        val algorithm = hashColumn.getAlgorithm().get()
        try {
            MessageDigest.getInstance(algorithm)
        } catch (e: java.security.NoSuchAlgorithmException) {
            throw org.embulk.config.ConfigException(
                    "Unsupported hash algorithm '$algorithm' for column '${hashColumn.getName()}'", e)
        }
    }

    /** Indexes the hash column configs by input column name; the last duplicate wins. */
    private fun convertHashColumnListToMap(hashColumns: List<HashColumn>): Map<String, HashColumn> =
            hashColumns.associateBy { it.getName() }

    /** Indexes schema columns by name; the last duplicate wins. */
    private fun convertColumnListToMap(columns: List<Column>): Map<String, Column> =
            columns.associateBy { it.name }
}
|
data/src/test/kotlin/org/embulk/filter/hash/TestHashFilterPlugin.kt
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
package org.embulk.filter.hash
|
2
|
+
|
3
|
+
import org.embulk.spi.FilterPlugin
|
4
|
+
import org.embulk.test.EmbulkPluginTest
|
5
|
+
import org.embulk.test.TestingEmbulk
|
6
|
+
import org.junit.Test
|
7
|
+
|
8
|
+
import org.embulk.spi.type.Types.STRING
|
9
|
+
import org.embulk.test.TestOutputPlugin.assertRecords
|
10
|
+
import org.embulk.test.TestOutputPlugin.assertSchema
|
11
|
+
import org.embulk.test.Utils.column
|
12
|
+
import org.embulk.test.Utils.record
|
13
|
+
|
14
|
+
/** End-to-end tests that run [HashFilterPlugin] through the embulk-test-helpers harness. */
class TestHashFilterPlugin : EmbulkPluginTest() {

    override fun setup(builder: TestingEmbulk.Builder) {
        // Register the plugin under test as filter type "hash".
        builder.registerPlugin(FilterPlugin::class.java, "hash", HashFilterPlugin::class.java)
    }

    /** A column with an explicit algorithm and new name is hashed and renamed. */
    @Test fun specifiedColumnIsHashedAndRenamed() {
        val renamedAge = newConfig()
                .set("name", "age")
                .set("algorithm", "MD5")
                .set("new_name", "hashed_age")

        runFilter(filterConfig(listOf(renamedAge)), "yaml/input_basic.yml")

        assertSchema(
                column("username", STRING),
                column("hashed_age", STRING)
        )
        assertRecords(
                record("user1", "98f13708210194c475687be6106a3b84")
        )
    }

    /** Every column of the input is hashed; no algorithm is given for any of them. */
    @Test fun allColumnTypesAreHashed() {
        val hashedNames = listOf("username", "age", "weight", "active", "created_at", "options")

        runFilter(filterConfig(hashedNames.map { nameOnly(it) }), "yaml/input_column_types.yml")

        assertSchema(
                column("username", STRING),
                column("age", STRING),
                column("weight", STRING),
                column("active", STRING),
                column("created_at", STRING),
                column("options", STRING)
        )
        assertRecords(
                record(
                        "0a041b9462caa4a31bac3567e0b6e6fd9100787db2ab433d96f6d178cabfce90",
                        "6f4b6612125fb3a0daecd2799dfd6c9c299424fd920f9b308110a2c1fbd8f443",
                        "70822ecbef5bee37d162492107a3127fc0a4de0564f34ce92713a7baaeb582b0",
                        "b5bea41b6c623f7c09f1bf24dcae58ebab3c0cdd90ad966bc43a45b44867e12b",
                        "9673fe7b67d880e2c9071428c63f6e1bea9dde98283297277a20b92ea0acdc72",
                        "3ff0e331ca59a2a1194bac0e36359ed4540a97383e1cdf6eb95c7de9309143fc"
                )
        )
    }

    /** The expected record keeps a null first value next to a hashed second value. */
    @Test fun columnIsNull() {
        val hashedColumns = listOf(nameOnly("username"), nameOnly("age"))

        runFilter(filterConfig(hashedColumns), "yaml/input_null_column.yml")

        assertRecords(
                record(null, "f5ca38f748a1d6eaf726b8a42fb575c3c71f1864a8143301782de13da2d9202b")
        )
    }

    /** Builds the filter config of type "hash" for the given column configs. */
    private fun filterConfig(columns: List<Any>) = newConfig()
            .set("type", "hash")
            .set("columns", columns)

    /** Column config that only sets the name, leaving every other option unset. */
    private fun nameOnly(name: String) = newConfig().set("name", name)
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-hash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shinichi Ishimura
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -56,11 +56,14 @@ files:
|
|
56
56
|
- gradlew.bat
|
57
57
|
- lib/embulk/filter/hash.rb
|
58
58
|
- settings.gradle
|
59
|
-
- src/main/java/org/embulk/filter/hash/HashFilterPlugin.java
|
60
|
-
- src/test/java/org/embulk/filter/hash/TestHashFilterPlugin.java
|
59
|
+
- src/main/kotlin/org/embulk/filter/hash/HashFilterPlugin.kt
|
60
|
+
- src/test/kotlin/org/embulk/filter/hash/TestHashFilterPlugin.kt
|
61
61
|
- src/test/resources/yaml/input_basic.yml
|
62
62
|
- src/test/resources/yaml/input_column_types.yml
|
63
|
-
-
|
63
|
+
- src/test/resources/yaml/input_null_column.yml
|
64
|
+
- classpath/embulk-filter-hash-0.3.0.jar
|
65
|
+
- classpath/kotlin-runtime-1.0.6.jar
|
66
|
+
- classpath/kotlin-stdlib-1.0.6.jar
|
64
67
|
homepage: https://github.com/kamatama41/embulk-filter-hash
|
65
68
|
licenses:
|
66
69
|
- MIT
|
data/src/main/java/org/embulk/filter/hash/HashFilterPlugin.java
REMOVED
@@ -1,181 +0,0 @@
|
|
1
|
-
package org.embulk.filter.hash;
|
2
|
-
|
3
|
-
import com.google.common.base.Optional;
|
4
|
-
import com.google.common.base.Throwables;
|
5
|
-
import org.embulk.config.Config;
|
6
|
-
import org.embulk.config.ConfigDefault;
|
7
|
-
import org.embulk.config.ConfigSource;
|
8
|
-
import org.embulk.config.Task;
|
9
|
-
import org.embulk.config.TaskSource;
|
10
|
-
import org.embulk.spi.Column;
|
11
|
-
import org.embulk.spi.DataException;
|
12
|
-
import org.embulk.spi.Exec;
|
13
|
-
import org.embulk.spi.FilterPlugin;
|
14
|
-
import org.embulk.spi.Page;
|
15
|
-
import org.embulk.spi.PageBuilder;
|
16
|
-
import org.embulk.spi.PageOutput;
|
17
|
-
import org.embulk.spi.PageReader;
|
18
|
-
import org.embulk.spi.Schema;
|
19
|
-
import org.embulk.spi.time.Timestamp;
|
20
|
-
import org.embulk.spi.type.Types;
|
21
|
-
import org.msgpack.value.Value;
|
22
|
-
|
23
|
-
import java.security.MessageDigest;
|
24
|
-
import java.security.NoSuchAlgorithmException;
|
25
|
-
import java.util.HashMap;
|
26
|
-
import java.util.List;
|
27
|
-
import java.util.Map;
|
28
|
-
|
29
|
-
public class HashFilterPlugin implements FilterPlugin {
|
30
|
-
|
31
|
-
public interface PluginTask extends Task {
|
32
|
-
@Config("columns")
|
33
|
-
List<HashColumn> getColumns();
|
34
|
-
}
|
35
|
-
|
36
|
-
public interface HashColumn extends Task {
|
37
|
-
@Config("name")
|
38
|
-
String getName();
|
39
|
-
|
40
|
-
@Config("algorithm")
|
41
|
-
@ConfigDefault("\"SHA-256\"")
|
42
|
-
Optional<String> getAlgorithm();
|
43
|
-
|
44
|
-
@Config("new_name")
|
45
|
-
@ConfigDefault("null")
|
46
|
-
Optional<String> getNewName();
|
47
|
-
}
|
48
|
-
|
49
|
-
@Override
|
50
|
-
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
|
51
|
-
|
52
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
53
|
-
Map<String, HashColumn> hashColumnMap = convertHashColumnListToMap(task.getColumns());
|
54
|
-
|
55
|
-
Schema.Builder builder = Schema.builder();
|
56
|
-
for (Column column : inputSchema.getColumns()) {
|
57
|
-
|
58
|
-
HashColumn hashColumn = hashColumnMap.get(column.getName());
|
59
|
-
|
60
|
-
if (hashColumn != null) {
|
61
|
-
builder.add(hashColumn.getNewName().or(column.getName()), Types.STRING);
|
62
|
-
} else {
|
63
|
-
builder.add(column.getName(), column.getType());
|
64
|
-
}
|
65
|
-
}
|
66
|
-
control.run(task.dump(), builder.build());
|
67
|
-
}
|
68
|
-
|
69
|
-
@Override
|
70
|
-
public PageOutput open(final TaskSource taskSource, final Schema inputSchema,
|
71
|
-
final Schema outputSchema, final PageOutput output) {
|
72
|
-
|
73
|
-
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
74
|
-
final Map<String, HashColumn> hashColumnMap = convertHashColumnListToMap(task.getColumns());
|
75
|
-
final Map<String, Column> outputColumnMap = convertColumnListToMap(outputSchema.getColumns());
|
76
|
-
|
77
|
-
return new PageOutput() {
|
78
|
-
private PageReader reader = new PageReader(inputSchema);
|
79
|
-
private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
80
|
-
|
81
|
-
@Override
|
82
|
-
public void add(Page page) {
|
83
|
-
reader.setPage(page);
|
84
|
-
while (reader.nextRecord()) {
|
85
|
-
setValue();
|
86
|
-
builder.addRecord();
|
87
|
-
}
|
88
|
-
}
|
89
|
-
|
90
|
-
private void setValue() {
|
91
|
-
for (Column inputColumn : inputSchema.getColumns()) {
|
92
|
-
if (reader.isNull(inputColumn)) {
|
93
|
-
builder.setNull(inputColumn);
|
94
|
-
continue;
|
95
|
-
}
|
96
|
-
|
97
|
-
// Write the original data
|
98
|
-
Object inputValue;
|
99
|
-
if (Types.STRING.equals(inputColumn.getType())) {
|
100
|
-
final String value = reader.getString(inputColumn);
|
101
|
-
inputValue = value;
|
102
|
-
builder.setString(inputColumn, value);
|
103
|
-
} else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
104
|
-
final boolean value = reader.getBoolean(inputColumn);
|
105
|
-
inputValue = value;
|
106
|
-
builder.setBoolean(inputColumn, value);
|
107
|
-
} else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
108
|
-
final double value = reader.getDouble(inputColumn);
|
109
|
-
inputValue = value;
|
110
|
-
builder.setDouble(inputColumn, value);
|
111
|
-
} else if (Types.LONG.equals(inputColumn.getType())) {
|
112
|
-
final long value = reader.getLong(inputColumn);
|
113
|
-
inputValue = value;
|
114
|
-
builder.setLong(inputColumn, value);
|
115
|
-
} else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
116
|
-
final Timestamp value = reader.getTimestamp(inputColumn);
|
117
|
-
inputValue = value;
|
118
|
-
builder.setTimestamp(inputColumn, value);
|
119
|
-
} else if (Types.JSON.equals(inputColumn.getType())) {
|
120
|
-
final Value value = reader.getJson(inputColumn);
|
121
|
-
inputValue = value;
|
122
|
-
builder.setJson(inputColumn, value);
|
123
|
-
} else {
|
124
|
-
throw new DataException("Unexpected type:" + inputColumn.getType());
|
125
|
-
}
|
126
|
-
|
127
|
-
// Overwrite the column if it's hash column.
|
128
|
-
HashColumn hashColumn = hashColumnMap.get(inputColumn.getName());
|
129
|
-
if (hashColumn != null) {
|
130
|
-
final Column outputColumn = outputColumnMap.get(hashColumn.getNewName().or(inputColumn.getName()));
|
131
|
-
final String hashedValue = generateHash(inputValue.toString(), hashColumn.getAlgorithm().get());
|
132
|
-
builder.setString(outputColumn, hashedValue);
|
133
|
-
}
|
134
|
-
}
|
135
|
-
}
|
136
|
-
|
137
|
-
private String generateHash(String value, String algorithm) {
|
138
|
-
String result = null;
|
139
|
-
try {
|
140
|
-
MessageDigest md = MessageDigest.getInstance(algorithm);
|
141
|
-
md.update(value.getBytes());
|
142
|
-
|
143
|
-
StringBuilder sb = new StringBuilder();
|
144
|
-
for (byte b : md.digest()) {
|
145
|
-
sb.append(String.format("%02x", b));
|
146
|
-
}
|
147
|
-
result = sb.toString();
|
148
|
-
} catch (NoSuchAlgorithmException e) {
|
149
|
-
Throwables.propagate(e);
|
150
|
-
}
|
151
|
-
return result;
|
152
|
-
}
|
153
|
-
|
154
|
-
@Override
|
155
|
-
public void finish() {
|
156
|
-
builder.finish();
|
157
|
-
}
|
158
|
-
|
159
|
-
@Override
|
160
|
-
public void close() {
|
161
|
-
builder.close();
|
162
|
-
}
|
163
|
-
};
|
164
|
-
}
|
165
|
-
|
166
|
-
private static Map<String, HashColumn> convertHashColumnListToMap(List<HashColumn> hashColumns) {
|
167
|
-
Map<String, HashColumn> result = new HashMap<>();
|
168
|
-
for (HashColumn hashColumn : hashColumns) {
|
169
|
-
result.put(hashColumn.getName(), hashColumn);
|
170
|
-
}
|
171
|
-
return result;
|
172
|
-
}
|
173
|
-
|
174
|
-
private static Map<String, Column> convertColumnListToMap(List<Column> columns) {
|
175
|
-
Map<String, Column> result = new HashMap<>();
|
176
|
-
for (Column column : columns) {
|
177
|
-
result.put(column.getName(), column);
|
178
|
-
}
|
179
|
-
return result;
|
180
|
-
}
|
181
|
-
}
|
data/src/test/java/org/embulk/filter/hash/TestHashFilterPlugin.java
REMOVED
@@ -1,90 +0,0 @@
|
|
1
|
-
package org.embulk.filter.hash;
|
2
|
-
|
3
|
-
import org.embulk.config.ConfigSource;
|
4
|
-
import org.embulk.spi.FilterPlugin;
|
5
|
-
import org.embulk.test.ExtendedTestingEmbulk;
|
6
|
-
import org.junit.Rule;
|
7
|
-
import org.junit.Test;
|
8
|
-
|
9
|
-
import java.util.Arrays;
|
10
|
-
import java.util.Collections;
|
11
|
-
|
12
|
-
import static org.embulk.spi.type.Types.STRING;
|
13
|
-
import static org.embulk.test.TestOutputPlugin.assertRecords;
|
14
|
-
import static org.embulk.test.TestOutputPlugin.assertSchema;
|
15
|
-
import static org.embulk.test.Utils.column;
|
16
|
-
import static org.embulk.test.Utils.record;
|
17
|
-
|
18
|
-
public class TestHashFilterPlugin {
|
19
|
-
@Rule
|
20
|
-
public ExtendedTestingEmbulk embulk = (ExtendedTestingEmbulk) ExtendedTestingEmbulk
|
21
|
-
.builder()
|
22
|
-
.registerPlugin(FilterPlugin.class, "hash", HashFilterPlugin.class)
|
23
|
-
.build();
|
24
|
-
|
25
|
-
@Test
|
26
|
-
public void specifiedColumnIsHashedAndRenamed() {
|
27
|
-
final String inConfigPath = "yaml/input_basic.yml";
|
28
|
-
|
29
|
-
ConfigSource config = embulk.newConfig()
|
30
|
-
.set("type", "hash")
|
31
|
-
.set("columns", Collections.singletonList(
|
32
|
-
config().set("name", "age").set("algorithm", "MD5").set("new_name", "hashed_age")
|
33
|
-
)
|
34
|
-
);
|
35
|
-
|
36
|
-
embulk.runFilter(config, inConfigPath);
|
37
|
-
|
38
|
-
assertSchema(
|
39
|
-
column("username", STRING),
|
40
|
-
column("hashed_age", STRING)
|
41
|
-
);
|
42
|
-
|
43
|
-
assertRecords(
|
44
|
-
record("user1", "98f13708210194c475687be6106a3b84")
|
45
|
-
);
|
46
|
-
}
|
47
|
-
|
48
|
-
@Test
|
49
|
-
public void allColumnTypesAreHashed() {
|
50
|
-
final String inConfigPath = "yaml/input_column_types.yml";
|
51
|
-
|
52
|
-
ConfigSource config = embulk.newConfig()
|
53
|
-
.set("type", "hash")
|
54
|
-
.set("columns", Arrays.asList(
|
55
|
-
config().set("name", "username"),
|
56
|
-
config().set("name", "age"),
|
57
|
-
config().set("name", "weight"),
|
58
|
-
config().set("name", "active"),
|
59
|
-
config().set("name", "created_at"),
|
60
|
-
config().set("name", "options")
|
61
|
-
)
|
62
|
-
);
|
63
|
-
|
64
|
-
embulk.runFilter(config, inConfigPath);
|
65
|
-
|
66
|
-
assertSchema(
|
67
|
-
column("username", STRING),
|
68
|
-
column("age", STRING),
|
69
|
-
column("weight", STRING),
|
70
|
-
column("active", STRING),
|
71
|
-
column("created_at", STRING),
|
72
|
-
column("options", STRING)
|
73
|
-
);
|
74
|
-
|
75
|
-
assertRecords(
|
76
|
-
record(
|
77
|
-
"0a041b9462caa4a31bac3567e0b6e6fd9100787db2ab433d96f6d178cabfce90",
|
78
|
-
"6f4b6612125fb3a0daecd2799dfd6c9c299424fd920f9b308110a2c1fbd8f443",
|
79
|
-
"70822ecbef5bee37d162492107a3127fc0a4de0564f34ce92713a7baaeb582b0",
|
80
|
-
"b5bea41b6c623f7c09f1bf24dcae58ebab3c0cdd90ad966bc43a45b44867e12b",
|
81
|
-
"9673fe7b67d880e2c9071428c63f6e1bea9dde98283297277a20b92ea0acdc72",
|
82
|
-
"3ff0e331ca59a2a1194bac0e36359ed4540a97383e1cdf6eb95c7de9309143fc"
|
83
|
-
)
|
84
|
-
);
|
85
|
-
}
|
86
|
-
|
87
|
-
private ConfigSource config() {
|
88
|
-
return embulk.newConfig();
|
89
|
-
}
|
90
|
-
}
|