embulk-filter-hash 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +17 -2
- data/src/main/kotlin/org/embulk/filter/hash/HashFilterPlugin.kt +140 -0
- data/src/test/kotlin/org/embulk/filter/hash/TestHashFilterPlugin.kt +96 -0
- data/src/test/resources/yaml/input_null_column.yml +9 -0
- metadata +8 -5
- data/src/main/java/org/embulk/filter/hash/HashFilterPlugin.java +0 -181
- data/src/test/java/org/embulk/filter/hash/TestHashFilterPlugin.java +0 -90
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0f52e09e73a986928c5c653de319a33aa57beec
|
4
|
+
data.tar.gz: cd8bd547ea9d23c0a844e000a468ea0e897202dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f747472a2553c25acdac402c4087479ae1905623a603fdb16eee22af386598ebc641b3070f56e246cc04bf66818c2923b0634673073eb6d7f7916774c8653b0
|
7
|
+
data.tar.gz: 350eb6742331943747ad15f694698520a10c9cf2d360b9ef13389e386385ff96ad8adb2b4ae634eb2029602324e543916ce9e643e5184df76f48577ba076f79d
|
data/build.gradle
CHANGED
@@ -1,8 +1,20 @@
|
|
1
|
+
buildscript {
|
2
|
+
ext.kotlin_version = '1.0.6'
|
3
|
+
repositories {
|
4
|
+
mavenCentral()
|
5
|
+
}
|
6
|
+
dependencies {
|
7
|
+
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
|
8
|
+
}
|
9
|
+
}
|
10
|
+
|
1
11
|
plugins {
|
2
12
|
id "com.jfrog.bintray" version "1.1"
|
3
13
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
14
|
id "java"
|
5
15
|
}
|
16
|
+
apply plugin: "kotlin"
|
17
|
+
|
6
18
|
import com.github.jrubygradle.JRubyExec
|
7
19
|
repositories {
|
8
20
|
mavenCentral()
|
@@ -13,12 +25,15 @@ configurations {
|
|
13
25
|
provided
|
14
26
|
}
|
15
27
|
|
16
|
-
version = "0.2.0"
|
28
|
+
version = "0.3.0"
|
29
|
+
sourceCompatibility = 1.7
|
30
|
+
targetCompatibility = 1.7
|
17
31
|
|
18
32
|
dependencies {
|
19
33
|
compile "org.embulk:embulk-core:0.8.16"
|
20
34
|
provided "org.embulk:embulk-core:0.8.16"
|
21
|
-
|
35
|
+
compile "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
|
36
|
+
testCompile "com.kamatama41:embulk-test-helpers:0.1.3"
|
22
37
|
// Uncomment when using local embulk-test-helpers (and settings.gradle as well)
|
23
38
|
//testCompile project(':embulk-test-helpers')
|
24
39
|
}
|
@@ -0,0 +1,140 @@
|
|
1
|
+
package org.embulk.filter.hash
|
2
|
+
|
3
|
+
import com.google.common.base.Optional
|
4
|
+
import org.embulk.config.Config
|
5
|
+
import org.embulk.config.ConfigDefault
|
6
|
+
import org.embulk.config.ConfigSource
|
7
|
+
import org.embulk.config.Task
|
8
|
+
import org.embulk.config.TaskSource
|
9
|
+
import org.embulk.spi.Column
|
10
|
+
import org.embulk.spi.DataException
|
11
|
+
import org.embulk.spi.Exec
|
12
|
+
import org.embulk.spi.FilterPlugin
|
13
|
+
import org.embulk.spi.Page
|
14
|
+
import org.embulk.spi.PageBuilder
|
15
|
+
import org.embulk.spi.PageOutput
|
16
|
+
import org.embulk.spi.PageReader
|
17
|
+
import org.embulk.spi.Schema
|
18
|
+
import org.embulk.spi.type.Types
|
19
|
+
import java.security.MessageDigest
|
20
|
+
|
21
|
+
class HashFilterPlugin : FilterPlugin {
|
22
|
+
|
23
|
+
interface PluginTask : Task {
|
24
|
+
@Config("columns")
|
25
|
+
fun getColumns(): List<HashColumn>
|
26
|
+
}
|
27
|
+
|
28
|
+
interface HashColumn : Task {
|
29
|
+
@Config("name")
|
30
|
+
fun getName(): String
|
31
|
+
|
32
|
+
@Config("algorithm")
|
33
|
+
@ConfigDefault("\"SHA-256\"")
|
34
|
+
fun getAlgorithm(): Optional<String>
|
35
|
+
|
36
|
+
@Config("new_name")
|
37
|
+
@ConfigDefault("null")
|
38
|
+
fun getNewName(): Optional<String>
|
39
|
+
}
|
40
|
+
|
41
|
+
override fun transaction(config: ConfigSource, inputSchema: Schema, control: FilterPlugin.Control) {
|
42
|
+
|
43
|
+
val task = config.loadConfig(PluginTask::class.java)
|
44
|
+
val hashColumnMap = convertHashColumnListToMap(task.getColumns())
|
45
|
+
|
46
|
+
val builder = Schema.builder()
|
47
|
+
inputSchema.columns.forEach { column ->
|
48
|
+
val hashColumn = hashColumnMap[column.name]
|
49
|
+
if (hashColumn != null) {
|
50
|
+
builder.add(hashColumn.getNewName().or(column.name), Types.STRING)
|
51
|
+
} else {
|
52
|
+
builder.add(column.name, column.type)
|
53
|
+
}
|
54
|
+
}
|
55
|
+
control.run(task.dump(), builder.build())
|
56
|
+
}
|
57
|
+
|
58
|
+
override fun open(taskSource: TaskSource, inputSchema: Schema,
|
59
|
+
outputSchema: Schema, output: PageOutput): PageOutput {
|
60
|
+
|
61
|
+
val task = taskSource.loadTask(PluginTask::class.java)
|
62
|
+
val hashColumnMap = convertHashColumnListToMap(task.getColumns())
|
63
|
+
val outputColumnMap = convertColumnListToMap(outputSchema.columns)
|
64
|
+
|
65
|
+
return object : PageOutput {
|
66
|
+
private val reader = PageReader(inputSchema)
|
67
|
+
private val builder = PageBuilder(Exec.getBufferAllocator(), outputSchema, output)
|
68
|
+
|
69
|
+
override fun add(page: Page) {
|
70
|
+
reader.setPage(page)
|
71
|
+
while (reader.nextRecord()) {
|
72
|
+
setValue()
|
73
|
+
builder.addRecord()
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
private fun setValue() {
|
78
|
+
for (inputColumn in inputSchema.columns) {
|
79
|
+
if (reader.isNull(inputColumn)) {
|
80
|
+
builder.setNull(inputColumn)
|
81
|
+
continue
|
82
|
+
}
|
83
|
+
|
84
|
+
// Write the original data
|
85
|
+
val inputValue : Any = when (inputColumn.type) {
|
86
|
+
Types.STRING -> {
|
87
|
+
reader.getString(inputColumn).apply { builder.setString(inputColumn, this) }
|
88
|
+
}
|
89
|
+
Types.BOOLEAN -> {
|
90
|
+
reader.getBoolean(inputColumn).apply { builder.setBoolean(inputColumn, this) }
|
91
|
+
}
|
92
|
+
Types.DOUBLE -> {
|
93
|
+
reader.getDouble(inputColumn).apply { builder.setDouble(inputColumn, this) }
|
94
|
+
}
|
95
|
+
Types.LONG -> {
|
96
|
+
reader.getLong(inputColumn).apply { builder.setLong(inputColumn, this) }
|
97
|
+
}
|
98
|
+
Types.TIMESTAMP -> {
|
99
|
+
reader.getTimestamp(inputColumn).apply { builder.setTimestamp(inputColumn, this) }
|
100
|
+
}
|
101
|
+
Types.JSON -> {
|
102
|
+
reader.getJson(inputColumn).apply { builder.setJson(inputColumn, this) }
|
103
|
+
} else -> {
|
104
|
+
throw DataException("Unexpected type:" + inputColumn.type)
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
// Overwrite the column if it's hash column.
|
109
|
+
hashColumnMap[inputColumn.name]?.let { hashColumn ->
|
110
|
+
val outputColumn = outputColumnMap[hashColumn.getNewName().or(inputColumn.name)]
|
111
|
+
val hashedValue = generateHash(inputValue.toString(), hashColumn.getAlgorithm().get())
|
112
|
+
builder.setString(outputColumn, hashedValue)
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
private fun generateHash(value: String, algorithm: String): String {
|
118
|
+
val md = MessageDigest.getInstance(algorithm)
|
119
|
+
md.update(value.toByteArray())
|
120
|
+
return md.digest().joinToString("") { "%02x".format(it) }
|
121
|
+
}
|
122
|
+
|
123
|
+
override fun finish() {
|
124
|
+
builder.finish()
|
125
|
+
}
|
126
|
+
|
127
|
+
override fun close() {
|
128
|
+
builder.close()
|
129
|
+
}
|
130
|
+
}
|
131
|
+
}
|
132
|
+
|
133
|
+
private fun convertHashColumnListToMap(hashColumns: List<HashColumn>?): Map<String, HashColumn> {
|
134
|
+
return hashColumns!!.associate { Pair(it.getName(), it) }
|
135
|
+
}
|
136
|
+
|
137
|
+
private fun convertColumnListToMap(columns: List<Column>?): Map<String, Column> {
|
138
|
+
return columns!!.associate { Pair(it.name, it) }
|
139
|
+
}
|
140
|
+
}
|
@@ -0,0 +1,96 @@
|
|
1
|
+
package org.embulk.filter.hash
|
2
|
+
|
3
|
+
import org.embulk.spi.FilterPlugin
|
4
|
+
import org.embulk.test.EmbulkPluginTest
|
5
|
+
import org.embulk.test.TestingEmbulk
|
6
|
+
import org.junit.Test
|
7
|
+
|
8
|
+
import org.embulk.spi.type.Types.STRING
|
9
|
+
import org.embulk.test.TestOutputPlugin.assertRecords
|
10
|
+
import org.embulk.test.TestOutputPlugin.assertSchema
|
11
|
+
import org.embulk.test.Utils.column
|
12
|
+
import org.embulk.test.Utils.record
|
13
|
+
|
14
|
+
class TestHashFilterPlugin : EmbulkPluginTest() {
|
15
|
+
|
16
|
+
override fun setup(builder: TestingEmbulk.Builder) {
|
17
|
+
builder.registerPlugin(FilterPlugin::class.java, "hash", HashFilterPlugin::class.java)
|
18
|
+
}
|
19
|
+
|
20
|
+
@Test fun specifiedColumnIsHashedAndRenamed() {
|
21
|
+
val inConfigPath = "yaml/input_basic.yml"
|
22
|
+
|
23
|
+
val config = newConfig()
|
24
|
+
.set("type", "hash")
|
25
|
+
.set("columns", listOf(newConfig()
|
26
|
+
.set("name", "age")
|
27
|
+
.set("algorithm", "MD5")
|
28
|
+
.set("new_name", "hashed_age")
|
29
|
+
))
|
30
|
+
|
31
|
+
runFilter(config, inConfigPath)
|
32
|
+
|
33
|
+
assertSchema(
|
34
|
+
column("username", STRING),
|
35
|
+
column("hashed_age", STRING)
|
36
|
+
)
|
37
|
+
|
38
|
+
assertRecords(
|
39
|
+
record("user1", "98f13708210194c475687be6106a3b84")
|
40
|
+
)
|
41
|
+
}
|
42
|
+
|
43
|
+
@Test fun allColumnTypesAreHashed() {
|
44
|
+
val inConfigPath = "yaml/input_column_types.yml"
|
45
|
+
|
46
|
+
val config = newConfig()
|
47
|
+
.set("type", "hash")
|
48
|
+
.set("columns", listOf(
|
49
|
+
newConfig().set("name", "username"),
|
50
|
+
newConfig().set("name", "age"),
|
51
|
+
newConfig().set("name", "weight"),
|
52
|
+
newConfig().set("name", "active"),
|
53
|
+
newConfig().set("name", "created_at"),
|
54
|
+
newConfig().set("name", "options")
|
55
|
+
))
|
56
|
+
|
57
|
+
runFilter(config, inConfigPath)
|
58
|
+
|
59
|
+
assertSchema(
|
60
|
+
column("username", STRING),
|
61
|
+
column("age", STRING),
|
62
|
+
column("weight", STRING),
|
63
|
+
column("active", STRING),
|
64
|
+
column("created_at", STRING),
|
65
|
+
column("options", STRING)
|
66
|
+
)
|
67
|
+
|
68
|
+
assertRecords(
|
69
|
+
record(
|
70
|
+
"0a041b9462caa4a31bac3567e0b6e6fd9100787db2ab433d96f6d178cabfce90",
|
71
|
+
"6f4b6612125fb3a0daecd2799dfd6c9c299424fd920f9b308110a2c1fbd8f443",
|
72
|
+
"70822ecbef5bee37d162492107a3127fc0a4de0564f34ce92713a7baaeb582b0",
|
73
|
+
"b5bea41b6c623f7c09f1bf24dcae58ebab3c0cdd90ad966bc43a45b44867e12b",
|
74
|
+
"9673fe7b67d880e2c9071428c63f6e1bea9dde98283297277a20b92ea0acdc72",
|
75
|
+
"3ff0e331ca59a2a1194bac0e36359ed4540a97383e1cdf6eb95c7de9309143fc"
|
76
|
+
)
|
77
|
+
)
|
78
|
+
}
|
79
|
+
|
80
|
+
@Test fun columnIsNull() {
|
81
|
+
val inConfigPath = "yaml/input_null_column.yml"
|
82
|
+
|
83
|
+
val config = newConfig()
|
84
|
+
.set("type", "hash")
|
85
|
+
.set("columns", listOf(
|
86
|
+
newConfig().set("name", "username"),
|
87
|
+
newConfig().set("name", "age")
|
88
|
+
))
|
89
|
+
|
90
|
+
runFilter(config, inConfigPath)
|
91
|
+
|
92
|
+
assertRecords(
|
93
|
+
record(null, "f5ca38f748a1d6eaf726b8a42fb575c3c71f1864a8143301782de13da2d9202b")
|
94
|
+
)
|
95
|
+
}
|
96
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-hash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shinichi Ishimura
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -56,11 +56,14 @@ files:
|
|
56
56
|
- gradlew.bat
|
57
57
|
- lib/embulk/filter/hash.rb
|
58
58
|
- settings.gradle
|
59
|
-
- src/main/java/org/embulk/filter/hash/HashFilterPlugin.java
|
60
|
-
- src/test/java/org/embulk/filter/hash/TestHashFilterPlugin.java
|
59
|
+
- src/main/kotlin/org/embulk/filter/hash/HashFilterPlugin.kt
|
60
|
+
- src/test/kotlin/org/embulk/filter/hash/TestHashFilterPlugin.kt
|
61
61
|
- src/test/resources/yaml/input_basic.yml
|
62
62
|
- src/test/resources/yaml/input_column_types.yml
|
63
|
-
-
|
63
|
+
- src/test/resources/yaml/input_null_column.yml
|
64
|
+
- classpath/embulk-filter-hash-0.3.0.jar
|
65
|
+
- classpath/kotlin-runtime-1.0.6.jar
|
66
|
+
- classpath/kotlin-stdlib-1.0.6.jar
|
64
67
|
homepage: https://github.com/kamatama41/embulk-filter-hash
|
65
68
|
licenses:
|
66
69
|
- MIT
|
@@ -1,181 +0,0 @@
|
|
1
|
-
package org.embulk.filter.hash;
|
2
|
-
|
3
|
-
import com.google.common.base.Optional;
|
4
|
-
import com.google.common.base.Throwables;
|
5
|
-
import org.embulk.config.Config;
|
6
|
-
import org.embulk.config.ConfigDefault;
|
7
|
-
import org.embulk.config.ConfigSource;
|
8
|
-
import org.embulk.config.Task;
|
9
|
-
import org.embulk.config.TaskSource;
|
10
|
-
import org.embulk.spi.Column;
|
11
|
-
import org.embulk.spi.DataException;
|
12
|
-
import org.embulk.spi.Exec;
|
13
|
-
import org.embulk.spi.FilterPlugin;
|
14
|
-
import org.embulk.spi.Page;
|
15
|
-
import org.embulk.spi.PageBuilder;
|
16
|
-
import org.embulk.spi.PageOutput;
|
17
|
-
import org.embulk.spi.PageReader;
|
18
|
-
import org.embulk.spi.Schema;
|
19
|
-
import org.embulk.spi.time.Timestamp;
|
20
|
-
import org.embulk.spi.type.Types;
|
21
|
-
import org.msgpack.value.Value;
|
22
|
-
|
23
|
-
import java.security.MessageDigest;
|
24
|
-
import java.security.NoSuchAlgorithmException;
|
25
|
-
import java.util.HashMap;
|
26
|
-
import java.util.List;
|
27
|
-
import java.util.Map;
|
28
|
-
|
29
|
-
public class HashFilterPlugin implements FilterPlugin {
|
30
|
-
|
31
|
-
public interface PluginTask extends Task {
|
32
|
-
@Config("columns")
|
33
|
-
List<HashColumn> getColumns();
|
34
|
-
}
|
35
|
-
|
36
|
-
public interface HashColumn extends Task {
|
37
|
-
@Config("name")
|
38
|
-
String getName();
|
39
|
-
|
40
|
-
@Config("algorithm")
|
41
|
-
@ConfigDefault("\"SHA-256\"")
|
42
|
-
Optional<String> getAlgorithm();
|
43
|
-
|
44
|
-
@Config("new_name")
|
45
|
-
@ConfigDefault("null")
|
46
|
-
Optional<String> getNewName();
|
47
|
-
}
|
48
|
-
|
49
|
-
@Override
|
50
|
-
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
|
51
|
-
|
52
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
53
|
-
Map<String, HashColumn> hashColumnMap = convertHashColumnListToMap(task.getColumns());
|
54
|
-
|
55
|
-
Schema.Builder builder = Schema.builder();
|
56
|
-
for (Column column : inputSchema.getColumns()) {
|
57
|
-
|
58
|
-
HashColumn hashColumn = hashColumnMap.get(column.getName());
|
59
|
-
|
60
|
-
if (hashColumn != null) {
|
61
|
-
builder.add(hashColumn.getNewName().or(column.getName()), Types.STRING);
|
62
|
-
} else {
|
63
|
-
builder.add(column.getName(), column.getType());
|
64
|
-
}
|
65
|
-
}
|
66
|
-
control.run(task.dump(), builder.build());
|
67
|
-
}
|
68
|
-
|
69
|
-
@Override
|
70
|
-
public PageOutput open(final TaskSource taskSource, final Schema inputSchema,
|
71
|
-
final Schema outputSchema, final PageOutput output) {
|
72
|
-
|
73
|
-
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
74
|
-
final Map<String, HashColumn> hashColumnMap = convertHashColumnListToMap(task.getColumns());
|
75
|
-
final Map<String, Column> outputColumnMap = convertColumnListToMap(outputSchema.getColumns());
|
76
|
-
|
77
|
-
return new PageOutput() {
|
78
|
-
private PageReader reader = new PageReader(inputSchema);
|
79
|
-
private PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
80
|
-
|
81
|
-
@Override
|
82
|
-
public void add(Page page) {
|
83
|
-
reader.setPage(page);
|
84
|
-
while (reader.nextRecord()) {
|
85
|
-
setValue();
|
86
|
-
builder.addRecord();
|
87
|
-
}
|
88
|
-
}
|
89
|
-
|
90
|
-
private void setValue() {
|
91
|
-
for (Column inputColumn : inputSchema.getColumns()) {
|
92
|
-
if (reader.isNull(inputColumn)) {
|
93
|
-
builder.setNull(inputColumn);
|
94
|
-
continue;
|
95
|
-
}
|
96
|
-
|
97
|
-
// Write the original data
|
98
|
-
Object inputValue;
|
99
|
-
if (Types.STRING.equals(inputColumn.getType())) {
|
100
|
-
final String value = reader.getString(inputColumn);
|
101
|
-
inputValue = value;
|
102
|
-
builder.setString(inputColumn, value);
|
103
|
-
} else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
104
|
-
final boolean value = reader.getBoolean(inputColumn);
|
105
|
-
inputValue = value;
|
106
|
-
builder.setBoolean(inputColumn, value);
|
107
|
-
} else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
108
|
-
final double value = reader.getDouble(inputColumn);
|
109
|
-
inputValue = value;
|
110
|
-
builder.setDouble(inputColumn, value);
|
111
|
-
} else if (Types.LONG.equals(inputColumn.getType())) {
|
112
|
-
final long value = reader.getLong(inputColumn);
|
113
|
-
inputValue = value;
|
114
|
-
builder.setLong(inputColumn, value);
|
115
|
-
} else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
116
|
-
final Timestamp value = reader.getTimestamp(inputColumn);
|
117
|
-
inputValue = value;
|
118
|
-
builder.setTimestamp(inputColumn, value);
|
119
|
-
} else if (Types.JSON.equals(inputColumn.getType())) {
|
120
|
-
final Value value = reader.getJson(inputColumn);
|
121
|
-
inputValue = value;
|
122
|
-
builder.setJson(inputColumn, value);
|
123
|
-
} else {
|
124
|
-
throw new DataException("Unexpected type:" + inputColumn.getType());
|
125
|
-
}
|
126
|
-
|
127
|
-
// Overwrite the column if it's hash column.
|
128
|
-
HashColumn hashColumn = hashColumnMap.get(inputColumn.getName());
|
129
|
-
if (hashColumn != null) {
|
130
|
-
final Column outputColumn = outputColumnMap.get(hashColumn.getNewName().or(inputColumn.getName()));
|
131
|
-
final String hashedValue = generateHash(inputValue.toString(), hashColumn.getAlgorithm().get());
|
132
|
-
builder.setString(outputColumn, hashedValue);
|
133
|
-
}
|
134
|
-
}
|
135
|
-
}
|
136
|
-
|
137
|
-
private String generateHash(String value, String algorithm) {
|
138
|
-
String result = null;
|
139
|
-
try {
|
140
|
-
MessageDigest md = MessageDigest.getInstance(algorithm);
|
141
|
-
md.update(value.getBytes());
|
142
|
-
|
143
|
-
StringBuilder sb = new StringBuilder();
|
144
|
-
for (byte b : md.digest()) {
|
145
|
-
sb.append(String.format("%02x", b));
|
146
|
-
}
|
147
|
-
result = sb.toString();
|
148
|
-
} catch (NoSuchAlgorithmException e) {
|
149
|
-
Throwables.propagate(e);
|
150
|
-
}
|
151
|
-
return result;
|
152
|
-
}
|
153
|
-
|
154
|
-
@Override
|
155
|
-
public void finish() {
|
156
|
-
builder.finish();
|
157
|
-
}
|
158
|
-
|
159
|
-
@Override
|
160
|
-
public void close() {
|
161
|
-
builder.close();
|
162
|
-
}
|
163
|
-
};
|
164
|
-
}
|
165
|
-
|
166
|
-
private static Map<String, HashColumn> convertHashColumnListToMap(List<HashColumn> hashColumns) {
|
167
|
-
Map<String, HashColumn> result = new HashMap<>();
|
168
|
-
for (HashColumn hashColumn : hashColumns) {
|
169
|
-
result.put(hashColumn.getName(), hashColumn);
|
170
|
-
}
|
171
|
-
return result;
|
172
|
-
}
|
173
|
-
|
174
|
-
private static Map<String, Column> convertColumnListToMap(List<Column> columns) {
|
175
|
-
Map<String, Column> result = new HashMap<>();
|
176
|
-
for (Column column : columns) {
|
177
|
-
result.put(column.getName(), column);
|
178
|
-
}
|
179
|
-
return result;
|
180
|
-
}
|
181
|
-
}
|
@@ -1,90 +0,0 @@
|
|
1
|
-
package org.embulk.filter.hash;
|
2
|
-
|
3
|
-
import org.embulk.config.ConfigSource;
|
4
|
-
import org.embulk.spi.FilterPlugin;
|
5
|
-
import org.embulk.test.ExtendedTestingEmbulk;
|
6
|
-
import org.junit.Rule;
|
7
|
-
import org.junit.Test;
|
8
|
-
|
9
|
-
import java.util.Arrays;
|
10
|
-
import java.util.Collections;
|
11
|
-
|
12
|
-
import static org.embulk.spi.type.Types.STRING;
|
13
|
-
import static org.embulk.test.TestOutputPlugin.assertRecords;
|
14
|
-
import static org.embulk.test.TestOutputPlugin.assertSchema;
|
15
|
-
import static org.embulk.test.Utils.column;
|
16
|
-
import static org.embulk.test.Utils.record;
|
17
|
-
|
18
|
-
public class TestHashFilterPlugin {
|
19
|
-
@Rule
|
20
|
-
public ExtendedTestingEmbulk embulk = (ExtendedTestingEmbulk) ExtendedTestingEmbulk
|
21
|
-
.builder()
|
22
|
-
.registerPlugin(FilterPlugin.class, "hash", HashFilterPlugin.class)
|
23
|
-
.build();
|
24
|
-
|
25
|
-
@Test
|
26
|
-
public void specifiedColumnIsHashedAndRenamed() {
|
27
|
-
final String inConfigPath = "yaml/input_basic.yml";
|
28
|
-
|
29
|
-
ConfigSource config = embulk.newConfig()
|
30
|
-
.set("type", "hash")
|
31
|
-
.set("columns", Collections.singletonList(
|
32
|
-
config().set("name", "age").set("algorithm", "MD5").set("new_name", "hashed_age")
|
33
|
-
)
|
34
|
-
);
|
35
|
-
|
36
|
-
embulk.runFilter(config, inConfigPath);
|
37
|
-
|
38
|
-
assertSchema(
|
39
|
-
column("username", STRING),
|
40
|
-
column("hashed_age", STRING)
|
41
|
-
);
|
42
|
-
|
43
|
-
assertRecords(
|
44
|
-
record("user1", "98f13708210194c475687be6106a3b84")
|
45
|
-
);
|
46
|
-
}
|
47
|
-
|
48
|
-
@Test
|
49
|
-
public void allColumnTypesAreHashed() {
|
50
|
-
final String inConfigPath = "yaml/input_column_types.yml";
|
51
|
-
|
52
|
-
ConfigSource config = embulk.newConfig()
|
53
|
-
.set("type", "hash")
|
54
|
-
.set("columns", Arrays.asList(
|
55
|
-
config().set("name", "username"),
|
56
|
-
config().set("name", "age"),
|
57
|
-
config().set("name", "weight"),
|
58
|
-
config().set("name", "active"),
|
59
|
-
config().set("name", "created_at"),
|
60
|
-
config().set("name", "options")
|
61
|
-
)
|
62
|
-
);
|
63
|
-
|
64
|
-
embulk.runFilter(config, inConfigPath);
|
65
|
-
|
66
|
-
assertSchema(
|
67
|
-
column("username", STRING),
|
68
|
-
column("age", STRING),
|
69
|
-
column("weight", STRING),
|
70
|
-
column("active", STRING),
|
71
|
-
column("created_at", STRING),
|
72
|
-
column("options", STRING)
|
73
|
-
);
|
74
|
-
|
75
|
-
assertRecords(
|
76
|
-
record(
|
77
|
-
"0a041b9462caa4a31bac3567e0b6e6fd9100787db2ab433d96f6d178cabfce90",
|
78
|
-
"6f4b6612125fb3a0daecd2799dfd6c9c299424fd920f9b308110a2c1fbd8f443",
|
79
|
-
"70822ecbef5bee37d162492107a3127fc0a4de0564f34ce92713a7baaeb582b0",
|
80
|
-
"b5bea41b6c623f7c09f1bf24dcae58ebab3c0cdd90ad966bc43a45b44867e12b",
|
81
|
-
"9673fe7b67d880e2c9071428c63f6e1bea9dde98283297277a20b92ea0acdc72",
|
82
|
-
"3ff0e331ca59a2a1194bac0e36359ed4540a97383e1cdf6eb95c7de9309143fc"
|
83
|
-
)
|
84
|
-
);
|
85
|
-
}
|
86
|
-
|
87
|
-
private ConfigSource config() {
|
88
|
-
return embulk.newConfig();
|
89
|
-
}
|
90
|
-
}
|