embulk-filter-hash 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 681859abe9dc6462e1ca8bfc9d32da28b11be9d4
4
- data.tar.gz: 364021a86b7841541741d68ad0dfa2c4a42e196a
3
+ metadata.gz: df57d10cff5411c1b2d482c912fd2b2d352919ba
4
+ data.tar.gz: ef0fe81747abaa517010653c6d509c8c1b2af846
5
5
  SHA512:
6
- metadata.gz: f0cb9f3e5b8fd629e6ab69940a605ff5c2631494d3b4f63968f602f66225c5596f3016c30bbf108017c83491b1421a28babb21509657efce9fe5696c522a1c09
7
- data.tar.gz: 3229e2f0fdff0a6bd9617c8a977d2473c64a3d4e11f341d48ef41a8c69a13d6ae25bfe46b09cd1244675527517281dff74e7b03dc6132685e6d167380581188a
6
+ metadata.gz: 315b799b9893dbae9c60abd9614d7efdc2978e67a4e3c3969d5b98efd441166cfbfdb655017d8aa648166d9fd2b794f041378653e30b8bfd3642d9a9ebac5c4c
7
+ data.tar.gz: dfda22d8e8403757d2e7865f243b92aae836b73a6c62fb3df2642d9eceaaded297a56bbd78b829f8ae103e60207c3c3d14463e7a57e2d42832437c8e843ca080
data/.gitignore CHANGED
@@ -10,3 +10,4 @@ build/
10
10
  /.metadata/
11
11
  .classpath
12
12
  .project
13
+ *.iml
data/README.md CHANGED
@@ -12,8 +12,9 @@ Embulk filter plugin to convert an input to a hash value.
12
12
 
13
13
  - **columns**: Columns to hash (array, required)
14
14
  - **name**: Name of input column (string, required)
15
- - **algorithm**: A hash algorithm. [See also](#hash_algorithm) (string, default:`"SHA-256"`)
16
- - **new_name**: New column name if you want to rename (string, default: `null`)
15
+ - **algorithm**: Hash algorithm. [See also](#hash_algorithm) (string, default: `"SHA-256"`)
16
+ - **secret_key**: Secret key for HMAC hashing. (string, required when specifying an HMAC algorithm)
17
+ - **new_name**: New column name if you want to rename the column (string, default: `null`)
17
18
 
18
19
  ## Example
19
20
 
@@ -23,22 +24,22 @@ filters:
23
24
  columns:
24
25
  - { name: username }
25
26
  - { name: email, algorithm: SHA-512, new_name: hashed_email }
27
+ - { name: phone_number, algorithm: HmacSHA256, secret_key: passw0rd }
26
28
  ```
27
29
 
28
30
  ## Hash Algorithm
29
31
  <a name="hash_algorithm"></a>
30
32
 
31
- This plugin uses [MessageDigest](https://docs.oracle.com/javase/7/docs/api/java/security/MessageDigest.html) for hashing.
32
- Every implementation of the Java platform supports the following MessageDigest algorithms:
33
- - MD5
34
- - SHA-1
35
- - SHA-256
36
-
33
+ You can choose either a [MessageDigest](https://docs.oracle.com/javase/8/docs/api/java/security/MessageDigest.html) algorithm or an [HMAC](https://docs.oracle.com/javase/8/docs/api/javax/crypto/Mac.html) algorithm.
37
34
  If you want to know all algorithms that your platform supports, run the following snippet.
35
+
38
36
  ```java
39
37
  for (String algorithm : java.security.Security.getAlgorithms("MessageDigest")) {
40
38
  System.out.println(algorithm);
41
39
  }
40
+ for (String algorithm : java.security.Security.getAlgorithms("Mac")) {
41
+ System.out.println(algorithm);
42
+ }
42
43
  ```
43
44
 
44
45
  ## Build
@@ -1,24 +1,21 @@
1
1
  buildscript {
2
- ext.kotlinVersion = '1.2.31'
3
2
  repositories {
4
- mavenCentral()
5
3
  jcenter()
6
4
  maven { url 'http://kamatama41.github.com/maven-repository/repository' }
7
5
  }
8
6
  dependencies {
9
- classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlinVersion"
10
- classpath "com.github.kamatama41:gradle-embulk-plugin:0.1.4"
7
+ classpath "com.github.kamatama41:gradle-embulk-plugin:0.3.0"
11
8
  classpath "net.researchgate:gradle-release:2.5.0"
12
9
  }
13
10
  }
14
11
 
15
- apply plugin: "kotlin"
12
+ plugins {
13
+ id "org.jetbrains.kotlin.jvm" version "1.2.31"
14
+ }
16
15
  apply plugin: "com.github.kamatama41.embulk"
17
16
  apply plugin: 'net.researchgate.release'
18
17
 
19
18
  repositories {
20
- mavenCentral()
21
- jcenter()
22
19
  maven { url 'http://kamatama41.github.com/maven-repository/repository' }
23
20
  }
24
21
 
@@ -26,12 +23,12 @@ sourceCompatibility = 1.8
26
23
  targetCompatibility = 1.8
27
24
 
28
25
  dependencies {
29
- compile "org.jetbrains.kotlin:kotlin-stdlib:$kotlinVersion"
30
- testCompile "com.github.kamatama41:embulk-test-helpers:0.4.0"
26
+ compile "org.jetbrains.kotlin:kotlin-stdlib"
27
+ testCompile "com.github.kamatama41:embulk-test-helpers:0.5.0"
31
28
  }
32
29
 
33
30
  embulk {
34
- version = "0.9.7"
31
+ version = "0.9.9"
35
32
  category = "filter"
36
33
  name = "hash"
37
34
  authors = ["Shinichi Ishimura"]
data/circle.yml CHANGED
@@ -1,23 +1,44 @@
1
- machine:
2
- java:
3
- version: oraclejdk8
1
+ version: 2
2
+ jobs:
3
+ build:
4
+ docker:
5
+ - image: openjdk:8
6
+ working_directory: ~/embulk-filter-hash
7
+ steps:
8
+ - checkout
4
9
 
5
- dependencies:
6
- pre:
7
- - git config --global user.email "shiketaudonko41@gmail.com"
8
- - git config --global user.name "kamatama41"
9
- override:
10
- - ./gradlew dependencies
10
+ - restore_cache:
11
+ keys:
12
+ - v1-gradle-{{ checksum "build.gradle" }}
13
+ - v1-gradle-
14
+ - run: ./gradlew checkstyle
15
+ - run: ./gradlew check --info
16
+ - run:
17
+ name: Save test results
18
+ command: |
19
+ mkdir -p ~/junit/
20
+ find . -type f -regex ".*/build/test-results/.*xml" -exec cp {} ~/junit/ \;
21
+ when: always
22
+ - store_test_results:
23
+ path: ~/junit
24
+ - store_artifacts:
25
+ path: ~/junit
11
26
 
12
- test:
13
- override:
14
- - ./gradlew check --stacktrace
27
+ - save_cache:
28
+ paths:
29
+ - "~/.gradle"
30
+ - "~/.m2"
31
+ key: v1-gradle-{{ checksum "build.gradle" }}
15
32
 
16
- deployment:
17
- release:
18
- branch: release
19
- commands:
20
- - curl -f -u $RUBYGEMS_USER:$RUBYGEMS_PASSWORD https://rubygems.org/api/v1/api_key.yaml > ~/.gem/credentials; chmod 0600 ~/.gem/credentials
21
- - git checkout master
22
- - git reset --hard origin/master
23
- - ./gradlew release -Prelease.useAutomaticVersion=true
33
+ - deploy:
34
+ name: Push Gem to RubyGems.org and bump up
35
+ command: |
36
+ if [ "${CIRCLE_BRANCH}" == "release" ]; then
37
+ mkdir -p ~/.gem
38
+ curl -f -u $RUBYGEMS_USER:$RUBYGEMS_PASSWORD https://rubygems.org/api/v1/api_key.yaml > ~/.gem/credentials; chmod 0600 ~/.gem/credentials
39
+ git config --global user.email "shiketaudonko41@gmail.com"
40
+ git config --global user.name "kamatama41"
41
+ git checkout master
42
+ git reset --hard origin/master
43
+ ./gradlew release -Prelease.useAutomaticVersion=true
44
+ fi
@@ -1 +1 @@
1
- version=0.4.0
1
+ version=0.5.0
@@ -1,6 +1,6 @@
1
- #Sun Jan 29 22:41:06 JST 2017
1
+ #Sat Apr 28 09:20:37 JST 2018
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-3.3-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-4.5.1-all.zip
@@ -1,11 +1,7 @@
1
1
  package org.embulk.filter.hash
2
2
 
3
3
  import com.google.common.base.Optional
4
- import org.embulk.config.Config
5
- import org.embulk.config.ConfigDefault
6
- import org.embulk.config.ConfigSource
7
- import org.embulk.config.Task
8
- import org.embulk.config.TaskSource
4
+ import org.embulk.config.*
9
5
  import org.embulk.spi.Column
10
6
  import org.embulk.spi.DataException
11
7
  import org.embulk.spi.Exec
@@ -17,6 +13,9 @@ import org.embulk.spi.PageReader
17
13
  import org.embulk.spi.Schema
18
14
  import org.embulk.spi.type.Types
19
15
  import java.security.MessageDigest
16
+ import java.util.Locale
17
+ import javax.crypto.Mac
18
+ import javax.crypto.spec.SecretKeySpec
20
19
 
21
20
  class HashFilterPlugin : FilterPlugin {
22
21
  interface PluginTask : Task {
@@ -32,6 +31,10 @@ class HashFilterPlugin : FilterPlugin {
32
31
  @get:ConfigDefault("\"SHA-256\"")
33
32
  val algorithm: Optional<String>
34
33
 
34
+ @get:Config("secret_key")
35
+ @get:ConfigDefault("null")
36
+ val secretKey: Optional<String>
37
+
35
38
  @get:Config("new_name")
36
39
  @get:ConfigDefault("null")
37
40
  val newName: Optional<String>
@@ -45,6 +48,8 @@ class HashFilterPlugin : FilterPlugin {
45
48
  inputSchema.columns.forEach { column ->
46
49
  val hashColumn = hashColumnMap[column.name]
47
50
  if (hashColumn != null) {
51
+ // Check algorithm is valid
52
+ getAlgorithmType(hashColumn.algorithm.get()).validate(hashColumn)
48
53
  builder.add(hashColumn.newName.or(column.name), Types.STRING)
49
54
  } else {
50
55
  builder.add(column.name, column.type)
@@ -114,7 +119,7 @@ class HashFilterPlugin : FilterPlugin {
114
119
  hashColumnMap[inputColumn.name]?.let { hashColumn ->
115
120
  // Write hashed value if it's hash column.
116
121
  val outputColumn = outputColumnMap[hashColumn.newName.or(inputColumn.name)]
117
- val hashedValue = generateHash(inputValue.toString(), hashColumn.algorithm.get())
122
+ val hashedValue = generateHash(inputValue.toString(), hashColumn)
118
123
  builder.setString(outputColumn, hashedValue)
119
124
  } ?: run {
120
125
  // Write the original data
@@ -122,10 +127,8 @@ class HashFilterPlugin : FilterPlugin {
122
127
  }
123
128
  }
124
129
 
125
- private fun generateHash(value: String, algorithm: String): String {
126
- val md = MessageDigest.getInstance(algorithm)
127
- md.update(value.toByteArray())
128
- return md.digest().joinToString("") { "%02x".format(it) }
130
+ private fun generateHash(value: String, config: HashColumn): String {
131
+ return getAlgorithmType(config.algorithm.get()).generateHash(value, config)
129
132
  }
130
133
 
131
134
  override fun finish() {
@@ -145,4 +148,54 @@ class HashFilterPlugin : FilterPlugin {
145
148
  private fun convertColumnListToMap(columns: List<Column>?): Map<String, Column> {
146
149
  return columns!!.associate { Pair(it.name, it) }
147
150
  }
151
+
152
+ private fun getAlgorithmType(algorithm: String): AlgorithmType {
153
+ return when {
154
+ MD_ALGORITHMS.contains(algorithm.toUpperCase(Locale.ENGLISH)) -> {
155
+ AlgorithmType.MESSAGE_DIGEST
156
+ }
157
+ MAC_ALGORITHMS.contains(algorithm.toUpperCase(Locale.ENGLISH)) -> {
158
+ AlgorithmType.MAC
159
+ }
160
+ else -> throw ConfigException("No such algorithm: $algorithm")
161
+ }
162
+ }
163
+
164
+ enum class AlgorithmType {
165
+ MESSAGE_DIGEST {
166
+ override fun validate(config: HashColumn) {}
167
+
168
+ override fun generateHash(value: String, config: HashColumn): String {
169
+ val algorithm = config.algorithm.get()
170
+ return MessageDigest.getInstance(algorithm).run {
171
+ update(value.toByteArray())
172
+ digest().joinToString("") { "%02x".format(it) }
173
+ }
174
+ }
175
+ },
176
+ MAC {
177
+ override fun validate(config: HashColumn) {
178
+ if (!config.secretKey.isPresent) {
179
+ throw ConfigException("Secret key must not be null.")
180
+ }
181
+ }
182
+
183
+ override fun generateHash(value: String, config: HashColumn): String {
184
+ val secretKey = config.secretKey.get()
185
+ val algorithm = config.algorithm.get()
186
+ return Mac.getInstance(algorithm).run {
187
+ init(SecretKeySpec(secretKey.toByteArray(), algorithm))
188
+ doFinal(value.toByteArray()).joinToString("") { "%02x".format(it) }
189
+ }
190
+ }
191
+ };
192
+
193
+ abstract fun validate(config: HashColumn)
194
+ abstract fun generateHash(value: String, config: HashColumn): String
195
+ }
196
+
197
+ companion object {
198
+ val MD_ALGORITHMS = java.security.Security.getAlgorithms("MessageDigest") ?: emptySet<String>()
199
+ val MAC_ALGORITHMS = java.security.Security.getAlgorithms("Mac") ?: emptySet<String>()
200
+ }
148
201
  }
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.hash
2
2
 
3
+ import org.embulk.config.ConfigException
4
+ import org.embulk.exec.PartialExecutionException
3
5
  import org.embulk.test.EmbulkPluginTest
4
6
  import org.junit.Test
5
7
 
@@ -9,6 +11,10 @@ import org.embulk.test.TestOutputPlugin.Matcher.assertSchema
9
11
  import org.embulk.test.record
10
12
  import org.embulk.test.registerPlugins
11
13
  import org.embulk.test.set
14
+ import org.hamcrest.Matchers.`is`
15
+ import org.hamcrest.Matchers.instanceOf
16
+ import org.junit.Assert.assertThat
17
+ import org.junit.Assert.fail
12
18
  import org.junit.Before
13
19
 
14
20
  class TestHashFilterPlugin : EmbulkPluginTest() {
@@ -16,7 +22,8 @@ class TestHashFilterPlugin : EmbulkPluginTest() {
16
22
  builder.registerPlugins(HashFilterPlugin::class)
17
23
  }
18
24
 
19
- @Test fun specifiedColumnIsHashedAndRenamed() {
25
+ @Test
26
+ fun specifiedColumnIsHashedAndRenamed() {
20
27
  val config = config().set(
21
28
  "type" to "hash",
22
29
  "columns" to listOf(config().set(
@@ -37,7 +44,8 @@ class TestHashFilterPlugin : EmbulkPluginTest() {
37
44
  )
38
45
  }
39
46
 
40
- @Test fun allColumnTypesAreHashed() {
47
+ @Test
48
+ fun allColumnTypesAreHashed() {
41
49
  val config = config().set(
42
50
  "type" to "hash",
43
51
  "columns" to listOf(
@@ -71,7 +79,65 @@ class TestHashFilterPlugin : EmbulkPluginTest() {
71
79
  )
72
80
  }
73
81
 
74
- @Test fun columnIsNull() {
82
+ @Test
83
+ fun specifiedColumnIsHashedByMac() {
84
+ val config = config().set(
85
+ "type" to "hash",
86
+ "columns" to listOf(config().set(
87
+ "name" to "age",
88
+ "algorithm" to "HmacSHA256",
89
+ "secret_key" to "passw0rd",
90
+ "new_name" to "hashed_age"
91
+ )))
92
+
93
+ runFilter(config, inConfigPath = "yaml/input_basic.yml")
94
+
95
+ assertSchema(
96
+ "username" to STRING,
97
+ "hashed_age" to STRING
98
+ )
99
+
100
+ assertRecords(
101
+ record("user1", "5f9959eac71ad30782ebf4d3c98d12a4c33eadee156a6c5d3881204030811989")
102
+ )
103
+ }
104
+
105
+ @Test
106
+ fun exceptionThrownWithInvalidAlgorithm() {
107
+ try {
108
+ val config = config().set(
109
+ "type" to "hash",
110
+ "columns" to listOf(config().set(
111
+ "name" to "age",
112
+ "algorithm" to "Foo"
113
+ )))
114
+ runFilter(config, inConfigPath = "yaml/input_basic.yml")
115
+ fail("No exception")
116
+ } catch (e: PartialExecutionException) {
117
+ assertThat(e.cause, instanceOf(ConfigException::class.java))
118
+ assertThat(e.cause?.message, `is`("No such algorithm: Foo"))
119
+ }
120
+ }
121
+
122
+ @Test
123
+ fun exceptionThrownWithMacAndNoSecretKey() {
124
+ try {
125
+ val config = config().set(
126
+ "type" to "hash",
127
+ "columns" to listOf(config().set(
128
+ "name" to "age",
129
+ "algorithm" to "HmacSHA256"
130
+ )))
131
+ runFilter(config, inConfigPath = "yaml/input_basic.yml")
132
+ fail("No exception")
133
+ } catch (e: PartialExecutionException) {
134
+ assertThat(e.cause, instanceOf(ConfigException::class.java))
135
+ assertThat(e.cause?.message, `is`("Secret key must not be null."))
136
+ }
137
+ }
138
+
139
+ @Test
140
+ fun columnIsNull() {
75
141
  val config = config().set(
76
142
  "type" to "hash",
77
143
  "columns" to listOf(
@@ -1,8 +1,6 @@
1
- type: test
2
- data:
3
- - user1,20
4
- parser:
5
- type: csv
6
- columns:
7
- - {name: username, type: string}
8
- - {name: age, type: long}
1
+ type: config
2
+ columns:
3
+ - {name: username, type: string}
4
+ - {name: age, type: long}
5
+ values:
6
+ - - [user1,20]
@@ -1,13 +1,10 @@
1
- type: test
2
- data:
3
- - >-
4
- user1,21,65.4,true,2016-03-27 01:23:45,{"a":{"b":"c"}}"
5
- parser:
6
- type: csv
7
- columns:
8
- - {name: username, type: string}
9
- - {name: age, type: long}
10
- - {name: weight, type: double}
11
- - {name: active, type: boolean}
12
- - {name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
13
- - {name: options, type: json}
1
+ type: config
2
+ columns:
3
+ - {name: username, type: string}
4
+ - {name: age, type: long}
5
+ - {name: weight, type: double}
6
+ - {name: active, type: boolean}
7
+ - {name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
8
+ - {name: options, type: json}
9
+ values:
10
+ - - [user1,21,65.4,true,"2016-03-27 01:23:45",{"a":{"b":"c"}}]
@@ -1,9 +1,6 @@
1
- type: test
2
- data:
3
- - null,20
4
- parser:
5
- type: csv
6
- null_string: 'null'
7
- columns:
8
- - {name: username, type: string}
9
- - {name: age, type: long}
1
+ type: config
2
+ columns:
3
+ - {name: username, type: string}
4
+ - {name: age, type: long}
5
+ values:
6
+ - - [null,20]
metadata CHANGED
@@ -1,19 +1,19 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-hash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shinichi Ishimura
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-19 00:00:00.000000000 Z
11
+ date: 2018-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
- - - ~>
16
+ - - "~>"
17
17
  - !ruby/object:Gem::Version
18
18
  version: '1.0'
19
19
  name: bundler
@@ -21,13 +21,13 @@ dependencies:
21
21
  type: :development
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
30
- - - '>='
30
+ - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: '10.0'
33
33
  name: rake
@@ -35,7 +35,7 @@ dependencies:
35
35
  type: :development
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
41
  description: Embulk filter plugin to convert an input to a hash value.
@@ -45,11 +45,14 @@ executables: []
45
45
  extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
- - .gitignore
48
+ - ".gitignore"
49
49
  - LICENSE.txt
50
50
  - README.md
51
51
  - build.gradle
52
52
  - circle.yml
53
+ - classpath/annotations-13.0.jar
54
+ - classpath/embulk-filter-hash-0.5.0.jar
55
+ - classpath/kotlin-stdlib-1.2.31.jar
53
56
  - gradle.properties
54
57
  - gradle/wrapper/gradle-wrapper.jar
55
58
  - gradle/wrapper/gradle-wrapper.properties
@@ -62,9 +65,6 @@ files:
62
65
  - src/test/resources/yaml/input_basic.yml
63
66
  - src/test/resources/yaml/input_column_types.yml
64
67
  - src/test/resources/yaml/input_null_column.yml
65
- - classpath/kotlin-stdlib-1.2.31.jar
66
- - classpath/annotations-13.0.jar
67
- - classpath/embulk-filter-hash-0.4.0.jar
68
68
  homepage: https://github.com/kamatama41/embulk-filter-hash
69
69
  licenses:
70
70
  - MIT
@@ -75,17 +75,17 @@ require_paths:
75
75
  - lib
76
76
  required_ruby_version: !ruby/object:Gem::Requirement
77
77
  requirements:
78
- - - '>='
78
+ - - ">="
79
79
  - !ruby/object:Gem::Version
80
80
  version: '0'
81
81
  required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  requirements:
83
- - - '>='
83
+ - - ">="
84
84
  - !ruby/object:Gem::Version
85
85
  version: '0'
86
86
  requirements: []
87
87
  rubyforge_project:
88
- rubygems_version: 2.1.9
88
+ rubygems_version: 2.6.8
89
89
  signing_key:
90
90
  specification_version: 4
91
91
  summary: Hash filter plugin for Embulk