embulk-filter-hash 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 681859abe9dc6462e1ca8bfc9d32da28b11be9d4
4
- data.tar.gz: 364021a86b7841541741d68ad0dfa2c4a42e196a
3
+ metadata.gz: df57d10cff5411c1b2d482c912fd2b2d352919ba
4
+ data.tar.gz: ef0fe81747abaa517010653c6d509c8c1b2af846
5
5
  SHA512:
6
- metadata.gz: f0cb9f3e5b8fd629e6ab69940a605ff5c2631494d3b4f63968f602f66225c5596f3016c30bbf108017c83491b1421a28babb21509657efce9fe5696c522a1c09
7
- data.tar.gz: 3229e2f0fdff0a6bd9617c8a977d2473c64a3d4e11f341d48ef41a8c69a13d6ae25bfe46b09cd1244675527517281dff74e7b03dc6132685e6d167380581188a
6
+ metadata.gz: 315b799b9893dbae9c60abd9614d7efdc2978e67a4e3c3969d5b98efd441166cfbfdb655017d8aa648166d9fd2b794f041378653e30b8bfd3642d9a9ebac5c4c
7
+ data.tar.gz: dfda22d8e8403757d2e7865f243b92aae836b73a6c62fb3df2642d9eceaaded297a56bbd78b829f8ae103e60207c3c3d14463e7a57e2d42832437c8e843ca080
data/.gitignore CHANGED
@@ -10,3 +10,4 @@ build/
10
10
  /.metadata/
11
11
  .classpath
12
12
  .project
13
+ *.iml
data/README.md CHANGED
@@ -12,8 +12,9 @@ Embulk filter plugin to convert an input to a hash value.
12
12
 
13
13
  - **columns**: Columns to hash (array, required)
14
14
  - **name**: Name of input column (string, required)
15
- - **algorithm**: A hash algorithm. [See also](#hash_algorithm) (string, default:`"SHA-256"`)
16
- - **new_name**: New column name if you want to rename (string, default: `null`)
15
+ - **algorithm**: Hash algorithm. [See also](#hash_algorithm) (string, default: `"SHA-256"`)
16
+ - **secret_key**: Secret key for HMAC hashing. (string, required when an HMAC algorithm is specified)
17
+ - **new_name**: New column name if you want to rename the column (string, default: `null`)
17
18
 
18
19
  ## Example
19
20
 
@@ -23,22 +24,22 @@ filters:
23
24
  columns:
24
25
  - { name: username }
25
26
  - { name: email, algorithm: SHA-512, new_name: hashed_email }
27
+ - { name: phone_number, algorithm: HmacSHA256, secret_key: passw0rd }
26
28
  ```
27
29
 
28
30
  ## Hash Algorithm
29
31
  <a name="hash_algorithm"></a>
30
32
 
31
- This plugin uses [MessageDigest](https://docs.oracle.com/javase/7/docs/api/java/security/MessageDigest.html) for hashing.
32
- Every implementation of the Java platform supports the following MessageDigest algorithms:
33
- - MD5
34
- - SHA-1
35
- - SHA-256
36
-
33
+ You can choose either a [MessageDigest](https://docs.oracle.com/javase/8/docs/api/java/security/MessageDigest.html) algorithm or an [HMAC](https://docs.oracle.com/javase/8/docs/api/javax/crypto/Mac.html) algorithm.
37
34
  If you want to know all algorithms that your platform supports, run the following snippet.
35
+
38
36
  ```java
39
37
  for (String algorithm : java.security.Security.getAlgorithms("MessageDigest")) {
40
38
  System.out.println(algorithm);
41
39
  }
40
+ for (String algorithm : java.security.Security.getAlgorithms("Mac")) {
41
+ System.out.println(algorithm);
42
+ }
42
43
  ```
43
44
 
44
45
  ## Build
@@ -1,24 +1,21 @@
1
1
  buildscript {
2
- ext.kotlinVersion = '1.2.31'
3
2
  repositories {
4
- mavenCentral()
5
3
  jcenter()
6
4
  maven { url 'http://kamatama41.github.com/maven-repository/repository' }
7
5
  }
8
6
  dependencies {
9
- classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlinVersion"
10
- classpath "com.github.kamatama41:gradle-embulk-plugin:0.1.4"
7
+ classpath "com.github.kamatama41:gradle-embulk-plugin:0.3.0"
11
8
  classpath "net.researchgate:gradle-release:2.5.0"
12
9
  }
13
10
  }
14
11
 
15
- apply plugin: "kotlin"
12
+ plugins {
13
+ id "org.jetbrains.kotlin.jvm" version "1.2.31"
14
+ }
16
15
  apply plugin: "com.github.kamatama41.embulk"
17
16
  apply plugin: 'net.researchgate.release'
18
17
 
19
18
  repositories {
20
- mavenCentral()
21
- jcenter()
22
19
  maven { url 'http://kamatama41.github.com/maven-repository/repository' }
23
20
  }
24
21
 
@@ -26,12 +23,12 @@ sourceCompatibility = 1.8
26
23
  targetCompatibility = 1.8
27
24
 
28
25
  dependencies {
29
- compile "org.jetbrains.kotlin:kotlin-stdlib:$kotlinVersion"
30
- testCompile "com.github.kamatama41:embulk-test-helpers:0.4.0"
26
+ compile "org.jetbrains.kotlin:kotlin-stdlib"
27
+ testCompile "com.github.kamatama41:embulk-test-helpers:0.5.0"
31
28
  }
32
29
 
33
30
  embulk {
34
- version = "0.9.7"
31
+ version = "0.9.9"
35
32
  category = "filter"
36
33
  name = "hash"
37
34
  authors = ["Shinichi Ishimura"]
data/circle.yml CHANGED
@@ -1,23 +1,44 @@
1
- machine:
2
- java:
3
- version: oraclejdk8
1
+ version: 2
2
+ jobs:
3
+ build:
4
+ docker:
5
+ - image: openjdk:8
6
+ working_directory: ~/embulk-filter-hash
7
+ steps:
8
+ - checkout
4
9
 
5
- dependencies:
6
- pre:
7
- - git config --global user.email "shiketaudonko41@gmail.com"
8
- - git config --global user.name "kamatama41"
9
- override:
10
- - ./gradlew dependencies
10
+ - restore_cache:
11
+ keys:
12
+ - v1-gradle-{{ checksum "build.gradle" }}
13
+ - v1-gradle-
14
+ - run: ./gradlew checkstyle
15
+ - run: ./gradlew check --info
16
+ - run:
17
+ name: Save test results
18
+ command: |
19
+ mkdir -p ~/junit/
20
+ find . -type f -regex ".*/build/test-results/.*xml" -exec cp {} ~/junit/ \;
21
+ when: always
22
+ - store_test_results:
23
+ path: ~/junit
24
+ - store_artifacts:
25
+ path: ~/junit
11
26
 
12
- test:
13
- override:
14
- - ./gradlew check --stacktrace
27
+ - save_cache:
28
+ paths:
29
+ - "~/.gradle"
30
+ - "~/.m2"
31
+ key: v1-gradle-{{ checksum "build.gradle" }}
15
32
 
16
- deployment:
17
- release:
18
- branch: release
19
- commands:
20
- - curl -f -u $RUBYGEMS_USER:$RUBYGEMS_PASSWORD https://rubygems.org/api/v1/api_key.yaml > ~/.gem/credentials; chmod 0600 ~/.gem/credentials
21
- - git checkout master
22
- - git reset --hard origin/master
23
- - ./gradlew release -Prelease.useAutomaticVersion=true
33
+ - deploy:
34
+ name: Push Gem to RubyGems.org and bump up
35
+ command: |
36
+ if [ "${CIRCLE_BRANCH}" == "release" ]; then
37
+ mkdir -p ~/.gem
38
+ curl -f -u $RUBYGEMS_USER:$RUBYGEMS_PASSWORD https://rubygems.org/api/v1/api_key.yaml > ~/.gem/credentials; chmod 0600 ~/.gem/credentials
39
+ git config --global user.email "shiketaudonko41@gmail.com"
40
+ git config --global user.name "kamatama41"
41
+ git checkout master
42
+ git reset --hard origin/master
43
+ ./gradlew release -Prelease.useAutomaticVersion=true
44
+ fi
@@ -1 +1 @@
1
- version=0.4.0
1
+ version=0.5.0
@@ -1,6 +1,6 @@
1
- #Sun Jan 29 22:41:06 JST 2017
1
+ #Sat Apr 28 09:20:37 JST 2018
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-3.3-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-4.5.1-all.zip
@@ -1,11 +1,7 @@
1
1
  package org.embulk.filter.hash
2
2
 
3
3
  import com.google.common.base.Optional
4
- import org.embulk.config.Config
5
- import org.embulk.config.ConfigDefault
6
- import org.embulk.config.ConfigSource
7
- import org.embulk.config.Task
8
- import org.embulk.config.TaskSource
4
+ import org.embulk.config.*
9
5
  import org.embulk.spi.Column
10
6
  import org.embulk.spi.DataException
11
7
  import org.embulk.spi.Exec
@@ -17,6 +13,9 @@ import org.embulk.spi.PageReader
17
13
  import org.embulk.spi.Schema
18
14
  import org.embulk.spi.type.Types
19
15
  import java.security.MessageDigest
16
+ import java.util.Locale
17
+ import javax.crypto.Mac
18
+ import javax.crypto.spec.SecretKeySpec
20
19
 
21
20
  class HashFilterPlugin : FilterPlugin {
22
21
  interface PluginTask : Task {
@@ -32,6 +31,10 @@ class HashFilterPlugin : FilterPlugin {
32
31
  @get:ConfigDefault("\"SHA-256\"")
33
32
  val algorithm: Optional<String>
34
33
 
34
+ @get:Config("secret_key")
35
+ @get:ConfigDefault("null")
36
+ val secretKey: Optional<String>
37
+
35
38
  @get:Config("new_name")
36
39
  @get:ConfigDefault("null")
37
40
  val newName: Optional<String>
@@ -45,6 +48,8 @@ class HashFilterPlugin : FilterPlugin {
45
48
  inputSchema.columns.forEach { column ->
46
49
  val hashColumn = hashColumnMap[column.name]
47
50
  if (hashColumn != null) {
51
+ // Check algorithm is valid
52
+ getAlgorithmType(hashColumn.algorithm.get()).validate(hashColumn)
48
53
  builder.add(hashColumn.newName.or(column.name), Types.STRING)
49
54
  } else {
50
55
  builder.add(column.name, column.type)
@@ -114,7 +119,7 @@ class HashFilterPlugin : FilterPlugin {
114
119
  hashColumnMap[inputColumn.name]?.let { hashColumn ->
115
120
  // Write hashed value if it's hash column.
116
121
  val outputColumn = outputColumnMap[hashColumn.newName.or(inputColumn.name)]
117
- val hashedValue = generateHash(inputValue.toString(), hashColumn.algorithm.get())
122
+ val hashedValue = generateHash(inputValue.toString(), hashColumn)
118
123
  builder.setString(outputColumn, hashedValue)
119
124
  } ?: run {
120
125
  // Write the original data
@@ -122,10 +127,8 @@ class HashFilterPlugin : FilterPlugin {
122
127
  }
123
128
  }
124
129
 
125
- private fun generateHash(value: String, algorithm: String): String {
126
- val md = MessageDigest.getInstance(algorithm)
127
- md.update(value.toByteArray())
128
- return md.digest().joinToString("") { "%02x".format(it) }
130
+ private fun generateHash(value: String, config: HashColumn): String {
131
+ return getAlgorithmType(config.algorithm.get()).generateHash(value, config)
129
132
  }
130
133
 
131
134
  override fun finish() {
@@ -145,4 +148,54 @@ class HashFilterPlugin : FilterPlugin {
145
148
  private fun convertColumnListToMap(columns: List<Column>?): Map<String, Column> {
146
149
  return columns!!.associate { Pair(it.name, it) }
147
150
  }
151
+
152
+ private fun getAlgorithmType(algorithm: String): AlgorithmType {
153
+ return when {
154
+ MD_ALGORITHMS.contains(algorithm.toUpperCase(Locale.ENGLISH)) -> {
155
+ AlgorithmType.MESSAGE_DIGEST
156
+ }
157
+ MAC_ALGORITHMS.contains(algorithm.toUpperCase(Locale.ENGLISH)) -> {
158
+ AlgorithmType.MAC
159
+ }
160
+ else -> throw ConfigException("No such algorithm: $algorithm")
161
+ }
162
+ }
163
+
164
+ enum class AlgorithmType {
165
+ MESSAGE_DIGEST {
166
+ override fun validate(config: HashColumn) {}
167
+
168
+ override fun generateHash(value: String, config: HashColumn): String {
169
+ val algorithm = config.algorithm.get()
170
+ return MessageDigest.getInstance(algorithm).run {
171
+ update(value.toByteArray())
172
+ digest().joinToString("") { "%02x".format(it) }
173
+ }
174
+ }
175
+ },
176
+ MAC {
177
+ override fun validate(config: HashColumn) {
178
+ if (!config.secretKey.isPresent) {
179
+ throw ConfigException("Secret key must not be null.")
180
+ }
181
+ }
182
+
183
+ override fun generateHash(value: String, config: HashColumn): String {
184
+ val secretKey = config.secretKey.get()
185
+ val algorithm = config.algorithm.get()
186
+ return Mac.getInstance(algorithm).run {
187
+ init(SecretKeySpec(secretKey.toByteArray(), algorithm))
188
+ doFinal(value.toByteArray()).joinToString("") { "%02x".format(it) }
189
+ }
190
+ }
191
+ };
192
+
193
+ abstract fun validate(config: HashColumn)
194
+ abstract fun generateHash(value: String, config: HashColumn): String
195
+ }
196
+
197
+ companion object {
198
+ val MD_ALGORITHMS = java.security.Security.getAlgorithms("MessageDigest") ?: emptySet<String>()
199
+ val MAC_ALGORITHMS = java.security.Security.getAlgorithms("Mac") ?: emptySet<String>()
200
+ }
148
201
  }
@@ -1,5 +1,7 @@
1
1
  package org.embulk.filter.hash
2
2
 
3
+ import org.embulk.config.ConfigException
4
+ import org.embulk.exec.PartialExecutionException
3
5
  import org.embulk.test.EmbulkPluginTest
4
6
  import org.junit.Test
5
7
 
@@ -9,6 +11,10 @@ import org.embulk.test.TestOutputPlugin.Matcher.assertSchema
9
11
  import org.embulk.test.record
10
12
  import org.embulk.test.registerPlugins
11
13
  import org.embulk.test.set
14
+ import org.hamcrest.Matchers.`is`
15
+ import org.hamcrest.Matchers.instanceOf
16
+ import org.junit.Assert.assertThat
17
+ import org.junit.Assert.fail
12
18
  import org.junit.Before
13
19
 
14
20
  class TestHashFilterPlugin : EmbulkPluginTest() {
@@ -16,7 +22,8 @@ class TestHashFilterPlugin : EmbulkPluginTest() {
16
22
  builder.registerPlugins(HashFilterPlugin::class)
17
23
  }
18
24
 
19
- @Test fun specifiedColumnIsHashedAndRenamed() {
25
+ @Test
26
+ fun specifiedColumnIsHashedAndRenamed() {
20
27
  val config = config().set(
21
28
  "type" to "hash",
22
29
  "columns" to listOf(config().set(
@@ -37,7 +44,8 @@ class TestHashFilterPlugin : EmbulkPluginTest() {
37
44
  )
38
45
  }
39
46
 
40
- @Test fun allColumnTypesAreHashed() {
47
+ @Test
48
+ fun allColumnTypesAreHashed() {
41
49
  val config = config().set(
42
50
  "type" to "hash",
43
51
  "columns" to listOf(
@@ -71,7 +79,65 @@ class TestHashFilterPlugin : EmbulkPluginTest() {
71
79
  )
72
80
  }
73
81
 
74
- @Test fun columnIsNull() {
82
+ @Test
83
+ fun specifiedColumnIsHashedByMac() {
84
+ val config = config().set(
85
+ "type" to "hash",
86
+ "columns" to listOf(config().set(
87
+ "name" to "age",
88
+ "algorithm" to "HmacSHA256",
89
+ "secret_key" to "passw0rd",
90
+ "new_name" to "hashed_age"
91
+ )))
92
+
93
+ runFilter(config, inConfigPath = "yaml/input_basic.yml")
94
+
95
+ assertSchema(
96
+ "username" to STRING,
97
+ "hashed_age" to STRING
98
+ )
99
+
100
+ assertRecords(
101
+ record("user1", "5f9959eac71ad30782ebf4d3c98d12a4c33eadee156a6c5d3881204030811989")
102
+ )
103
+ }
104
+
105
+ @Test
106
+ fun exceptionThrownWithInvalidAlgorithm() {
107
+ try {
108
+ val config = config().set(
109
+ "type" to "hash",
110
+ "columns" to listOf(config().set(
111
+ "name" to "age",
112
+ "algorithm" to "Foo"
113
+ )))
114
+ runFilter(config, inConfigPath = "yaml/input_basic.yml")
115
+ fail("No exception")
116
+ } catch (e: PartialExecutionException) {
117
+ assertThat(e.cause, instanceOf(ConfigException::class.java))
118
+ assertThat(e.cause?.message, `is`("No such algorithm: Foo"))
119
+ }
120
+ }
121
+
122
+ @Test
123
+ fun exceptionThrownWithMacAndNoSecretKey() {
124
+ try {
125
+ val config = config().set(
126
+ "type" to "hash",
127
+ "columns" to listOf(config().set(
128
+ "name" to "age",
129
+ "algorithm" to "HmacSHA256"
130
+ )))
131
+ runFilter(config, inConfigPath = "yaml/input_basic.yml")
132
+ fail("No exception")
133
+ } catch (e: PartialExecutionException) {
134
+ assertThat(e.cause, instanceOf(ConfigException::class.java))
135
+ assertThat(e.cause?.message, `is`("Secret key must not be null."))
136
+ }
137
+ }
138
+
139
+ @Test
140
+ fun columnIsNull() {
75
141
  val config = config().set(
76
142
  "type" to "hash",
77
143
  "columns" to listOf(
@@ -1,8 +1,6 @@
1
- type: test
2
- data:
3
- - user1,20
4
- parser:
5
- type: csv
6
- columns:
7
- - {name: username, type: string}
8
- - {name: age, type: long}
1
+ type: config
2
+ columns:
3
+ - {name: username, type: string}
4
+ - {name: age, type: long}
5
+ values:
6
+ - - [user1,20]
@@ -1,13 +1,10 @@
1
- type: test
2
- data:
3
- - >-
4
- user1,21,65.4,true,2016-03-27 01:23:45,{"a":{"b":"c"}}"
5
- parser:
6
- type: csv
7
- columns:
8
- - {name: username, type: string}
9
- - {name: age, type: long}
10
- - {name: weight, type: double}
11
- - {name: active, type: boolean}
12
- - {name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
13
- - {name: options, type: json}
1
+ type: config
2
+ columns:
3
+ - {name: username, type: string}
4
+ - {name: age, type: long}
5
+ - {name: weight, type: double}
6
+ - {name: active, type: boolean}
7
+ - {name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
8
+ - {name: options, type: json}
9
+ values:
10
+ - - [user1,21,65.4,true,"2016-03-27 01:23:45",{"a":{"b":"c"}}]
@@ -1,9 +1,6 @@
1
- type: test
2
- data:
3
- - null,20
4
- parser:
5
- type: csv
6
- null_string: 'null'
7
- columns:
8
- - {name: username, type: string}
9
- - {name: age, type: long}
1
+ type: config
2
+ columns:
3
+ - {name: username, type: string}
4
+ - {name: age, type: long}
5
+ values:
6
+ - - [null,20]
metadata CHANGED
@@ -1,19 +1,19 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-hash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shinichi Ishimura
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-19 00:00:00.000000000 Z
11
+ date: 2018-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
- - - ~>
16
+ - - "~>"
17
17
  - !ruby/object:Gem::Version
18
18
  version: '1.0'
19
19
  name: bundler
@@ -21,13 +21,13 @@ dependencies:
21
21
  type: :development
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
30
- - - '>='
30
+ - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: '10.0'
33
33
  name: rake
@@ -35,7 +35,7 @@ dependencies:
35
35
  type: :development
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
41
  description: Embulk filter plugin to convert an input to a hash value.
@@ -45,11 +45,14 @@ executables: []
45
45
  extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
- - .gitignore
48
+ - ".gitignore"
49
49
  - LICENSE.txt
50
50
  - README.md
51
51
  - build.gradle
52
52
  - circle.yml
53
+ - classpath/annotations-13.0.jar
54
+ - classpath/embulk-filter-hash-0.5.0.jar
55
+ - classpath/kotlin-stdlib-1.2.31.jar
53
56
  - gradle.properties
54
57
  - gradle/wrapper/gradle-wrapper.jar
55
58
  - gradle/wrapper/gradle-wrapper.properties
@@ -62,9 +65,6 @@ files:
62
65
  - src/test/resources/yaml/input_basic.yml
63
66
  - src/test/resources/yaml/input_column_types.yml
64
67
  - src/test/resources/yaml/input_null_column.yml
65
- - classpath/kotlin-stdlib-1.2.31.jar
66
- - classpath/annotations-13.0.jar
67
- - classpath/embulk-filter-hash-0.4.0.jar
68
68
  homepage: https://github.com/kamatama41/embulk-filter-hash
69
69
  licenses:
70
70
  - MIT
@@ -75,17 +75,17 @@ require_paths:
75
75
  - lib
76
76
  required_ruby_version: !ruby/object:Gem::Requirement
77
77
  requirements:
78
- - - '>='
78
+ - - ">="
79
79
  - !ruby/object:Gem::Version
80
80
  version: '0'
81
81
  required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  requirements:
83
- - - '>='
83
+ - - ">="
84
84
  - !ruby/object:Gem::Version
85
85
  version: '0'
86
86
  requirements: []
87
87
  rubyforge_project:
88
- rubygems_version: 2.1.9
88
+ rubygems_version: 2.6.8
89
89
  signing_key:
90
90
  specification_version: 4
91
91
  summary: Hash filter plugin for Embulk