embulk-filter-gsub 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fffdfbb5f26bf7c3ee0abf13c355b29080a09336
4
- data.tar.gz: 96e001cd2b5162bd6bf50c7ec239e2134496ca9d
3
+ metadata.gz: f2225445bc1080f16f10ccf3b514f4103f70cf39
4
+ data.tar.gz: 6cb469f097ee14aa27ced7847a9dc787eb0dc211
5
5
  SHA512:
6
- metadata.gz: 7929b95e1cb3aa18b800305a835d385ccd6245fc6eb0421c667a40468936a587d4a8523e889d9c36eead27fa94109556b9ff7e361476016a9f68ab805d654d54
7
- data.tar.gz: e0a1dce998d24320a9c188dd4cb0ea4189bbad9a148be600a2f09f0c084f9251c435208946bc28f6ae12fb244857ebb168da89cba829676eb1fe31a62ebdfb92
6
+ metadata.gz: 7e33ff204e96d633f242a975966b547df3353230a40c292131d2eb609e45494a63b2e3213c7874c2c1f7a389ab8adf26e2a88ca2d6de65e435cbde98d2c3830a
7
+ data.tar.gz: f9818862b57c4f7609ff3f153de35488de1098ea00b6a2a569a9acbdcf5f966ca89c8f7b6ecf6cfa0696b60e69c82e694b38879ab2800d323a44865c3fa84ee9
data/README.md CHANGED
@@ -133,6 +133,26 @@ target_columns:
133
133
  to: "$1 = [$2]"
134
134
  ```
135
135
 
136
+ ### Regular expression options
137
+
138
+ You can specify regular expression options for each rule with `regexp_options`.
139
+
140
+ ```yaml
141
+ target_columns:
142
+ foo:
143
+ - type: regexp_replace
144
+ pattern: 'foo'
145
+ to: "***"
146
+ regexp_options:
147
+ ignore_case: true
148
+ ```
149
+
150
+ Supported options are:
151
+
152
+ * **ignore_case** (boolean, default: false)
153
+ * **multiline** (boolean, default: true)
154
+ * **dot_matches_all** (boolean, default: false)
155
+ * **enable_comments** (boolean, default: false)
136
156
 
137
157
  ## Build
138
158
 
@@ -1,5 +1,5 @@
1
1
  buildscript {
2
- ext.kotlin_version = '1.2.10'
2
+ ext.kotlin_version = '1.2.21'
3
3
  repositories {
4
4
  mavenCentral()
5
5
  jcenter()
@@ -18,6 +18,7 @@ embulk {
18
18
  version = "0.8.38"
19
19
  category = "filter"
20
20
  name = "gsub"
21
+ description = "Embulk filter plugin to convert text column values with regular expressions"
21
22
  authors = ["Sawada Tadashi"]
22
23
  email = "cesare@mayverse.jp"
23
24
  homepage = "https://github.com/cesare/embulk-filter-gsub"
@@ -1 +1 @@
1
- version=0.1.0
1
+ version=0.2.0
@@ -1,7 +1,6 @@
1
1
  package org.embulk.filter.gsub
2
2
 
3
3
  import org.embulk.filter.gsub.replacer.LowerCaseReplacer
4
- import org.embulk.filter.gsub.replacer.RegexFactory
5
4
  import org.embulk.filter.gsub.replacer.RegexOptionConfig
6
5
  import org.embulk.filter.gsub.replacer.TextReplacer
7
6
 
@@ -9,10 +8,8 @@ class LowerCaseReplacerFactory : TextReplacerFactory() {
9
8
  override fun create(rule: SubstitutionRule): TextReplacer {
10
9
  val pattern = rule.pattern.orNull()
11
10
  if (pattern != null) {
12
- // TODO set regex options
13
- val regexOptionConfig = RegexOptionConfig()
14
11
  val factory = RegexFactory()
15
- val regex = factory.create(pattern, regexOptionConfig)
12
+ val regex = factory.create(pattern, rule.regexOptions)
16
13
 
17
14
  return LowerCaseReplacer(regex)
18
15
  }
@@ -0,0 +1,32 @@
1
+ package org.embulk.filter.gsub
2
+
3
+ import org.embulk.filter.gsub.RegexOptions
4
+
5
+ class RegexFactory {
6
+ fun create(patternString: String, regexOptions: RegexOptions): Regex {
7
+ val options = buildOptions(regexOptions)
8
+ return Regex(patternString, options)
9
+ }
10
+
11
+ private fun buildOptions(regexOptions: RegexOptions): Set<RegexOption> {
12
+ val options = HashSet<RegexOption>()
13
+
14
+ if (regexOptions.ignoreCase) {
15
+ options.add(RegexOption.IGNORE_CASE)
16
+ }
17
+
18
+ if (regexOptions.multiline) {
19
+ options.add(RegexOption.MULTILINE)
20
+ }
21
+
22
+ if (regexOptions.dotMatchesAll) {
23
+ options.add(RegexOption.DOT_MATCHES_ALL)
24
+ }
25
+
26
+ if (regexOptions.enableComments) {
27
+ options.add(RegexOption.COMMENTS)
28
+ }
29
+
30
+ return options
31
+ }
32
+ }
@@ -0,0 +1,23 @@
1
+ package org.embulk.filter.gsub
2
+
3
+ import org.embulk.config.Config
4
+ import org.embulk.config.ConfigDefault
5
+ import org.embulk.config.Task
6
+
7
+ interface RegexOptions : Task {
8
+ @get:Config("ignore_case")
9
+ @get:ConfigDefault("false")
10
+ val ignoreCase: Boolean
11
+
12
+ @get:Config("multiline")
13
+ @get:ConfigDefault("true")
14
+ val multiline: Boolean
15
+
16
+ @get:Config("dot_matches_all")
17
+ @get:ConfigDefault("false")
18
+ val dotMatchesAll: Boolean
19
+
20
+ @get:Config("enable_comments")
21
+ @get:ConfigDefault("false")
22
+ val enableComments: Boolean
23
+ }
@@ -1,6 +1,5 @@
1
1
  package org.embulk.filter.gsub
2
2
 
3
- import org.embulk.filter.gsub.replacer.RegexFactory
4
3
  import org.embulk.filter.gsub.replacer.RegexOptionConfig
5
4
  import org.embulk.filter.gsub.replacer.RegexReplacer
6
5
  import org.embulk.filter.gsub.replacer.TextReplacer
@@ -10,10 +9,8 @@ class RegexReplacerFactory : TextReplacerFactory() {
10
9
  val pattern = rule.pattern.get()
11
10
  val to = rule.to.get()
12
11
 
13
- val regexOptionConfig = RegexOptionConfig()
14
-
15
12
  val factory = RegexFactory()
16
- val regex = factory.create(pattern, regexOptionConfig)
13
+ val regex = factory.create(pattern, rule.regexOptions)
17
14
 
18
15
  return RegexReplacer(regex, to)
19
16
  }
@@ -23,4 +23,8 @@ interface SubstitutionRule : Task {
23
23
  @get:Config("to")
24
24
  @get:ConfigDefault("null")
25
25
  val to: Optional<String>
26
+
27
+ @get:Config("regexp_options")
28
+ @get:ConfigDefault("{}")
29
+ val regexOptions: RegexOptions
26
30
  }
@@ -1,6 +1,5 @@
1
1
  package org.embulk.filter.gsub
2
2
 
3
- import org.embulk.filter.gsub.replacer.RegexFactory
4
3
  import org.embulk.filter.gsub.replacer.RegexOptionConfig
5
4
  import org.embulk.filter.gsub.replacer.TextReplacer
6
5
  import org.embulk.filter.gsub.replacer.UpperCaseReplacer
@@ -9,10 +8,8 @@ class UpperCaseReplacerFactory : TextReplacerFactory() {
9
8
  override fun create(rule: SubstitutionRule): TextReplacer {
10
9
  val pattern = rule.pattern.orNull()
11
10
  if (pattern != null) {
12
- // TODO set regex options
13
- val regexOptionConfig = RegexOptionConfig()
14
11
  val factory = RegexFactory()
15
- val regex = factory.create(pattern, regexOptionConfig)
12
+ val regex = factory.create(pattern, rule.regexOptions)
16
13
 
17
14
  return UpperCaseReplacer(regex)
18
15
  }
@@ -57,6 +57,54 @@ class TestGsubFilterPlugin {
57
57
  Assert.assertEquals("\\1 [\\2]", barRule2.to.get())
58
58
  }
59
59
 
60
+ @Test
61
+ fun testDefaultRegexOptions() {
62
+ val configYaml = """
63
+ |type: gsub
64
+ |target_columns:
65
+ | foo:
66
+ | - type: regexp_replace
67
+ | pattern: "test"
68
+ """.trimMargin()
69
+
70
+ val config = getConfigFromYaml(configYaml)
71
+ val task = config.loadConfig(GsubFilterPlugin.PluginTask::class.java)
72
+ val fooRules = task.targetColumns["foo"]!!
73
+ val fooRule = fooRules[0]
74
+ val regexOptions = fooRule.regexOptions
75
+ Assert.assertFalse(regexOptions.ignoreCase)
76
+ Assert.assertTrue(regexOptions.multiline)
77
+ Assert.assertFalse(regexOptions.dotMatchesAll)
78
+ Assert.assertFalse(regexOptions.enableComments)
79
+ }
80
+
81
+ @Test
82
+ fun testRegexOptions() {
83
+ val configYaml = """
84
+ |type: gsub
85
+ |target_columns:
86
+ | foo:
87
+ | - type: regexp_replace
88
+ | pattern: "test"
89
+ | regexp_options:
90
+ | ignore_case: true
91
+ | multiline: true
92
+ | dot_matches_all: true
93
+ | enable_comments: true
94
+ """.trimMargin()
95
+
96
+ val config = getConfigFromYaml(configYaml)
97
+ val task = config.loadConfig(GsubFilterPlugin.PluginTask::class.java)
98
+ val fooRules = task.targetColumns["foo"]!!
99
+ val fooRule = fooRules[0]
100
+ val regexOptions = fooRule.regexOptions
101
+
102
+ Assert.assertTrue(regexOptions.ignoreCase)
103
+ Assert.assertTrue(regexOptions.multiline)
104
+ Assert.assertTrue(regexOptions.dotMatchesAll)
105
+ Assert.assertTrue(regexOptions.enableComments)
106
+ }
107
+
60
108
  @Test
61
109
  fun testEmptyFilter() {
62
110
  val configYaml = """
@@ -1,19 +1,134 @@
1
1
  package org.embulk.filter.gsub.replacer
2
2
 
3
+ import org.embulk.config.TaskSource
4
+ import org.embulk.filter.gsub.RegexFactory
5
+ import org.embulk.filter.gsub.RegexOptions
3
6
  import org.junit.Assert
4
7
  import org.junit.Test
5
8
 
6
9
  class RegexReplacerTest {
7
10
  @Test
8
11
  fun testExecute() {
9
- val optionConfig = RegexOptionConfig()
10
- optionConfig.ignoreCase = true
12
+ val regexOptions = createRegexpOption()
11
13
 
12
14
  val factory = RegexFactory()
13
- val pattern = factory.create("(\\w*):\\s*(.*)", optionConfig)
15
+ val pattern = factory.create("(\\w*):\\s*(.*)", regexOptions)
14
16
 
15
17
  val replacer = RegexReplacer(pattern, "$1 [$2]")
16
18
  val result = replacer.execute("test: foo bar baz")
17
19
  Assert.assertEquals("test [foo bar baz]", result)
18
20
  }
21
+
22
+ @Test
23
+ fun testExecuteWithoutIgnoreCaseOption() {
24
+ val regexOptions = createRegexpOption(ignoreCase = false)
25
+
26
+ val factory = RegexFactory()
27
+ val pattern = factory.create("foo", regexOptions)
28
+
29
+ val replacer = RegexReplacer(pattern, "*test-foo*")
30
+
31
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("foo bar baz"))
32
+ Assert.assertEquals("Foo bar baz", replacer.execute("Foo bar baz"))
33
+ Assert.assertEquals("FOO bar baz", replacer.execute("FOO bar baz"))
34
+ }
35
+
36
+ @Test
37
+ fun testExecuteWithIgnoreCaseOption() {
38
+ val regexOptions = createRegexpOption(ignoreCase = true)
39
+ val factory = RegexFactory()
40
+ val pattern = factory.create("foo", regexOptions)
41
+
42
+ val replacer = RegexReplacer(pattern, "*test-foo*")
43
+
44
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("foo bar baz"))
45
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("Foo bar baz"))
46
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("FOO bar baz"))
47
+ }
48
+
49
+ @Test
50
+ fun testExecuteWithoutMultilineOption() {
51
+ val regexOptions = createRegexpOption(multiline = false)
52
+ val factory = RegexFactory()
53
+ val pattern = factory.create("^bar", regexOptions)
54
+
55
+ val replacer = RegexReplacer(pattern, "*BAR*")
56
+
57
+ Assert.assertEquals("foo\nbar\nbaz", replacer.execute("foo\nbar\nbaz"))
58
+ }
59
+
60
+ @Test
61
+ fun testExecuteWithMultilineOption() {
62
+ val regexOptions = createRegexpOption(multiline = true)
63
+ val factory = RegexFactory()
64
+ val pattern = factory.create("^bar", regexOptions)
65
+
66
+ val replacer = RegexReplacer(pattern, "*BAR*")
67
+
68
+ Assert.assertEquals("foo\n*BAR*\nbaz", replacer.execute("foo\nbar\nbaz"))
69
+ }
70
+
71
+ @Test
72
+ fun testExecuteWithoutDotMatchesAllOption() {
73
+ val regexOptions = createRegexpOption(dotMatchesAll = false)
74
+ val factory = RegexFactory()
75
+ val pattern = factory.create("foo.bar.baz", regexOptions)
76
+
77
+ val replacer = RegexReplacer(pattern, "[foo-bar-baz]")
78
+
79
+ Assert.assertEquals("[foo-bar-baz]", replacer.execute("foo/bar/baz"))
80
+ Assert.assertEquals("foo\nbar/baz", replacer.execute("foo\nbar/baz"))
81
+ }
82
+
83
+ @Test
84
+ fun testExecuteWithDotMatchesAllOption() {
85
+ val regexOptions = createRegexpOption(dotMatchesAll = true)
86
+ val factory = RegexFactory()
87
+ val pattern = factory.create("foo.bar.baz", regexOptions)
88
+
89
+ val replacer = RegexReplacer(pattern, "[foo-bar-baz]")
90
+
91
+ Assert.assertEquals("[foo-bar-baz]", replacer.execute("foo/bar/baz"))
92
+ Assert.assertEquals("[foo-bar-baz]", replacer.execute("foo\nbar/baz"))
93
+ }
94
+
95
+ @Test
96
+ fun testExecuteWithEnableCommentsOption() {
97
+ val regexOptions = createRegexpOption(enableComments = true)
98
+ val factory = RegexFactory()
99
+
100
+ val patternString = """
101
+ |(ba\w) # matches bar and baz
102
+ """.trimMargin()
103
+ val pattern = factory.create(patternString, regexOptions)
104
+
105
+ val replacer = RegexReplacer(pattern, "*$1*")
106
+
107
+ Assert.assertEquals("foo *bar* *baz*", replacer.execute("foo bar baz"))
108
+ }
109
+
110
+ private fun createRegexpOption(
111
+ ignoreCase: Boolean = false,
112
+ multiline: Boolean = true,
113
+ dotMatchesAll: Boolean = false,
114
+ enableComments: Boolean = false
115
+ ): RegexOptions {
116
+ return object: RegexOptions {
117
+ override val ignoreCase: Boolean
118
+ get() = ignoreCase
119
+ override val multiline: Boolean
120
+ get() = multiline
121
+ override val dotMatchesAll: Boolean
122
+ get() = dotMatchesAll
123
+ override val enableComments: Boolean
124
+ get() = enableComments
125
+
126
+ override fun validate() {
127
+ }
128
+
129
+ override fun dump(): TaskSource {
130
+ throw NotImplementedError()
131
+ }
132
+ }
133
+ }
19
134
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-gsub
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sawada Tadashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-21 00:00:00.000000000 Z
11
+ date: 2018-02-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
- description: gsub
41
+ description: Embulk filter plugin to convert text column values with regular expressions
42
42
  email:
43
43
  - cesare@mayverse.jp
44
44
  executables: []
@@ -60,13 +60,14 @@ files:
60
60
  - src/main/kotlin/org/embulk/filter/gsub/ColumnVisitorImpl.kt
61
61
  - src/main/kotlin/org/embulk/filter/gsub/GsubFilterPlugin.kt
62
62
  - src/main/kotlin/org/embulk/filter/gsub/LowerCaseReplacerFactory.kt
63
+ - src/main/kotlin/org/embulk/filter/gsub/RegexFactory.kt
64
+ - src/main/kotlin/org/embulk/filter/gsub/RegexOptions.kt
63
65
  - src/main/kotlin/org/embulk/filter/gsub/RegexReplacerFactory.kt
64
66
  - src/main/kotlin/org/embulk/filter/gsub/SubstitutionRule.kt
65
67
  - src/main/kotlin/org/embulk/filter/gsub/TextReplacerFactory.kt
66
68
  - src/main/kotlin/org/embulk/filter/gsub/UpperCaseReplacerFactory.kt
67
69
  - src/main/kotlin/org/embulk/filter/gsub/replacer/CombinedReplacer.kt
68
70
  - src/main/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacer.kt
69
- - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexFactory.kt
70
71
  - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexOptionConfig.kt
71
72
  - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexReplacer.kt
72
73
  - src/main/kotlin/org/embulk/filter/gsub/replacer/TextReplacer.kt
@@ -76,8 +77,8 @@ files:
76
77
  - src/test/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacerTest.kt
77
78
  - src/test/kotlin/org/embulk/filter/gsub/replacer/RegexReplacerTest.kt
78
79
  - classpath/annotations-13.0.jar
79
- - classpath/embulk-filter-gsub-0.1.0.jar
80
- - classpath/kotlin-stdlib-1.2.10.jar
80
+ - classpath/embulk-filter-gsub-0.2.0.jar
81
+ - classpath/kotlin-stdlib-1.2.21.jar
81
82
  homepage: https://github.com/cesare/embulk-filter-gsub
82
83
  licenses:
83
84
  - MIT
@@ -1,22 +0,0 @@
1
- package org.embulk.filter.gsub.replacer
2
-
3
- class RegexFactory {
4
- fun create(patternString: String, regexOptionConfig: RegexOptionConfig): Regex {
5
- val options = buildOptions(regexOptionConfig)
6
- return Regex(patternString, options)
7
- }
8
-
9
- private fun buildOptions(optionConfig: RegexOptionConfig): Set<RegexOption> {
10
- val options = HashSet<RegexOption>()
11
-
12
- if (optionConfig.ignoreCase) {
13
- options.add(RegexOption.IGNORE_CASE)
14
- }
15
-
16
- if (optionConfig.multiline) {
17
- options.add(RegexOption.MULTILINE)
18
- }
19
-
20
- return options
21
- }
22
- }