embulk-filter-gsub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fffdfbb5f26bf7c3ee0abf13c355b29080a09336
4
- data.tar.gz: 96e001cd2b5162bd6bf50c7ec239e2134496ca9d
3
+ metadata.gz: f2225445bc1080f16f10ccf3b514f4103f70cf39
4
+ data.tar.gz: 6cb469f097ee14aa27ced7847a9dc787eb0dc211
5
5
  SHA512:
6
- metadata.gz: 7929b95e1cb3aa18b800305a835d385ccd6245fc6eb0421c667a40468936a587d4a8523e889d9c36eead27fa94109556b9ff7e361476016a9f68ab805d654d54
7
- data.tar.gz: e0a1dce998d24320a9c188dd4cb0ea4189bbad9a148be600a2f09f0c084f9251c435208946bc28f6ae12fb244857ebb168da89cba829676eb1fe31a62ebdfb92
6
+ metadata.gz: 7e33ff204e96d633f242a975966b547df3353230a40c292131d2eb609e45494a63b2e3213c7874c2c1f7a389ab8adf26e2a88ca2d6de65e435cbde98d2c3830a
7
+ data.tar.gz: f9818862b57c4f7609ff3f153de35488de1098ea00b6a2a569a9acbdcf5f966ca89c8f7b6ecf6cfa0696b60e69c82e694b38879ab2800d323a44865c3fa84ee9
data/README.md CHANGED
@@ -133,6 +133,26 @@ target_columns:
133
133
  to: "$1 = [$2]"
134
134
  ```
135
135
 
136
+ ### Regular expression options
137
+
138
+ You can specify regular expression options for each rule under the `regexp_options` key.
139
+
140
+ ```yaml
141
+ target_columns:
142
+ foo:
143
+ - type: regexp_replace
144
+ pattern: 'foo'
145
+ to: "***"
146
+ regexp_options:
147
+ ignore_case: true
148
+ ```
149
+
150
+ Supported options are:
151
+
152
+ * **ignore_case** (boolean, default: false)
153
+ * **multiline** (boolean, default: true)
154
+ * **dot_matches_all** (boolean, default: false)
155
+ * **enable_comments** (boolean, default: false)
136
156
 
137
157
  ## Build
138
158
 
@@ -1,5 +1,5 @@
1
1
  buildscript {
2
- ext.kotlin_version = '1.2.10'
2
+ ext.kotlin_version = '1.2.21'
3
3
  repositories {
4
4
  mavenCentral()
5
5
  jcenter()
@@ -18,6 +18,7 @@ embulk {
18
18
  version = "0.8.38"
19
19
  category = "filter"
20
20
  name = "gsub"
21
+ description = "Embulk filter plugin to convert text column values with regular expressions"
21
22
  authors = ["Sawada Tadashi"]
22
23
  email = "cesare@mayverse.jp"
23
24
  homepage = "https://github.com/cesare/embulk-filter-gsub"
@@ -1 +1 @@
1
- version=0.1.0
1
+ version=0.2.0
@@ -1,7 +1,6 @@
1
1
  package org.embulk.filter.gsub
2
2
 
3
3
  import org.embulk.filter.gsub.replacer.LowerCaseReplacer
4
- import org.embulk.filter.gsub.replacer.RegexFactory
5
4
  import org.embulk.filter.gsub.replacer.RegexOptionConfig
6
5
  import org.embulk.filter.gsub.replacer.TextReplacer
7
6
 
@@ -9,10 +8,8 @@ class LowerCaseReplacerFactory : TextReplacerFactory() {
9
8
  override fun create(rule: SubstitutionRule): TextReplacer {
10
9
  val pattern = rule.pattern.orNull()
11
10
  if (pattern != null) {
12
- // TODO set regex options
13
- val regexOptionConfig = RegexOptionConfig()
14
11
  val factory = RegexFactory()
15
- val regex = factory.create(pattern, regexOptionConfig)
12
+ val regex = factory.create(pattern, rule.regexOptions)
16
13
 
17
14
  return LowerCaseReplacer(regex)
18
15
  }
@@ -0,0 +1,32 @@
1
+ package org.embulk.filter.gsub
2
+
3
+ import org.embulk.filter.gsub.RegexOptions
4
+
5
+ class RegexFactory {
6
+ fun create(patternString: String, regexOptions: RegexOptions): Regex {
7
+ val options = buildOptions(regexOptions)
8
+ return Regex(patternString, options)
9
+ }
10
+
11
+ private fun buildOptions(regexOptions: RegexOptions): Set<RegexOption> {
12
+ val options = HashSet<RegexOption>()
13
+
14
+ if (regexOptions.ignoreCase) {
15
+ options.add(RegexOption.IGNORE_CASE)
16
+ }
17
+
18
+ if (regexOptions.multiline) {
19
+ options.add(RegexOption.MULTILINE)
20
+ }
21
+
22
+ if (regexOptions.dotMatchesAll) {
23
+ options.add(RegexOption.DOT_MATCHES_ALL)
24
+ }
25
+
26
+ if (regexOptions.enableComments) {
27
+ options.add(RegexOption.COMMENTS)
28
+ }
29
+
30
+ return options
31
+ }
32
+ }
@@ -0,0 +1,23 @@
1
+ package org.embulk.filter.gsub
2
+
3
+ import org.embulk.config.Config
4
+ import org.embulk.config.ConfigDefault
5
+ import org.embulk.config.Task
6
+
7
+ interface RegexOptions : Task {
8
+ @get:Config("ignore_case")
9
+ @get:ConfigDefault("false")
10
+ val ignoreCase: Boolean
11
+
12
+ @get:Config("multiline")
13
+ @get:ConfigDefault("true")
14
+ val multiline: Boolean
15
+
16
+ @get:Config("dot_matches_all")
17
+ @get:ConfigDefault("false")
18
+ val dotMatchesAll: Boolean
19
+
20
+ @get:Config("enable_comments")
21
+ @get:ConfigDefault("false")
22
+ val enableComments: Boolean
23
+ }
@@ -1,6 +1,5 @@
1
1
  package org.embulk.filter.gsub
2
2
 
3
- import org.embulk.filter.gsub.replacer.RegexFactory
4
3
  import org.embulk.filter.gsub.replacer.RegexOptionConfig
5
4
  import org.embulk.filter.gsub.replacer.RegexReplacer
6
5
  import org.embulk.filter.gsub.replacer.TextReplacer
@@ -10,10 +9,8 @@ class RegexReplacerFactory : TextReplacerFactory() {
10
9
  val pattern = rule.pattern.get()
11
10
  val to = rule.to.get()
12
11
 
13
- val regexOptionConfig = RegexOptionConfig()
14
-
15
12
  val factory = RegexFactory()
16
- val regex = factory.create(pattern, regexOptionConfig)
13
+ val regex = factory.create(pattern, rule.regexOptions)
17
14
 
18
15
  return RegexReplacer(regex, to)
19
16
  }
@@ -23,4 +23,8 @@ interface SubstitutionRule : Task {
23
23
  @get:Config("to")
24
24
  @get:ConfigDefault("null")
25
25
  val to: Optional<String>
26
+
27
+ @get:Config("regexp_options")
28
+ @get:ConfigDefault("{}")
29
+ val regexOptions: RegexOptions
26
30
  }
@@ -1,6 +1,5 @@
1
1
  package org.embulk.filter.gsub
2
2
 
3
- import org.embulk.filter.gsub.replacer.RegexFactory
4
3
  import org.embulk.filter.gsub.replacer.RegexOptionConfig
5
4
  import org.embulk.filter.gsub.replacer.TextReplacer
6
5
  import org.embulk.filter.gsub.replacer.UpperCaseReplacer
@@ -9,10 +8,8 @@ class UpperCaseReplacerFactory : TextReplacerFactory() {
9
8
  override fun create(rule: SubstitutionRule): TextReplacer {
10
9
  val pattern = rule.pattern.orNull()
11
10
  if (pattern != null) {
12
- // TODO set regex options
13
- val regexOptionConfig = RegexOptionConfig()
14
11
  val factory = RegexFactory()
15
- val regex = factory.create(pattern, regexOptionConfig)
12
+ val regex = factory.create(pattern, rule.regexOptions)
16
13
 
17
14
  return UpperCaseReplacer(regex)
18
15
  }
@@ -57,6 +57,54 @@ class TestGsubFilterPlugin {
57
57
  Assert.assertEquals("\\1 [\\2]", barRule2.to.get())
58
58
  }
59
59
 
60
+ @Test
61
+ fun testDefaultRegexOptions() {
62
+ val configYaml = """
63
+ |type: gsub
64
+ |target_columns:
65
+ | foo:
66
+ | - type: regexp_replace
67
+ | pattern: "test"
68
+ """.trimMargin()
69
+
70
+ val config = getConfigFromYaml(configYaml)
71
+ val task = config.loadConfig(GsubFilterPlugin.PluginTask::class.java)
72
+ val fooRules = task.targetColumns["foo"]!!
73
+ val fooRule = fooRules[0]
74
+ val regexOptions = fooRule.regexOptions
75
+ Assert.assertFalse(regexOptions.ignoreCase)
76
+ Assert.assertTrue(regexOptions.multiline)
77
+ Assert.assertFalse(regexOptions.dotMatchesAll)
78
+ Assert.assertFalse(regexOptions.enableComments)
79
+ }
80
+
81
+ @Test
82
+ fun testRegexOptions() {
83
+ val configYaml = """
84
+ |type: gsub
85
+ |target_columns:
86
+ | foo:
87
+ | - type: regexp_replace
88
+ | pattern: "test"
89
+ | regexp_options:
90
+ | ignore_case: true
91
+ | multiline: true
92
+ | dot_matches_all: true
93
+ | enable_comments: true
94
+ """.trimMargin()
95
+
96
+ val config = getConfigFromYaml(configYaml)
97
+ val task = config.loadConfig(GsubFilterPlugin.PluginTask::class.java)
98
+ val fooRules = task.targetColumns["foo"]!!
99
+ val fooRule = fooRules[0]
100
+ val regexOptions = fooRule.regexOptions
101
+
102
+ Assert.assertTrue(regexOptions.ignoreCase)
103
+ Assert.assertTrue(regexOptions.multiline)
104
+ Assert.assertTrue(regexOptions.dotMatchesAll)
105
+ Assert.assertTrue(regexOptions.enableComments)
106
+ }
107
+
60
108
  @Test
61
109
  fun testEmptyFilter() {
62
110
  val configYaml = """
@@ -1,19 +1,134 @@
1
1
  package org.embulk.filter.gsub.replacer
2
2
 
3
+ import org.embulk.config.TaskSource
4
+ import org.embulk.filter.gsub.RegexFactory
5
+ import org.embulk.filter.gsub.RegexOptions
3
6
  import org.junit.Assert
4
7
  import org.junit.Test
5
8
 
6
9
  class RegexReplacerTest {
7
10
  @Test
8
11
  fun testExecute() {
9
- val optionConfig = RegexOptionConfig()
10
- optionConfig.ignoreCase = true
12
+ val regexOptions = createRegexpOption()
11
13
 
12
14
  val factory = RegexFactory()
13
- val pattern = factory.create("(\\w*):\\s*(.*)", optionConfig)
15
+ val pattern = factory.create("(\\w*):\\s*(.*)", regexOptions)
14
16
 
15
17
  val replacer = RegexReplacer(pattern, "$1 [$2]")
16
18
  val result = replacer.execute("test: foo bar baz")
17
19
  Assert.assertEquals("test [foo bar baz]", result)
18
20
  }
21
+
22
+ @Test
23
+ fun testExecuteWithoutIgnoreCaseOption() {
24
+ val regexOptions = createRegexpOption(ignoreCase = false)
25
+
26
+ val factory = RegexFactory()
27
+ val pattern = factory.create("foo", regexOptions)
28
+
29
+ val replacer = RegexReplacer(pattern, "*test-foo*")
30
+
31
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("foo bar baz"))
32
+ Assert.assertEquals("Foo bar baz", replacer.execute("Foo bar baz"))
33
+ Assert.assertEquals("FOO bar baz", replacer.execute("FOO bar baz"))
34
+ }
35
+
36
+ @Test
37
+ fun testExecuteWithIgnoreCaseOption() {
38
+ val regexOptions = createRegexpOption(ignoreCase = true)
39
+ val factory = RegexFactory()
40
+ val pattern = factory.create("foo", regexOptions)
41
+
42
+ val replacer = RegexReplacer(pattern, "*test-foo*")
43
+
44
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("foo bar baz"))
45
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("Foo bar baz"))
46
+ Assert.assertEquals("*test-foo* bar baz", replacer.execute("FOO bar baz"))
47
+ }
48
+
49
+ @Test
50
+ fun testExecuteWithoutMultilineOption() {
51
+ val regexOptions = createRegexpOption(multiline = false)
52
+ val factory = RegexFactory()
53
+ val pattern = factory.create("^bar", regexOptions)
54
+
55
+ val replacer = RegexReplacer(pattern, "*BAR*")
56
+
57
+ Assert.assertEquals("foo\nbar\nbaz", replacer.execute("foo\nbar\nbaz"))
58
+ }
59
+
60
+ @Test
61
+ fun testExecuteWithMultilineOption() {
62
+ val regexOptions = createRegexpOption(multiline = true)
63
+ val factory = RegexFactory()
64
+ val pattern = factory.create("^bar", regexOptions)
65
+
66
+ val replacer = RegexReplacer(pattern, "*BAR*")
67
+
68
+ Assert.assertEquals("foo\n*BAR*\nbaz", replacer.execute("foo\nbar\nbaz"))
69
+ }
70
+
71
+ @Test
72
+ fun testExecuteWithoutDotMatchesAllOption() {
73
+ val regexOptions = createRegexpOption(dotMatchesAll = false)
74
+ val factory = RegexFactory()
75
+ val pattern = factory.create("foo.bar.baz", regexOptions)
76
+
77
+ val replacer = RegexReplacer(pattern, "[foo-bar-baz]")
78
+
79
+ Assert.assertEquals("[foo-bar-baz]", replacer.execute("foo/bar/baz"))
80
+ Assert.assertEquals("foo\nbar/baz", replacer.execute("foo\nbar/baz"))
81
+ }
82
+
83
+ @Test
84
+ fun testExecuteWithDotMatchesAllOption() {
85
+ val regexOptions = createRegexpOption(dotMatchesAll = true)
86
+ val factory = RegexFactory()
87
+ val pattern = factory.create("foo.bar.baz", regexOptions)
88
+
89
+ val replacer = RegexReplacer(pattern, "[foo-bar-baz]")
90
+
91
+ Assert.assertEquals("[foo-bar-baz]", replacer.execute("foo/bar/baz"))
92
+ Assert.assertEquals("[foo-bar-baz]", replacer.execute("foo\nbar/baz"))
93
+ }
94
+
95
+ @Test
96
+ fun testExecuteWithEnableCommentsOption() {
97
+ val regexOptions = createRegexpOption(enableComments = true)
98
+ val factory = RegexFactory()
99
+
100
+ val patternString = """
101
+ |(ba\w) # matches bar and baz
102
+ """.trimMargin()
103
+ val pattern = factory.create(patternString, regexOptions)
104
+
105
+ val replacer = RegexReplacer(pattern, "*$1*")
106
+
107
+ Assert.assertEquals("foo *bar* *baz*", replacer.execute("foo bar baz"))
108
+ }
109
+
110
+ private fun createRegexpOption(
111
+ ignoreCase: Boolean = false,
112
+ multiline: Boolean = true,
113
+ dotMatchesAll: Boolean = false,
114
+ enableComments: Boolean = false
115
+ ): RegexOptions {
116
+ return object: RegexOptions {
117
+ override val ignoreCase: Boolean
118
+ get() = ignoreCase
119
+ override val multiline: Boolean
120
+ get() = multiline
121
+ override val dotMatchesAll: Boolean
122
+ get() = dotMatchesAll
123
+ override val enableComments: Boolean
124
+ get() = enableComments
125
+
126
+ override fun validate() {
127
+ }
128
+
129
+ override fun dump(): TaskSource {
130
+ throw NotImplementedError()
131
+ }
132
+ }
133
+ }
19
134
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-gsub
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sawada Tadashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-21 00:00:00.000000000 Z
11
+ date: 2018-02-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
- description: gsub
41
+ description: Embulk filter plugin to convert text column values with regular expressions
42
42
  email:
43
43
  - cesare@mayverse.jp
44
44
  executables: []
@@ -60,13 +60,14 @@ files:
60
60
  - src/main/kotlin/org/embulk/filter/gsub/ColumnVisitorImpl.kt
61
61
  - src/main/kotlin/org/embulk/filter/gsub/GsubFilterPlugin.kt
62
62
  - src/main/kotlin/org/embulk/filter/gsub/LowerCaseReplacerFactory.kt
63
+ - src/main/kotlin/org/embulk/filter/gsub/RegexFactory.kt
64
+ - src/main/kotlin/org/embulk/filter/gsub/RegexOptions.kt
63
65
  - src/main/kotlin/org/embulk/filter/gsub/RegexReplacerFactory.kt
64
66
  - src/main/kotlin/org/embulk/filter/gsub/SubstitutionRule.kt
65
67
  - src/main/kotlin/org/embulk/filter/gsub/TextReplacerFactory.kt
66
68
  - src/main/kotlin/org/embulk/filter/gsub/UpperCaseReplacerFactory.kt
67
69
  - src/main/kotlin/org/embulk/filter/gsub/replacer/CombinedReplacer.kt
68
70
  - src/main/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacer.kt
69
- - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexFactory.kt
70
71
  - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexOptionConfig.kt
71
72
  - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexReplacer.kt
72
73
  - src/main/kotlin/org/embulk/filter/gsub/replacer/TextReplacer.kt
@@ -76,8 +77,8 @@ files:
76
77
  - src/test/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacerTest.kt
77
78
  - src/test/kotlin/org/embulk/filter/gsub/replacer/RegexReplacerTest.kt
78
79
  - classpath/annotations-13.0.jar
79
- - classpath/embulk-filter-gsub-0.1.0.jar
80
- - classpath/kotlin-stdlib-1.2.10.jar
80
+ - classpath/embulk-filter-gsub-0.2.0.jar
81
+ - classpath/kotlin-stdlib-1.2.21.jar
81
82
  homepage: https://github.com/cesare/embulk-filter-gsub
82
83
  licenses:
83
84
  - MIT
@@ -1,22 +0,0 @@
1
- package org.embulk.filter.gsub.replacer
2
-
3
- class RegexFactory {
4
- fun create(patternString: String, regexOptionConfig: RegexOptionConfig): Regex {
5
- val options = buildOptions(regexOptionConfig)
6
- return Regex(patternString, options)
7
- }
8
-
9
- private fun buildOptions(optionConfig: RegexOptionConfig): Set<RegexOption> {
10
- val options = HashSet<RegexOption>()
11
-
12
- if (optionConfig.ignoreCase) {
13
- options.add(RegexOption.IGNORE_CASE)
14
- }
15
-
16
- if (optionConfig.multiline) {
17
- options.add(RegexOption.MULTILINE)
18
- }
19
-
20
- return options
21
- }
22
- }