embulk-filter-gsub 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +42 -0
  3. data/.gitignore +14 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +141 -0
  6. data/build.gradle +49 -0
  7. data/gradle.properties +1 -0
  8. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  9. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  10. data/gradlew +172 -0
  11. data/gradlew.bat +84 -0
  12. data/lib/embulk/filter/gsub.rb +3 -0
  13. data/src/main/kotlin/org/embulk/filter/gsub/ColumnReplacerFactory.kt +20 -0
  14. data/src/main/kotlin/org/embulk/filter/gsub/ColumnVisitorImpl.kt +72 -0
  15. data/src/main/kotlin/org/embulk/filter/gsub/GsubFilterPlugin.kt +51 -0
  16. data/src/main/kotlin/org/embulk/filter/gsub/LowerCaseReplacerFactory.kt +23 -0
  17. data/src/main/kotlin/org/embulk/filter/gsub/RegexReplacerFactory.kt +20 -0
  18. data/src/main/kotlin/org/embulk/filter/gsub/SubstitutionRule.kt +26 -0
  19. data/src/main/kotlin/org/embulk/filter/gsub/TextReplacerFactory.kt +33 -0
  20. data/src/main/kotlin/org/embulk/filter/gsub/UpperCaseReplacerFactory.kt +23 -0
  21. data/src/main/kotlin/org/embulk/filter/gsub/replacer/CombinedReplacer.kt +7 -0
  22. data/src/main/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacer.kt +17 -0
  23. data/src/main/kotlin/org/embulk/filter/gsub/replacer/RegexFactory.kt +22 -0
  24. data/src/main/kotlin/org/embulk/filter/gsub/replacer/RegexOptionConfig.kt +6 -0
  25. data/src/main/kotlin/org/embulk/filter/gsub/replacer/RegexReplacer.kt +7 -0
  26. data/src/main/kotlin/org/embulk/filter/gsub/replacer/TextReplacer.kt +5 -0
  27. data/src/main/kotlin/org/embulk/filter/gsub/replacer/UpperCaseReplacer.kt +18 -0
  28. data/src/test/kotlin/org/embulk/filter/gsub/TestGsubFilterPlugin.kt +221 -0
  29. data/src/test/kotlin/org/embulk/filter/gsub/replacer/CombinedReplacerTest.kt +37 -0
  30. data/src/test/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacerTest.kt +20 -0
  31. data/src/test/kotlin/org/embulk/filter/gsub/replacer/RegexReplacerTest.kt +19 -0
  32. metadata +105 -0
@@ -0,0 +1,18 @@
1
+ package org.embulk.filter.gsub.replacer
2
+
3
/**
 * [TextReplacer] that converts text to upper case.
 *
 * When a [pattern] is supplied, only the substrings matched by it are upper-cased and the
 * remainder of the text is returned unchanged. Without a pattern the whole text is upper-cased.
 *
 * Case mapping is pinned to [java.util.Locale.ROOT] so that the result does not vary with the
 * JVM's default locale (under a Turkish locale, for instance, "i" would otherwise map to "İ").
 *
 * @param pattern regex selecting the substrings to upper-case, or `null` to convert everything.
 */
class UpperCaseReplacer(private val pattern: Regex? = null) : TextReplacer {
    /**
     * Returns [text] with every match of the pattern upper-cased, or with the entire string
     * upper-cased when no pattern was configured.
     */
    override fun execute(text: String): String =
        if (pattern == null) {
            upperCase(text)
        } else {
            pattern.replace(text) { match -> upperCase(match.value) }
        }

    // Single place that fixes the locale used for case conversion.
    private fun upperCase(value: String): String = value.toUpperCase(java.util.Locale.ROOT)
}
18
+
@@ -0,0 +1,221 @@
1
+ package org.embulk.filter.gsub
2
+
3
+ import org.embulk.EmbulkTestRuntime
4
+ import org.embulk.config.ConfigLoader
5
+ import org.embulk.config.ConfigSource
6
+ import org.embulk.config.TaskSource
7
+ import org.embulk.spi.*
8
+ import org.embulk.spi.type.Types
9
+ import org.embulk.spi.util.Pages
10
+ import org.junit.Assert
11
+ import org.junit.Rule
12
+ import org.junit.Test
13
+
14
+ import org.hamcrest.Matchers.*
15
+
16
/**
 * Tests for [GsubFilterPlugin]: configuration loading and the effect of each rule type on a
 * single-record page.
 */
class TestGsubFilterPlugin {
    @get:Rule
    val runtime = EmbulkTestRuntime()

    /**
     * Loads a configuration with two target columns and checks the parsed rules, including
     * that a rule without an explicit `type` is reported as `regexp_replace`.
     */
    @Test
    fun testConfig() {
        val configYaml = """
            |type: gsub
            |target_columns:
            |  foo:
            |    - type: to_lower_case
            |      pattern: "[A-Z]*"
            |  bar:
            |    - type: "regexp_replace"
            |      pattern: "<br\\s*/?>"
            |      to: "\\n"
            |    - pattern: "(\\d+):(.*)"
            |      to: "\\1 [\\2]"
            """.trimMargin()

        val config = getConfigFromYaml(configYaml)
        val task = config.loadConfig(GsubFilterPlugin.PluginTask::class.java)

        val fooRules = requireNotNull(task.targetColumns["foo"]) { "no rules loaded for column 'foo'" }
        Assert.assertThat(fooRules, hasSize(1))

        val fooRule = fooRules[0]
        Assert.assertEquals("to_lower_case", fooRule.type)
        Assert.assertEquals("[A-Z]*", fooRule.pattern.get())

        val barRules = requireNotNull(task.targetColumns["bar"]) { "no rules loaded for column 'bar'" }
        Assert.assertThat(barRules, hasSize(2))

        val barRule1 = barRules[0]
        Assert.assertEquals("regexp_replace", barRule1.type)
        Assert.assertEquals("<br\\s*/?>", barRule1.pattern.get())
        Assert.assertEquals("\\n", barRule1.to.get())

        val barRule2 = barRules[1]
        Assert.assertEquals("regexp_replace", barRule2.type)
        Assert.assertEquals("(\\d+):(.*)", barRule2.pattern.get())
        Assert.assertEquals("\\1 [\\2]", barRule2.to.get())
    }

    /** With no target columns configured, the record passes through unchanged. */
    @Test
    fun testEmptyFilter() {
        val configYaml = """
            |type: gsub
            """.trimMargin()

        assertFilteredText(configYaml, inputText = "test for echo", expectedText = "test for echo")
    }

    /** A `regexp_replace` rule substitutes the matched text with the `to` value. */
    @Test
    fun testRegexReplaceFilter() {
        val configYaml = """
            |type: gsub
            |target_columns:
            |  string:
            |    - type: regexp_replace
            |      pattern: "test"
            |      to: "[replaced]"
            """.trimMargin()

        assertFilteredText(configYaml, inputText = "test for echo", expectedText = "[replaced] for echo")
    }

    /** A `to_upper_case` rule with a pattern upper-cases only the matched text. */
    @Test
    fun testToUpperCaseFilter() {
        val configYaml = """
            |type: gsub
            |target_columns:
            |  string:
            |    - type: to_upper_case
            |      pattern: "test"
            """.trimMargin()

        assertFilteredText(configYaml, inputText = "test for echo", expectedText = "TEST for echo")
    }

    /** A `to_lower_case` rule with a pattern lower-cases only the matched text. */
    @Test
    fun testToLowerCaseFilter() {
        val configYaml = """
            |type: gsub
            |target_columns:
            |  string:
            |    - type: to_lower_case
            |      pattern: "TEST"
            """.trimMargin()

        assertFilteredText(configYaml, inputText = "TEST FOR ECHO", expectedText = "test FOR ECHO")
    }

    /**
     * Feeds one record `(true, 1234L, inputText)` through a [GsubFilterPlugin] configured by
     * [configYaml] and asserts that the boolean and long columns are unchanged and that the
     * string column equals [expectedText].
     */
    private fun assertFilteredText(configYaml: String, inputText: String, expectedText: String) {
        val config = getConfigFromYaml(configYaml)

        val inputSchema = Schema.builder()
            .add("bool", Types.BOOLEAN)
            .add("long", Types.LONG)
            .add("string", Types.STRING)
            .build()

        val plugin = GsubFilterPlugin()
        plugin.transaction(config, inputSchema, object : FilterPlugin.Control {
            override fun run(taskSource: TaskSource, outputSchema: Schema) {
                val mockPageOutput = TestPageBuilderReader.MockPageOutput()
                val pageOutput = plugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)

                val inputPages = PageTestUtils.buildPage(runtime.bufferAllocator, inputSchema, true, 1234L, inputText)
                for (page in inputPages) {
                    pageOutput.add(page)
                }
                pageOutput.finish()
                pageOutput.close()

                val records = Pages.toObjects(outputSchema, mockPageOutput.pages)
                val record = records[0]
                Assert.assertEquals(true, record[0])
                Assert.assertEquals(1234L, record[1])
                Assert.assertEquals(expectedText, record[2])
            }
        })
    }

    /** Parses [yaml] into a [ConfigSource] using the model manager obtained from [Exec]. */
    private fun getConfigFromYaml(yaml: String): ConfigSource {
        val loader = ConfigLoader(Exec.getModelManager())
        return loader.fromYamlString(yaml)
    }
}
@@ -0,0 +1,37 @@
1
+ package org.embulk.filter.gsub.replacer
2
+
3
+ import org.junit.Assert
4
+ import org.junit.Test
5
+
6
/** Tests for [CombinedReplacer] with zero, one and two underlying replacers. */
class CombinedReplacerTest {
    /** With no replacers, the input text comes back unchanged. */
    @Test
    fun testExecuteWithEmptyReplacer() {
        val combined = CombinedReplacer(arrayListOf<TextReplacer>())

        Assert.assertEquals("foo bar baz", combined.execute("foo bar baz"))
    }

    /** With only a [LowerCaseReplacer], the whole text is lower-cased. */
    @Test
    fun testExecuteWithSingleReplacer() {
        val chain = arrayListOf<TextReplacer>(LowerCaseReplacer())

        val output = CombinedReplacer(chain).execute("FOO BAR BAZ")

        Assert.assertEquals("foo bar baz", output)
    }

    /** With a lower-casing and a whitespace-to-dash replacer, both are applied to the text. */
    @Test
    fun testExecuteWithMultipleReplacer() {
        val whitespaceToDash = RegexReplacer("\\s+".toRegex(), "-")
        val chain = arrayListOf<TextReplacer>(LowerCaseReplacer(), whitespaceToDash)

        val output = CombinedReplacer(chain).execute("FOO BAR BAZ")

        Assert.assertEquals("foo-bar-baz", output)
    }
}
@@ -0,0 +1,20 @@
1
+ package org.embulk.filter.gsub.replacer
2
+
3
+ import org.junit.Assert
4
+ import org.junit.Test
5
+
6
/** Tests for [LowerCaseReplacer] with and without a restricting pattern. */
class LowerCaseReplacerTest {
    /** Without a pattern, the whole text is lower-cased. */
    @Test
    fun testExecuteWithoutPattern() {
        val lowered = LowerCaseReplacer().execute("Foo Bar BAZ")

        Assert.assertEquals("foo bar baz", lowered)
    }

    /** With a pattern, only the matching run of three capitals is lower-cased. */
    @Test
    fun testExecuteWithPattern() {
        val threeCapitals = "[A-Z]{3}".toRegex()

        val lowered = LowerCaseReplacer(threeCapitals).execute("Foo Bar BAZ")

        Assert.assertEquals("Foo Bar baz", lowered)
    }
}
@@ -0,0 +1,19 @@
1
+ package org.embulk.filter.gsub.replacer
2
+
3
+ import org.junit.Assert
4
+ import org.junit.Test
5
+
6
/** Tests for [RegexReplacer] using a pattern built by [RegexFactory]. */
class RegexReplacerTest {
    /** Rewrites "key: value" text into "key [value]" using group references in the replacement. */
    @Test
    fun testExecute() {
        val options = RegexOptionConfig().apply { ignoreCase = true }
        val keyValuePattern = RegexFactory().create("(\\w*):\\s*(.*)", options)

        val rewritten = RegexReplacer(keyValuePattern, "$1 [$2]").execute("test: foo bar baz")

        Assert.assertEquals("test [foo bar baz]", rewritten)
    }
}
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-gsub
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Sawada Tadashi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-01-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '10.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: gsub
42
+ email:
43
+ - cesare@mayverse.jp
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .circleci/config.yml
49
+ - .gitignore
50
+ - LICENSE.txt
51
+ - README.md
52
+ - build.gradle
53
+ - gradle.properties
54
+ - gradle/wrapper/gradle-wrapper.jar
55
+ - gradle/wrapper/gradle-wrapper.properties
56
+ - gradlew
57
+ - gradlew.bat
58
+ - lib/embulk/filter/gsub.rb
59
+ - src/main/kotlin/org/embulk/filter/gsub/ColumnReplacerFactory.kt
60
+ - src/main/kotlin/org/embulk/filter/gsub/ColumnVisitorImpl.kt
61
+ - src/main/kotlin/org/embulk/filter/gsub/GsubFilterPlugin.kt
62
+ - src/main/kotlin/org/embulk/filter/gsub/LowerCaseReplacerFactory.kt
63
+ - src/main/kotlin/org/embulk/filter/gsub/RegexReplacerFactory.kt
64
+ - src/main/kotlin/org/embulk/filter/gsub/SubstitutionRule.kt
65
+ - src/main/kotlin/org/embulk/filter/gsub/TextReplacerFactory.kt
66
+ - src/main/kotlin/org/embulk/filter/gsub/UpperCaseReplacerFactory.kt
67
+ - src/main/kotlin/org/embulk/filter/gsub/replacer/CombinedReplacer.kt
68
+ - src/main/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacer.kt
69
+ - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexFactory.kt
70
+ - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexOptionConfig.kt
71
+ - src/main/kotlin/org/embulk/filter/gsub/replacer/RegexReplacer.kt
72
+ - src/main/kotlin/org/embulk/filter/gsub/replacer/TextReplacer.kt
73
+ - src/main/kotlin/org/embulk/filter/gsub/replacer/UpperCaseReplacer.kt
74
+ - src/test/kotlin/org/embulk/filter/gsub/TestGsubFilterPlugin.kt
75
+ - src/test/kotlin/org/embulk/filter/gsub/replacer/CombinedReplacerTest.kt
76
+ - src/test/kotlin/org/embulk/filter/gsub/replacer/LowerCaseReplacerTest.kt
77
+ - src/test/kotlin/org/embulk/filter/gsub/replacer/RegexReplacerTest.kt
78
+ - classpath/annotations-13.0.jar
79
+ - classpath/embulk-filter-gsub-0.1.0.jar
80
+ - classpath/kotlin-stdlib-1.2.10.jar
81
+ homepage: https://github.com/cesare/embulk-filter-gsub
82
+ licenses:
83
+ - MIT
84
+ metadata: {}
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 2.1.9
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: Gsub filter plugin for Embulk
105
+ test_files: []