embulk 0.8.14-java → 0.8.15-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.15
7
8
  release/release-0.8.14
8
9
  release/release-0.8.13
9
10
  release/release-0.8.12
@@ -0,0 +1,17 @@
1
+ Release 0.8.15
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Added several rule options to the rename filter plugin. [#494]
8
+
9
+ * http://www.embulk.org/docs/built-in.html#rename-filter-plugin
10
+
11
+ * Introduced SkipTransactionException to stop Embulk before the transaction starts. [#498]
12
+
13
+ * Added embulk-test package, which is a utility library to be used from the test cases of Java-based input plugins. [#503]
14
+
15
+ Release Date
16
+ ------------------
17
+ 2016-11-04
@@ -7,13 +7,28 @@ import org.embulk.config.ConfigSource;
7
7
  import org.embulk.config.Task;
8
8
  import org.embulk.config.TaskSource;
9
9
  import org.embulk.spi.Column;
10
+ import org.embulk.spi.Exec;
10
11
  import org.embulk.spi.FilterPlugin;
11
12
  import org.embulk.spi.PageOutput;
12
13
  import org.embulk.spi.Schema;
13
14
 
15
+ import com.google.common.base.Optional;
16
+ import com.google.common.base.Strings;
17
+ import com.google.common.collect.ImmutableMap;
18
+ import com.google.common.collect.ImmutableSet;
19
+
20
+ import org.slf4j.Logger;
21
+
22
+ import java.util.ArrayList;
23
+ import java.util.HashMap;
24
+ import java.util.HashSet;
14
25
  import java.util.List;
26
+ import java.util.Locale;
15
27
  import java.util.Map;
28
+ import java.util.regex.PatternSyntaxException;
16
29
 
30
+ import javax.validation.constraints.Min;
31
+ import javax.validation.constraints.Size;
17
32
 
18
33
  public class RenameFilterPlugin
19
34
  implements FilterPlugin
@@ -71,7 +86,6 @@ public class RenameFilterPlugin
71
86
  return output;
72
87
  }
73
88
 
74
-
75
89
  // Extending Task is required to be deserialized with ConfigSource.loadConfig()
76
90
  // although this Rule is not really a Task.
77
91
  // TODO(dmikurube): Revisit this to consider how not to extend Task for this.
@@ -82,12 +96,371 @@ public class RenameFilterPlugin
82
96
  String getRule();
83
97
  }
84
98
 
99
+ private interface CharacterTypesRule
100
+ extends Rule {
101
+ @Config("pass_types")
102
+ @ConfigDefault("[]")
103
+ List<String> getPassTypes();
104
+
105
+ @Config("pass_characters")
106
+ @ConfigDefault("\"\"")
107
+ String getPassCharacters();
108
+
109
+ @Config("replace")
110
+ @ConfigDefault("\"_\"")
111
+ @Size(min = 1, max = 1)
112
+ String getReplace();
113
+ }
114
+
115
+ private interface FirstCharacterTypesRule
116
+ extends Rule {
117
+ @Config("replace")
118
+ @ConfigDefault("null")
119
+ Optional<String> getReplace();
120
+
121
+ @Config("pass_types")
122
+ @ConfigDefault("[]")
123
+ List<String> getPassTypes();
124
+
125
+ @Config("pass_characters")
126
+ @ConfigDefault("\"\"")
127
+ String getPassCharacters();
128
+
129
+ @Config("prefix")
130
+ @ConfigDefault("null")
131
+ Optional<String> getPrefix();
132
+ }
133
+
134
+ private interface TruncateRule
135
+ extends Rule {
136
+ @Config("max_length")
137
+ @ConfigDefault("128")
138
+ @Min(0)
139
+ int getMaxLength();
140
+ }
141
+
142
+ private interface RegexReplaceRule
143
+ extends Rule {
144
+ @Config("match")
145
+ String getMatch();
146
+
147
+ @Config("replace")
148
+ String getReplace();
149
+ }
150
+
151
+ private interface UniqueNumberSuffixRule
152
+ extends Rule {
153
+ @Config("delimiter")
154
+ @ConfigDefault("\"_\"")
155
+ String getDelimiter();
156
+
157
+ @Config("digits")
158
+ @ConfigDefault("null")
159
+ Optional<Integer> getDigits();
160
+
161
+ @Config("max_length")
162
+ @ConfigDefault("null")
163
+ Optional<Integer> getMaxLength();
164
+
165
+ @Config("offset")
166
+ @ConfigDefault("1")
167
+ @Min(0)
168
+ int getOffset();
169
+ }
170
+
85
171
  private Schema applyRule(ConfigSource ruleConfig, Schema inputSchema) throws ConfigException
86
172
  {
87
173
  Rule rule = ruleConfig.loadConfig(Rule.class);
88
174
  switch (rule.getRule()) {
175
+ case "character_types":
176
+ return applyCharacterTypesRule(inputSchema, ruleConfig.loadConfig(CharacterTypesRule.class));
177
+ case "first_character_types":
178
+ return applyFirstCharacterTypesRule(inputSchema, ruleConfig.loadConfig(FirstCharacterTypesRule.class));
179
+ case "lower_to_upper":
180
+ return applyLowerToUpperRule(inputSchema);
181
+ case "regex_replace":
182
+ return applyRegexReplaceRule(inputSchema, ruleConfig.loadConfig(RegexReplaceRule.class));
183
+ case "truncate":
184
+ return applyTruncateRule(inputSchema, ruleConfig.loadConfig(TruncateRule.class));
185
+ case "upper_to_lower":
186
+ return applyUpperToLowerRule(inputSchema);
187
+ case "unique_number_suffix":
188
+ return applyUniqueNumberSuffixRule(inputSchema, ruleConfig.loadConfig(UniqueNumberSuffixRule.class));
89
189
  default:
90
190
  throw new ConfigException("Renaming rule \"" +rule+ "\" is unknown");
91
191
  }
92
192
  }
193
+
194
+ private Schema applyCharacterTypesRule(Schema inputSchema, CharacterTypesRule rule) {
195
+ final List<String> passTypes = rule.getPassTypes();
196
+ final String passCharacters = rule.getPassCharacters();
197
+ final String replace = rule.getReplace();
198
+
199
+ if (replace.isEmpty()) {
200
+ throw new ConfigException("\"replace\" in \"character_types\" must not be explicitly empty");
201
+ }
202
+ if (replace.length() != 1) {
203
+ throw new ConfigException("\"replace\" in \"character_types\" must contain just 1 character");
204
+ }
205
+ // TODO(dmikurube): Revisit this for better escaping.
206
+ if (passCharacters.contains("\\E")) {
207
+ throw new ConfigException("\"pass_characters\" in \"character_types\" must not contain \"\\E\"");
208
+ }
209
+
210
+ StringBuilder regexBuilder = new StringBuilder();
211
+ regexBuilder.append("[^");
212
+ for (String target : passTypes) {
213
+ if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
214
+ regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
215
+ } else {
216
+ throw new ConfigException("\"" +target+ "\" is an unknown character type keyword");
217
+ }
218
+ }
219
+ if (!passCharacters.isEmpty()) {
220
+ regexBuilder.append("\\Q");
221
+ regexBuilder.append(passCharacters);
222
+ regexBuilder.append("\\E");
223
+ }
224
+ regexBuilder.append("]");
225
+
226
+ Schema.Builder schemaBuilder = Schema.builder();
227
+ for (Column column : inputSchema.getColumns()) {
228
+ schemaBuilder.add(column.getName().replaceAll(regexBuilder.toString(), replace), column.getType());
229
+ }
230
+ return schemaBuilder.build();
231
+ }
232
+
233
+ private Schema applyFirstCharacterTypesRule(Schema inputSchema, FirstCharacterTypesRule rule) {
234
+ final Optional<String> replace = rule.getReplace();
235
+ final List<String> passTypes = rule.getPassTypes();
236
+ final String passCharacters = rule.getPassCharacters();
237
+ final Optional<String> prefix = rule.getPrefix();
238
+
239
+ if (replace.isPresent() && replace.get().length() != 1) {
240
+ throw new ConfigException("\"replace\" in \"first_character_types\" must contain just 1 character if specified");
241
+ }
242
+ if (prefix.isPresent() && prefix.get().length() != 1) {
243
+ throw new ConfigException("\"prefix\" in \"first_character_types\" must contain just 1 character if specified");
244
+ }
245
+ if (prefix.isPresent() && replace.isPresent()) {
246
+ throw new ConfigException("\"replace\" and \"prefix\" in \"first_character_types\" must not be specified together");
247
+ }
248
+ if ((!prefix.isPresent()) && (!replace.isPresent())) {
249
+ throw new ConfigException("Either of \"replace\" or \"prefix\" must be specified in \"first_character_types\"");
250
+ }
251
+ // TODO(dmikurube): Revisit this for better escaping.
252
+ if (passCharacters.contains("\\E")) {
253
+ throw new ConfigException("\"pass_characters\" in \"first_character_types\" must not contain \"\\E\"");
254
+ }
255
+
256
+ StringBuilder regexBuilder = new StringBuilder();
257
+ regexBuilder.append("^[^");
258
+ for (String target : passTypes) {
259
+ if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
260
+ regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
261
+ } else {
262
+ throw new ConfigException("\"" +target+ "\" is an unknown character type keyword");
263
+ }
264
+ }
265
+ if (!passCharacters.isEmpty()) {
266
+ regexBuilder.append("\\Q");
267
+ regexBuilder.append(passCharacters);
268
+ regexBuilder.append("\\E");
269
+ }
270
+ regexBuilder.append("].*");
271
+
272
+ Schema.Builder schemaBuidler = Schema.builder();
273
+ for (Column column : inputSchema.getColumns()) {
274
+ String name = column.getName();
275
+ if (name.matches(regexBuilder.toString())) {
276
+ if (replace.isPresent()) {
277
+ name = replace.get() + name.substring(1);
278
+ }
279
+ else if (prefix.isPresent()) {
280
+ name = prefix.get() + name;
281
+ }
282
+ }
283
+ schemaBuidler.add(name, column.getType());
284
+ }
285
+ return schemaBuidler.build();
286
+ }
287
+
288
+ private Schema applyLowerToUpperRule(Schema inputSchema) {
289
+ Schema.Builder builder = Schema.builder();
290
+ for (Column column : inputSchema.getColumns()) {
291
+ builder.add(column.getName().toUpperCase(Locale.ENGLISH), column.getType());
292
+ }
293
+ return builder.build();
294
+ }
295
+
296
+ private Schema applyTruncateRule(Schema inputSchema, TruncateRule rule) {
297
+ Schema.Builder builder = Schema.builder();
298
+ for (Column column : inputSchema.getColumns()) {
299
+ if (column.getName().length() <= rule.getMaxLength()) {
300
+ builder.add(column.getName(), column.getType());
301
+ }
302
+ else {
303
+ try {
304
+ builder.add(column.getName().substring(0, rule.getMaxLength()), column.getType());
305
+ }
306
+ catch (IndexOutOfBoundsException ex) {
307
+ logger.error("FATAL unexpected error in \"truncate\" rule: substring failed.");
308
+ throw new AssertionError("FATAL unexpected error in \"truncate\" rule: substring failed.", ex);
309
+ }
310
+ }
311
+ }
312
+ return builder.build();
313
+ }
314
+
315
+ private Schema applyUpperToLowerRule(Schema inputSchema) {
316
+ Schema.Builder builder = Schema.builder();
317
+ for (Column column : inputSchema.getColumns()) {
318
+ builder.add(column.getName().toLowerCase(Locale.ENGLISH), column.getType());
319
+ }
320
+ return builder.build();
321
+ }
322
+
323
+ private Schema applyRegexReplaceRule(Schema inputSchema, RegexReplaceRule rule) {
324
+ final String match = rule.getMatch();
325
+ final String replace = rule.getReplace();
326
+
327
+ Schema.Builder builder = Schema.builder();
328
+ for (Column column : inputSchema.getColumns()) {
329
+ // TODO(dmikurube): Check if we need a kind of sanitization?
330
+ try {
331
+ builder.add(column.getName().replaceAll(match, replace), column.getType());
332
+ }
333
+ catch (PatternSyntaxException ex) {
334
+ throw new ConfigException(ex);
335
+ }
336
+ }
337
+ return builder.build();
338
+ }
339
+
340
+ /**
341
+ * Resolves conflicting column names by suffixing numbers.
342
+ *
343
+ * Conflicts are resolved by the following rules. The rules should not be changed casually because changing the
344
+ * rules breaks compatibility.
345
+ *
346
+ * 1. Count all duplicates in the original column names. Indexes are counted up per original column name.
347
+ * 2. Fix new column names from the left to the right
348
+ * - Try to append the current index for the original column name (with truncation if requested (not implemented))
349
+ * - Fix the new name if no duplication is found with fixed column names on the left and original column names
350
+ * - Retry with an index incremented if a duplication is found with fixed column names on the left
351
+ *
352
+ * Examples:
353
+ * [c, c1, c1, c2, c, c3]
354
+ * ==> [c, c1, c1_2, c2, c_2, c3]
355
+ *
356
+ * If a newly suffixed name newly conflicts with other columns, the index is just skipped. For example:
357
+ * [c, c, c_0, c_1, c_2]
358
+ * ==> [c, c_3, c_0, c_1, c_2]
359
+ *
360
+ * If truncation is requested simultaneously with uniqueness (not implemented), it should work like:
361
+ * [co, c, co , c , co , c , ..., co , c , co , c , co , c ]
362
+ * ==> [co, c, co_2, c_2, co_3, c_3, ..., co_9, c_9, c_10, c_11, c_12, c_13] (max_length:4)
363
+ *
364
+ * [co, co , co , ..., co , c, c , ..., c , co , c , co , c , co , c ]
365
+ * ==> [co, co_2, co_3, ..., co_9, c, c_2, ..., c_7, c_10, c_8, c_11, c_9, c_12, c_13] (max_length:4)
366
+ *
367
+ * Note that a delimiter should not be omitted. Recurring conflicts may confuse users.
368
+ * [c, c, c, ..., c, c, c, c, c1, c1, c1]
369
+ * NG: [c, c2, c3, ..., c10, c11, c12, c13, c1, c12, c13] (not unique!)
370
+ * ==> [c, c2, c3, ..., c10, c11, c12, c13, c1, c14, c15] (confusing)
371
+ */
372
+ private Schema applyUniqueNumberSuffixRule(Schema inputSchema, UniqueNumberSuffixRule rule) {
373
+ final String delimiter = rule.getDelimiter();
374
+ final Optional<Integer> digits = rule.getDigits();
375
+ final Optional<Integer> maxLength = rule.getMaxLength();
376
+ final int offset = rule.getOffset();
377
+
378
+ // |delimiter| must consist of just 1 character to check quickly that it does not contain any digit.
379
+ if (delimiter == null || delimiter.length() != 1 || Character.isDigit(delimiter.charAt(0))) {
380
+ throw new ConfigException("\"delimiter\" in rule \"unique_number_suffix\" must contain just 1 non-digit character");
381
+ }
382
+ if (maxLength.isPresent() && maxLength.get() < minimumMaxLengthInUniqueNumberSuffix) {
383
+ throw new ConfigException("\"max_length\" in rule \"unique_number_suffix\" must be larger than " +(minimumMaxLengthInUniqueNumberSuffix-1));
384
+ }
385
+ if (maxLength.isPresent() && digits.isPresent() && maxLength.get() < digits.get() + delimiter.length()) {
386
+ throw new ConfigException("\"max_length\" in rule \"unique_number_suffix\" must be larger than \"digits\"");
387
+ }
388
+ int digitsOfNumberOfColumns = Integer.toString(inputSchema.getColumnCount() + offset - 1).length();
389
+ if (maxLength.isPresent() && maxLength.get() <= digitsOfNumberOfColumns) {
390
+ throw new ConfigException("\"max_length\" in rule \"unique_number_suffix\" must be larger than digits of ((number of columns) + \"offset\" - 1)");
391
+ }
392
+ if (digits.isPresent() && digits.get() <= digitsOfNumberOfColumns) {
393
+ throw new ConfigException("\"digits\" in rule \"unique_number_suffix\" must be larger than digits of ((number of columns) + \"offset\" - 1)");
394
+ }
395
+
396
+ // Columns should not be truncated here initially. Uniqueness should be identified before truncated.
397
+
398
+ // Iterate for initial states.
399
+ HashSet<String> originalColumnNames = new HashSet<>();
400
+ HashMap<String, Integer> columnNameCountups = new HashMap<>();
401
+ for (Column column : inputSchema.getColumns()) {
402
+ originalColumnNames.add(column.getName());
403
+ columnNameCountups.put(column.getName(), offset);
404
+ }
405
+
406
+ Schema.Builder outputBuilder = Schema.builder();
407
+
408
+ HashSet<String> fixedColumnNames = new HashSet<>();
409
+ for (Column column : inputSchema.getColumns()) {
410
+ String truncatedName = column.getName();
411
+ if (column.getName().length() > maxLength.or(Integer.MAX_VALUE)) {
412
+ truncatedName = column.getName().substring(0, maxLength.get());
413
+ }
414
+
415
+ // Fix with the new name candidate if the new name does not conflict with the fixed names on the left.
416
+ // Conflicts with original names do not matter here.
417
+ if (!fixedColumnNames.contains(truncatedName)) {
418
+ // The original name is counted up.
419
+ columnNameCountups.put(column.getName(), columnNameCountups.get(column.getName()) + 1);
420
+ // The truncated name is fixed.
421
+ fixedColumnNames.add(truncatedName);
422
+ outputBuilder.add(truncatedName, column.getType());
423
+ continue;
424
+ }
425
+
426
+ int index = columnNameCountups.get(column.getName());
427
+ String concatenatedName;
428
+ do {
429
+ // This can be replaced with String#format(Locale.ENGLISH, ...), but Java's String#format does not
430
+ // have variable widths ("%*d" in C's printf). It cannot be very simple with String#format.
431
+ String differentiatorString = Integer.toString(index);
432
+ if (digits.isPresent() && (digits.get() > differentiatorString.length())) {
433
+ differentiatorString =
434
+ Strings.repeat("0", digits.get() - differentiatorString.length()) + differentiatorString;
435
+ }
436
+ differentiatorString = delimiter + differentiatorString;
437
+ concatenatedName = column.getName() + differentiatorString;
438
+ if (concatenatedName.length() > maxLength.or(Integer.MAX_VALUE)) {
439
+ concatenatedName =
440
+ column.getName().substring(0, maxLength.get() - differentiatorString.length())
441
+ + differentiatorString;
442
+ }
443
+ ++index;
444
+ // Conflicts with original names matter when creating new names with suffixes.
445
+ } while (fixedColumnNames.contains(concatenatedName) || originalColumnNames.contains(concatenatedName));
446
+ // The original name is counted up.
447
+ columnNameCountups.put(column.getName(), index);
448
+ // The concatenated&truncated name is fixed.
449
+ fixedColumnNames.add(concatenatedName);
450
+ outputBuilder.add(concatenatedName, column.getType());
451
+ }
452
+ return outputBuilder.build();
453
+ }
454
+
455
+ private static final ImmutableMap<String, String> CHARACTER_TYPE_KEYWORDS =
456
+ new ImmutableMap.Builder<String, String>().put("a-z", "a-z")
457
+ .put("A-Z", "A-Z")
458
+ .put("0-9", "0-9")
459
+ .build();
460
+
461
+ // TODO(dmikurube): Revisit the limitation.
462
+ // It should be practically acceptable to assume any output accepts column names with 8 characters at least...
463
+ private static final int minimumMaxLengthInUniqueNumberSuffix = 8;
464
+
465
+ private final Logger logger = Exec.getLogger(getClass());
93
466
  }
@@ -2,19 +2,28 @@ package org.embulk.standards;
2
2
 
3
3
  import com.google.common.collect.ImmutableList;
4
4
  import com.google.common.collect.ImmutableMap;
5
+ import com.google.common.collect.Iterables;
5
6
  import org.embulk.EmbulkTestRuntime;
6
7
  import org.embulk.config.ConfigException;
7
8
  import org.embulk.config.ConfigSource;
8
9
  import org.embulk.config.TaskSource;
10
+ import org.embulk.config.TaskValidationException;
9
11
  import org.embulk.spi.Column;
10
12
  import org.embulk.spi.FilterPlugin;
11
13
  import org.embulk.spi.Exec;
12
14
  import org.embulk.spi.Schema;
13
15
  import org.embulk.spi.SchemaConfigException;
14
16
  import org.embulk.standards.RenameFilterPlugin.PluginTask;
17
+
15
18
  import org.junit.Before;
16
19
  import org.junit.Rule;
17
20
  import org.junit.Test;
21
+ import org.junit.rules.ExpectedException;
22
+
23
+ import java.util.ArrayList;
24
+ import java.util.Arrays;
25
+ import java.util.HashMap;
26
+ import java.util.regex.PatternSyntaxException;
18
27
 
19
28
  import static org.embulk.spi.type.Types.STRING;
20
29
  import static org.embulk.spi.type.Types.TIMESTAMP;
@@ -27,11 +36,16 @@ public class TestRenameFilterPlugin
27
36
  @Rule
28
37
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
29
38
 
39
+ @Rule
40
+ public ExpectedException exception = ExpectedException.none();
41
+
30
42
  private final Schema SCHEMA = Schema.builder()
31
43
  .add("_c0", STRING)
32
44
  .add("_c1", TIMESTAMP)
33
45
  .build();
34
46
 
47
+ private final String DEFAULT = "__use_default__";
48
+
35
49
  private RenameFilterPlugin filter;
36
50
 
37
51
  @Before
@@ -137,4 +151,862 @@ public class TestRenameFilterPlugin
137
151
  assertTrue(t instanceof ConfigException);
138
152
  }
139
153
  }
154
+
155
+ @Test
156
+ public void checkRuleLowerToUpperRule()
157
+ {
158
+ final String original[] = { "_C0", "_C1", "_c2" };
159
+ final String expected[] = { "_C0", "_C1", "_C2" };
160
+ ConfigSource config = Exec.newConfigSource().set("rules",
161
+ ImmutableList.of(ImmutableMap.of("rule", "lower_to_upper")));
162
+ renameAndCheckSchema(config, original, expected);
163
+ }
164
+
165
+ @Test
166
+ public void checkTruncateRule()
167
+ {
168
+ final String original[] = { "foo", "bar", "gj", "foobar", "foobarbaz" };
169
+ final String expected[] = { "foo", "bar", "gj", "foo", "foo" };
170
+ ConfigSource config = Exec.newConfigSource().set("rules",
171
+ ImmutableList.of(ImmutableMap.of("rule", "truncate", "max_length", "3")));
172
+ renameAndCheckSchema(config, original, expected);
173
+ }
174
+
175
+ @Test
176
+ public void checkTruncateRuleDefault()
177
+ {
178
+ final String original[] = {
179
+ "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" };
180
+ final String expected[] = {
181
+ "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678" };
182
+ ConfigSource config = Exec.newConfigSource().set("rules",
183
+ ImmutableList.of(ImmutableMap.of("rule", "truncate")));
184
+ renameAndCheckSchema(config, original, expected);
185
+ }
186
+
187
+ @Test
188
+ public void checkTruncateRuleNegative()
189
+ {
190
+ final String original[] = { "foo" };
191
+ ConfigSource config = Exec.newConfigSource().set("rules",
192
+ ImmutableList.of(ImmutableMap.of("rule", "truncate", "max_length", -1)));
193
+ exception.expect(TaskValidationException.class);
194
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
195
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
196
+ renameAndCheckSchema(config, original, original);
197
+ }
198
+
199
+ @Test
200
+ public void checkRuleUpperToLowerRule()
201
+ {
202
+ final String original[] = { "_C0", "_C1", "_c2" };
203
+ final String expected[] = { "_c0", "_c1", "_c2" };
204
+ ConfigSource config = Exec.newConfigSource().set("rules",
205
+ ImmutableList.of(ImmutableMap.of("rule", "upper_to_lower")));
206
+ renameAndCheckSchema(config, original, expected);
207
+ }
208
+
209
+ @Test
210
+ public void checkCharacterTypesRulePassAlphabet()
211
+ {
212
+ final String original[] = { "Internal$Foo0123--Bar" };
213
+ final String expected[] = { "Internal_Foo______Bar" };
214
+ final String pass_types[] = { "a-z", "A-Z" };
215
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
216
+ }
217
+
218
+ @Test
219
+ public void checkCharacterTypesRulePassAlphanumeric()
220
+ {
221
+ final String original[] = { "Internal$Foo0123--Bar" };
222
+ final String expected[] = { "Internal_Foo0123__Bar" };
223
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
224
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
225
+ }
226
+
227
+ @Test
228
+ public void checkCharacterTypesRulePassLowercase()
229
+ {
230
+ final String original[] = { "Internal$Foo0123--Bar" };
231
+ final String expected[] = { "_nternal__oo_______ar" };
232
+ final String pass_types[] = { "a-z" };
233
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
234
+ }
235
+
236
+ @Test
237
+ public void checkCharacterTypesRulePassLowerwording()
238
+ {
239
+ final String original[] = { "Internal$Foo_0123--Bar" };
240
+ final String expected[] = { "-nternal--oo_0123---ar" };
241
+ final String pass_types[] = { "a-z", "0-9" };
242
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "_", "-");
243
+ }
244
+
245
+ @Test
246
+ public void checkCharacterTypesRulePassNumeric()
247
+ {
248
+ final String original[] = { "Internal$Foo_0123--Bar" };
249
+ final String expected[] = { "_____________0123_____" };
250
+ final String pass_types[] = { "0-9" };
251
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
252
+ }
253
+
254
+ @Test
255
+ public void checkCharacterTypesRulePassUppercase()
256
+ {
257
+ final String original[] = { "Internal$Foo_0123--Bar" };
258
+ final String expected[] = { "I________F_________B__" };
259
+ final String pass_types[] = { "A-Z" };
260
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
261
+ }
262
+
263
+ @Test
264
+ public void checkCharacterTypesRulePassUpperwording()
265
+ {
266
+ final String original[] = { "Internal$Foo_0123--Bar" };
267
+ final String expected[] = { "I--------F--_0123--B--" };
268
+ final String pass_types[] = { "A-Z", "0-9" };
269
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "_", "-");
270
+ }
271
+
272
+ @Test
273
+ public void checkCharacterTypesRulePassWording()
274
+ {
275
+ final String original[] = { "Internal$Foo_0123--Bar" };
276
+ final String expected[] = { "Internal-Foo_0123--Bar" };
277
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
278
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "_", "-");
279
+ }
280
+
281
+ @Test
282
+ public void checkCharacterTypesRulePassCombination()
283
+ {
284
+ final String original[] = { "@Foobar0123_$" };
285
+ final String expected[] = { "__oobar0123__" };
286
+ final String pass_types[] = { "0-9", "a-z" };
287
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
288
+ }
289
+
290
+ @Test
291
+ public void checkCharacterTypesRuleLongReplace()
292
+ {
293
+ final String original[] = { "fooBAR" };
294
+ final String pass_types[] = { "a-z" };
295
+ exception.expect(TaskValidationException.class);
296
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
297
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
298
+ checkCharacterTypesRuleInternal(original, original, pass_types, "", "___");
299
+ }
300
+
301
+ @Test
302
+ public void checkCharacterTypesRuleEmptyReplace()
303
+ {
304
+ final String original[] = { "fooBAR" };
305
+ final String pass_types[] = { "a-z" };
306
+ exception.expect(TaskValidationException.class);
307
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
308
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
309
+ checkCharacterTypesRuleInternal(original, original, pass_types, "", "");
310
+ }
311
+
312
+ // TODO(dmikurube): Test a nil/null replace.
313
+ // - rule: character_types
314
+ // replace:
315
+
316
+ @Test
317
+ public void checkCharacterTypesRuleUnknownType()
318
+ {
319
+ final String original[] = { "fooBAR" };
320
+ final String pass_types[] = { "some_unknown_keyword" };
321
+ exception.expect(ConfigException.class);
322
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
323
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
324
+ checkCharacterTypesRuleInternal(original, original, pass_types, "");
325
+ }
326
+
327
+ @Test
328
+ public void checkCharacterTypesRuleForbiddenCharSequence()
329
+ {
330
+ final String original[] = { "fooBAR" };
331
+ final String pass_types[] = {};
332
+ exception.expect(ConfigException.class);
333
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
334
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
335
+ checkCharacterTypesRuleInternal(original, original, pass_types, "\\E");
336
+ }
337
+
338
+ private void checkCharacterTypesRuleInternal(
339
+ final String original[],
340
+ final String expected[],
341
+ final String pass_types[],
342
+ final String pass_characters)
343
+ {
344
+ checkCharacterTypesRuleInternal(original, expected, pass_types, pass_characters, DEFAULT);
345
+ }
346
+
347
+ private void checkCharacterTypesRuleInternal(
348
+ final String original[],
349
+ final String expected[],
350
+ final String pass_types[],
351
+ final String pass_characters,
352
+ final String replace)
353
+ {
354
+ HashMap<String, Object> parameters = new HashMap<>();
355
+ parameters.put("rule", "character_types");
356
+ if (pass_types != null) {
357
+ parameters.put("pass_types", ImmutableList.copyOf(pass_types));
358
+ }
359
+ if (!pass_characters.equals(DEFAULT)) {
360
+ parameters.put("pass_characters", pass_characters);
361
+ }
362
+ if (!replace.equals(DEFAULT)) {
363
+ parameters.put("replace", replace);
364
+ }
365
+ ConfigSource config = Exec.newConfigSource().set("rules",
366
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
367
+ renameAndCheckSchema(config, original, expected);
368
+ }
369
+
370
+ @Test
371
+ public void checkRegexReplaceRule1()
372
+ {
373
+ final String original[] = { "foobarbaz" };
374
+ final String expected[] = { "hogebarbaz" };
375
+ checkRegexReplaceRuleInternal(original, expected, "foo", "hoge");
376
+ }
377
+
378
+ @Test
379
+ public void checkRegexReplaceRule2()
380
+ {
381
+ final String original[] = { "200_dollars" };
382
+ final String expected[] = { "USD200" };
383
+ checkRegexReplaceRuleInternal(original, expected, "([0-9]+)_dollars", "USD$1");
384
+ }
385
+
386
+ private void checkRegexReplaceRuleInternal(
387
+ final String original[],
388
+ final String expected[],
389
+ final String match,
390
+ final String replace)
391
+ {
392
+ HashMap<String, Object> parameters = new HashMap<>();
393
+ parameters.put("rule", "regex_replace");
394
+ parameters.put("match", match);
395
+ parameters.put("replace", replace);
396
+ ConfigSource config = Exec.newConfigSource().set("rules",
397
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
398
+ renameAndCheckSchema(config, original, expected);
399
+ }
400
+
401
+ @Test
402
+ public void checkFirstCharacterTypesRuleReplaceSingleHyphen()
403
+ {
404
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
405
+ final String expected[] = { "_oo", "_12foo", "_bar", "_AZ", "_ban", "_jar", "_zip", "-zap" };
406
+ final String pass_types[] = {};
407
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types, "-");
408
+ }
409
+
410
+ @Test
411
+ public void checkFirstCharacterTypesRuleReplaceMultipleSingles()
412
+ {
413
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
414
+ final String expected[] = { "_oo", "_12foo", "@bar", "_AZ", "_ban", "_jar", "*zip", "-zap" };
415
+ final String pass_types[] = {};
416
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types, "-@*");
417
+ }
418
+
419
+ @Test
420
+ public void checkFirstCharacterTypesRuleReplaceAlphabet()
421
+ {
422
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
423
+ final String expected[] = { "foo", "_12foo", "_bar", "BAZ", "_ban", "_jar", "_zip", "_zap" };
424
+ final String pass_types[] = { "a-z", "A-Z" };
425
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
426
+ }
427
+
428
+ @Test
429
+ public void checkFirstCharacterTypesRuleReplaceAlphanumeric()
430
+ {
431
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
432
+ final String expected[] = { "foo", "012foo", "_bar", "BAZ", "_ban", "_jar", "_zip", "_zap" };
433
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
434
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
435
+ }
436
+
437
+ @Test
438
+ public void checkFirstCharacterTypesRuleReplaceLowercase()
439
+ {
440
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
441
+ final String expected[] = { "foo", "_12foo", "_bar", "_AZ", "_ban", "_jar", "_zip", "_zap" };
442
+ final String pass_types[] = { "a-z" };
443
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
444
+ }
445
+
446
+ @Test
447
+ public void checkFirstCharacterTypesRuleReplaceLowerwording()
448
+ {
449
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
450
+ final String expected[] = { "foo", "012foo", "-bar", "-AZ", "-ban", "_jar", "-zip", "-zap" };
451
+ final String pass_types[] = { "a-z", "0-9" };
452
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "-", pass_types, "_");
453
+ }
454
+
455
+ @Test
456
+ public void checkFirstCharacterTypesRuleReplaceNumeric()
457
+ {
458
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
459
+ final String expected[] = { "_oo", "012foo", "_bar", "_AZ", "_ban", "_jar", "_zip", "_zap" };
460
+ final String pass_types[] = { "0-9" };
461
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
462
+ }
463
+
464
+ @Test
465
+ public void checkFirstCharacterTypesRuleReplaceUppercase()
466
+ {
467
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
468
+ final String expected[] = { "_oo", "_12foo", "_bar", "BAZ", "_ban", "_jar", "_zip", "_zap" };
469
+ final String pass_types[] = { "A-Z" };
470
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
471
+ }
472
+
473
+ @Test
474
+ public void checkFirstCharacterTypesRuleReplaceUpperwording()
475
+ {
476
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
477
+ final String expected[] = { "-oo", "012foo", "-bar", "BAZ", "-ban", "_jar", "-zip", "-zap" };
478
+ final String pass_types[] = { "A-Z", "0-9" };
479
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "-", pass_types, "_");
480
+ }
481
+
482
+ @Test
483
+ public void checkFirstCharacterTypesRuleReplaceWording()
484
+ {
485
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
486
+ final String expected[] = { "foo", "012foo", "$bar", "BAZ", "$ban", "_jar", "$zip", "$zap" };
487
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
488
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "$", pass_types, "_");
489
+ }
490
+
491
+ @Test
492
+ public void checkFirstCharacterTypesRuleReplaceUnknownFirst()
493
+ {
494
+ final String original[] = { "foo" };
495
+ final String pass_types[] = { "some_unknown_type" };
496
+ exception.expect(ConfigException.class);
497
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
498
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
499
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "_", pass_types);
500
+ }
501
+
502
+ @Test
503
+ public void checkFirstCharacterTypesRuleReplaceForbiddenCharSequence()
504
+ {
505
+ final String original[] = { "foo" };
506
+ final String pass_types[] = {};
507
+ exception.expect(ConfigException.class);
508
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
509
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
510
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "_", pass_types, "\\E");
511
+ }
512
+
513
+ @Test
514
+ public void checkFirstCharacterTypesRulePrefixSingleHyphen()
515
+ {
516
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
517
+ final String expected[] = { "_foo", "_012foo", "_@bar", "_BAZ", "_&ban", "__jar", "_*zip", "-zap" };
518
+ final String pass_types[] = {};
519
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types, "-");
520
+ }
521
+
522
+ @Test
523
+ public void checkFirstCharacterTypesRulePrefixMultipleSingles()
524
+ {
525
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
526
+ final String expected[] = { "_foo", "_012foo", "@bar", "_BAZ", "_&ban", "__jar", "*zip", "-zap" };
527
+ final String pass_types[] = {};
528
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types, "-@*");
529
+ }
530
+
531
+ @Test
532
+ public void checkFirstCharacterTypesRulePrefixAlphabet()
533
+ {
534
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
535
+ final String expected[] = { "foo", "_012foo", "_@bar", "BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
536
+ final String pass_types[] = { "a-z", "A-Z" };
537
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
538
+ }
539
+
540
+ @Test
541
+ public void checkFirstCharacterTypesRulePrefixAlphanumeric()
542
+ {
543
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
544
+ final String expected[] = { "foo", "012foo", "_@bar", "BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
545
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
546
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
547
+ }
548
+
549
+ @Test
550
+ public void checkFirstCharacterTypesRulePrefixLowercase()
551
+ {
552
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
553
+ final String expected[] = { "foo", "_012foo", "_@bar", "_BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
554
+ final String pass_types[] = { "a-z" };
555
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
556
+ }
557
+
558
+ @Test
559
+ public void checkFirstCharacterTypesRulePrefixLowerwording()
560
+ {
561
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
562
+ final String expected[] = { "foo", "012foo", "-@bar", "-BAZ", "-&ban", "_jar", "-*zip", "--zap" };
563
+ final String pass_types[] = { "a-z", "0-9" };
564
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "-", pass_types, "_");
565
+ }
566
+
567
+ @Test
568
+ public void checkFirstCharacterTypesRulePrefixNumeric()
569
+ {
570
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
571
+ final String expected[] = { "_foo", "012foo", "_@bar", "_BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
572
+ final String pass_types[] = { "0-9" };
573
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
574
+ }
575
+
576
+ @Test
577
+ public void checkFirstCharacterTypesRulePrefixUppercase()
578
+ {
579
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
580
+ final String expected[] = { "_foo", "_012foo", "_@bar", "BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
581
+ final String pass_types[] = { "A-Z" };
582
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
583
+ }
584
+
585
+ @Test
586
+ public void checkFirstCharacterTypesRulePrefixUpperwording()
587
+ {
588
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
589
+ final String expected[] = { "-foo", "012foo", "-@bar", "BAZ", "-&ban", "_jar", "-*zip", "--zap" };
590
+ final String pass_types[] = { "A-Z", "0-9" };
591
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "-", pass_types, "_");
592
+ }
593
+
594
+ @Test
595
+ public void checkFirstCharacterTypesRulePrefixWording()
596
+ {
597
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
598
+ final String expected[] = { "foo", "012foo", "$@bar", "BAZ", "$&ban", "_jar", "$*zip", "$-zap" };
599
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
600
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "$", pass_types, "_");
601
+ }
602
+
603
+ @Test
604
+ public void checkFirstCharacterTypesRuleEmptyPrefix()
605
+ {
606
+ final String original[] = { "foo" };
607
+ final String pass_types[] = {};
608
+ exception.expect(ConfigException.class);
609
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
610
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
611
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "", pass_types);
612
+ }
613
+
614
+ @Test
615
+ public void checkFirstCharacterTypesRuleLongPrefix()
616
+ {
617
+ final String original[] = { "foo" };
618
+ final String pass_types[] = {};
619
+ exception.expect(ConfigException.class);
620
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
621
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
622
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "__", pass_types);
623
+ }
624
+
625
+ @Test
626
+ public void checkFirstCharacterTypesRuleEmptyReplace()
627
+ {
628
+ final String original[] = { "foo" };
629
+ final String pass_types[] = {};
630
+ exception.expect(ConfigException.class);
631
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
632
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
633
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "", pass_types);
634
+ }
635
+
636
+ @Test
637
+ public void checkFirstCharacterTypesRuleLongReplace()
638
+ {
639
+ final String original[] = { "foo" };
640
+ final String pass_types[] = {};
641
+ exception.expect(ConfigException.class);
642
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
643
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
644
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "__", pass_types);
645
+ }
646
+
647
+ @Test
648
+ public void checkFirstCharacterTypesRulePrefixUnknownFirst()
649
+ {
650
+ final String original[] = { "foo" };
651
+ final String pass_types[] = { "some_unknown_type" };
652
+ exception.expect(ConfigException.class);
653
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
654
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
655
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "_", pass_types);
656
+ }
657
+
658
+ @Test
659
+ public void checkFirstCharacterTypesRulePrefixForbiddenCharSequence()
660
+ {
661
+ final String original[] = { "foo" };
662
+ final String pass_types[] = {};
663
+ exception.expect(ConfigException.class);
664
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
665
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
666
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "\\E", pass_types);
667
+ }
668
+
669
+ @Test
670
+ public void checkFirstCharacterTypesRuleBothReplacePrefix()
671
+ {
672
+ HashMap<String, Object> parameters = new HashMap<>();
673
+ parameters.put("rule", "first_character_types");
674
+ parameters.put("replace", "_");
675
+ parameters.put("prefix", "_");
676
+ ConfigSource config = Exec.newConfigSource().set("rules",
677
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
678
+ exception.expect(ConfigException.class);
679
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
680
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
681
+ renameAndCheckSchema(config, new String[0], new String[0]);
682
+ }
683
+
684
+ @Test
685
+ public void checkFirstCharacterTypesRuleNeitherReplacePrefix()
686
+ {
687
+ HashMap<String, Object> parameters = new HashMap<>();
688
+ parameters.put("rule", "first_character_types");
689
+ ConfigSource config = Exec.newConfigSource().set("rules",
690
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
691
+ exception.expect(ConfigException.class);
692
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
693
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
694
+ renameAndCheckSchema(config, new String[0], new String[0]);
695
+ }
696
+
697
+ private void checkFirstCharacterTypesRuleReplaceInternal(
698
+ final String original[],
699
+ final String expected[],
700
+ final String replace,
701
+ final String pass_types[]) {
702
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, replace, pass_types, DEFAULT);
703
+ }
704
+
705
+ private void checkFirstCharacterTypesRuleReplaceInternal(
706
+ final String original[],
707
+ final String expected[],
708
+ final String replace,
709
+ final String pass_types[],
710
+ final String pass_characters)
711
+ {
712
+ HashMap<String, Object> parameters = new HashMap<>();
713
+ parameters.put("rule", "first_character_types");
714
+ if (pass_types.length > 0) {
715
+ parameters.put("pass_types", Arrays.asList(pass_types));
716
+ }
717
+ if (!pass_characters.equals(DEFAULT)) {
718
+ parameters.put("pass_characters", pass_characters);
719
+ }
720
+ if (!replace.equals(DEFAULT)) {
721
+ parameters.put("replace", replace);
722
+ }
723
+ ConfigSource config = Exec.newConfigSource().set("rules",
724
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
725
+ renameAndCheckSchema(config, original, expected);
726
+ }
727
+
728
+ private void checkFirstCharacterTypesRulePrefixInternal(
729
+ final String original[],
730
+ final String expected[],
731
+ final String prefix,
732
+ final String pass_types[]) {
733
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, prefix, pass_types, DEFAULT);
734
+ }
735
+
736
+ private void checkFirstCharacterTypesRulePrefixInternal(
737
+ final String original[],
738
+ final String expected[],
739
+ final String prefix,
740
+ final String pass_types[],
741
+ final String pass_characters)
742
+ {
743
+ HashMap<String, Object> parameters = new HashMap<>();
744
+ parameters.put("rule", "first_character_types");
745
+ if (pass_types.length > 0) {
746
+ parameters.put("pass_types", Arrays.asList(pass_types));
747
+ }
748
+ if (!pass_characters.equals(DEFAULT)) {
749
+ parameters.put("pass_characters", pass_characters);
750
+ }
751
+ if (!prefix.equals(DEFAULT)) {
752
+ parameters.put("prefix", prefix);
753
+ }
754
+ ConfigSource config = Exec.newConfigSource().set("rules",
755
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
756
+ renameAndCheckSchema(config, original, expected);
757
+ }
758
+
759
+ public void checkUniqueNumberSuffixRuleEmptyDelimiter()
760
+ {
761
+ final String columnNames[] = { "c" };
762
+ try {
763
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, "");
764
+ } catch (Throwable t) {
765
+ assertTrue(t instanceof ConfigException);
766
+ }
767
+ }
768
+
769
+ @Test
770
+ public void checkUniqueNumberSuffixRuleLongDelimiter()
771
+ {
772
+ final String columnNames[] = { "c" };
773
+ try {
774
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, "__");
775
+ } catch (Throwable t) {
776
+ assertTrue(t instanceof ConfigException);
777
+ }
778
+ }
779
+
780
+ @Test
781
+ public void checkUniqueNumberSuffixRuleDigitDelimiter()
782
+ {
783
+ final String columnNames[] = { "c" };
784
+ try {
785
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, "2");
786
+ } catch (Throwable t) {
787
+ assertTrue(t instanceof ConfigException);
788
+ }
789
+ }
790
+
791
+ @Test
792
+ public void checkUniqueNumberSuffixRuleShortMaxLength()
793
+ {
794
+ final String columnNames[] = { "c" };
795
+ try {
796
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, DEFAULT, -1, 7);
797
+ } catch (Throwable t) {
798
+ assertTrue(t instanceof ConfigException);
799
+ }
800
+ }
801
+
802
+ // TODO(dmikurube): Test a nil/null delimiter in "unique".
803
+ // - rule: unique
804
+ // delimiter:
805
+
806
+ @Test
807
+ public void checkUniqueNumberSuffixRule0()
808
+ {
809
+ final String originalColumnNames[] = { "a", "b", "c", "d", "e" };
810
+ final String expectedColumnNames[] = { "a", "b", "c", "d", "e" };
811
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
812
+ }
813
+
814
+ @Test
815
+ public void checkUniqueNumberSuffixRule1()
816
+ {
817
+ final String originalColumnNames[] = { "c", "c", "c1", "c2", "c2" };
818
+ final String expectedColumnNames[] = { "c", "c_2", "c1", "c2", "c2_2" };
819
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
820
+ }
821
+
822
+ @Test
823
+ public void checkUniqueNumberSuffixRule2()
824
+ {
825
+ final String originalColumnNames[] = { "c", "c", "c_1", "c_3", "c" };
826
+ final String expectedColumnNames[] = { "c", "c_2", "c_1", "c_3", "c_4" };
827
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
828
+ }
829
+
830
+ @Test
831
+ public void checkUniqueNumberSuffixRule3()
832
+ {
833
+ final String originalColumnNames[] = {
834
+ "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c_1", "c_1" };
835
+ final String expectedColumnNames[] = {
836
+ "c", "c_2", "c_3", "c_4", "c_5", "c_6", "c_7", "c_8", "c_9", "c_10", "c_1", "c_1_2" };
837
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
838
+ }
839
+
840
+ @Test
841
+ public void checkUniqueNumberSuffixRule4DifferentDelimiter()
842
+ {
843
+ final String originalColumnNames[] = { "c", "c", "c1", "c2", "c2" };
844
+ final String expectedColumnNames[] = { "c", "c-2", "c1", "c2", "c2-2" };
845
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, "-");
846
+ }
847
+
848
+ @Test
849
+ public void checkUniqueNumberSuffixRule5Digits()
850
+ {
851
+ final String originalColumnNames[] = { "c", "c", "c1", "c2", "c2" };
852
+ final String expectedColumnNames[] = { "c", "c_0002", "c1", "c2", "c2_0002" };
853
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, 4, -1);
854
+ }
855
+
856
+ @Test
857
+ public void checkUniqueNumberSuffixRule6MaxLength1()
858
+ {
859
+ final String originalColumnNames[] = { "column", "column", "column_1", "column_2", "column_2" };
860
+ final String expectedColumnNames[] = { "column", "column_3", "column_1", "column_2", "column_4" };
861
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
862
+ }
863
+
864
+ @Test
865
+ public void checkUniqueNumberSuffixRule7()
866
+ {
867
+ final String originalColumnNames[] = { "column", "column", "column_2", "column_3" };
868
+ final String expectedColumnNames[] = { "column", "column_4", "column_2", "column_3" };
869
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
870
+ }
871
+
872
+ @Test
873
+ public void checkUniqueNumberSuffixRule8MaxLength2()
874
+ {
875
+ final String originalColumnNames[] = {
876
+ "column", "colum", "column", "colum", "column", "colum", "column", "colum", "column",
877
+ "colum", "column", "colum", "column", "colum", "column", "colum", "column", "colum",
878
+ "column", "colum", "column", "colum" };
879
+ final String expectedColumnNames[] = {
880
+ "column", "colum", "column_2", "colum_2", "column_3", "colum_3", "column_4", "colum_4", "column_5",
881
+ "colum_5", "column_6", "colum_6", "column_7", "colum_7", "column_8", "colum_8", "column_9", "colum_9",
882
+ "colum_10", "colum_11", "colum_12", "colum_13" };
883
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
884
+ }
885
+
886
+ @Test
887
+ public void checkUniqueNumberSuffixRule9MaxLength3()
888
+ {
889
+ final String originalColumnNames[] = {
890
+ "column", "column", "column", "column", "column", "column", "column", "column", "column",
891
+ "colum", "colum", "colum", "colum", "colum", "colum", "colum", "colum",
892
+ "column", "colum", "column", "colum", "column" };
893
+ final String expectedColumnNames[] = {
894
+ "column", "column_2", "column_3", "column_4", "column_5", "column_6", "column_7", "column_8", "column_9",
895
+ "colum", "colum_2", "colum_3", "colum_4", "colum_5", "colum_6", "colum_7", "colum_8",
896
+ "colum_10", "colum_9", "colum_11", "colum_12", "colum_13" };
897
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
898
+ }
899
+
900
+ @Test
901
+ public void checkUniqueNumberSuffixRule10EsteemOriginalNames()
902
+ {
903
+ final String originalColumnNames[] = { "c", "c", "c_2" };
904
+ final String expectedColumnNames[] = { "c", "c_3", "c_2" };
905
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, -1);
906
+ }
907
+
908
+ @Test
909
+ public void checkUniqueNumberSuffixRuleNegativeLength()
910
+ {
911
+ final String originalColumnNames[] = { "column" };
912
+ exception.expect(ConfigException.class);
913
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
914
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
915
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, originalColumnNames, DEFAULT, -1, -2);
916
+ }
917
+
918
+ private void checkUniqueNumberSuffixRuleInternal(
919
+ final String originalColumnNames[],
920
+ final String expectedColumnNames[]) {
921
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames,
922
+ expectedColumnNames,
923
+ DEFAULT,
924
+ -1,
925
+ -1);
926
+ }
927
+
928
+ private void checkUniqueNumberSuffixRuleInternal(
929
+ final String originalColumnNames[],
930
+ final String expectedColumnNames[],
931
+ String delimiter) {
932
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames,
933
+ expectedColumnNames,
934
+ delimiter,
935
+ -1,
936
+ -1);
937
+ }
938
+
939
+ private void checkUniqueNumberSuffixRuleInternal(
940
+ final String originalColumnNames[],
941
+ final String expectedColumnNames[],
942
+ String delimiter,
943
+ int digits,
944
+ int max_length)
945
+ {
946
+ Schema.Builder originalSchemaBuilder = Schema.builder();
947
+ for (String originalColumnName : originalColumnNames) {
948
+ originalSchemaBuilder.add(originalColumnName, STRING);
949
+ }
950
+ final Schema ORIGINAL_SCHEMA = originalSchemaBuilder.build();
951
+
952
+ HashMap<String, Object> parameters = new HashMap<>();
953
+ parameters.put("rule", "unique_number_suffix");
954
+ if (!delimiter.equals(DEFAULT)) {
955
+ parameters.put("delimiter", delimiter);
956
+ }
957
+ if (digits >= 0) {
958
+ parameters.put("digits", digits);
959
+ }
960
+ if (max_length != -1) {
961
+ parameters.put("max_length", max_length);
962
+ }
963
+ ConfigSource pluginConfig = Exec.newConfigSource().set("rules",
964
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
965
+
966
+ filter.transaction(pluginConfig, ORIGINAL_SCHEMA, new FilterPlugin.Control() {
967
+ @Override
968
+ public void run(TaskSource task, Schema newSchema)
969
+ {
970
+ ArrayList<String> resolvedColumnNamesList = new ArrayList<>(newSchema.size());
971
+ for (Column resolvedColumn : newSchema.getColumns()) {
972
+ resolvedColumnNamesList.add(resolvedColumn.getName());
973
+ }
974
+ String[] resolvedColumnNames = Iterables.toArray(resolvedColumnNamesList, String.class);
975
+ assertEquals(expectedColumnNames, resolvedColumnNames);
976
+ for (int i = 0; i < expectedColumnNames.length; ++i) {
977
+ Column original = ORIGINAL_SCHEMA.getColumn(i);
978
+ Column resolved = newSchema.getColumn(i);
979
+ assertEquals(original.getType(), resolved.getType());
980
+ }
981
+ }
982
+ });
983
+ }
984
+
985
+ private Schema makeSchema(final String columnNames[])
986
+ {
987
+ Schema.Builder builder = new Schema.Builder();
988
+ for (String columnName : columnNames) {
989
+ builder.add(columnName, STRING);
990
+ }
991
+ return builder.build();
992
+ }
993
+
994
+ private void renameAndCheckSchema(ConfigSource config,
995
+ final String original[],
996
+ final String expected[])
997
+ {
998
+ final Schema originalSchema = makeSchema(original);
999
+ filter.transaction(config, originalSchema, new FilterPlugin.Control() {
1000
+ @Override
1001
+ public void run(TaskSource task, Schema renamedSchema)
1002
+ {
1003
+ assertEquals(originalSchema.getColumnCount(), renamedSchema.getColumnCount());
1004
+ assertEquals(expected.length, renamedSchema.getColumnCount());
1005
+ for (int i = 0; i < renamedSchema.getColumnCount(); ++i) {
1006
+ assertEquals(originalSchema.getColumnType(i), renamedSchema.getColumnType(i));
1007
+ assertEquals(expected[i], renamedSchema.getColumnName(i));
1008
+ }
1009
+ }
1010
+ });
1011
+ }
140
1012
  }