embulk 0.8.14 → 0.8.15

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.15
7
8
  release/release-0.8.14
8
9
  release/release-0.8.13
9
10
  release/release-0.8.12
@@ -0,0 +1,17 @@
1
+ Release 0.8.15
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Added several new rules to the rename filter plugin. [#494]
8
+
9
+ * http://www.embulk.org/docs/built-in.html#rename-filter-plugin
10
+
11
+ * Introduced SkipTransactionException to stop Embulk before the transaction starts. [#498]
12
+
13
+ * Added the embulk-test package, a utility library to be used from the test cases of Java-based input plugins. [#503]
14
+
15
+ Release Date
16
+ ------------------
17
+ 2016-11-04
@@ -7,13 +7,28 @@ import org.embulk.config.ConfigSource;
7
7
  import org.embulk.config.Task;
8
8
  import org.embulk.config.TaskSource;
9
9
  import org.embulk.spi.Column;
10
+ import org.embulk.spi.Exec;
10
11
  import org.embulk.spi.FilterPlugin;
11
12
  import org.embulk.spi.PageOutput;
12
13
  import org.embulk.spi.Schema;
13
14
 
15
+ import com.google.common.base.Optional;
16
+ import com.google.common.base.Strings;
17
+ import com.google.common.collect.ImmutableMap;
18
+ import com.google.common.collect.ImmutableSet;
19
+
20
+ import org.slf4j.Logger;
21
+
22
+ import java.util.ArrayList;
23
+ import java.util.HashMap;
24
+ import java.util.HashSet;
14
25
  import java.util.List;
26
+ import java.util.Locale;
15
27
  import java.util.Map;
28
+ import java.util.regex.PatternSyntaxException;
16
29
 
30
+ import javax.validation.constraints.Min;
31
+ import javax.validation.constraints.Size;
17
32
 
18
33
  public class RenameFilterPlugin
19
34
  implements FilterPlugin
@@ -71,7 +86,6 @@ public class RenameFilterPlugin
71
86
  return output;
72
87
  }
73
88
 
74
-
75
89
  // Extending Task is required to be deserialized with ConfigSource.loadConfig()
76
90
  // although this Rule is not really a Task.
77
91
  // TODO(dmikurube): Revisit this to consider how not to extend Task for this.
@@ -82,12 +96,371 @@ public class RenameFilterPlugin
82
96
  String getRule();
83
97
  }
84
98
 
99
+ private interface CharacterTypesRule
100
+ extends Rule {
101
+ @Config("pass_types")
102
+ @ConfigDefault("[]")
103
+ List<String> getPassTypes();
104
+
105
+ @Config("pass_characters")
106
+ @ConfigDefault("\"\"")
107
+ String getPassCharacters();
108
+
109
+ @Config("replace")
110
+ @ConfigDefault("\"_\"")
111
+ @Size(min = 1, max = 1)
112
+ String getReplace();
113
+ }
114
+
115
+ private interface FirstCharacterTypesRule
116
+ extends Rule {
117
+ @Config("replace")
118
+ @ConfigDefault("null")
119
+ Optional<String> getReplace();
120
+
121
+ @Config("pass_types")
122
+ @ConfigDefault("[]")
123
+ List<String> getPassTypes();
124
+
125
+ @Config("pass_characters")
126
+ @ConfigDefault("\"\"")
127
+ String getPassCharacters();
128
+
129
+ @Config("prefix")
130
+ @ConfigDefault("null")
131
+ Optional<String> getPrefix();
132
+ }
133
+
134
+ private interface TruncateRule
135
+ extends Rule {
136
+ @Config("max_length")
137
+ @ConfigDefault("128")
138
+ @Min(0)
139
+ int getMaxLength();
140
+ }
141
+
142
+ private interface RegexReplaceRule
143
+ extends Rule {
144
+ @Config("match")
145
+ String getMatch();
146
+
147
+ @Config("replace")
148
+ String getReplace();
149
+ }
150
+
151
+ private interface UniqueNumberSuffixRule
152
+ extends Rule {
153
+ @Config("delimiter")
154
+ @ConfigDefault("\"_\"")
155
+ String getDelimiter();
156
+
157
+ @Config("digits")
158
+ @ConfigDefault("null")
159
+ Optional<Integer> getDigits();
160
+
161
+ @Config("max_length")
162
+ @ConfigDefault("null")
163
+ Optional<Integer> getMaxLength();
164
+
165
+ @Config("offset")
166
+ @ConfigDefault("1")
167
+ @Min(0)
168
+ int getOffset();
169
+ }
170
+
85
171
  private Schema applyRule(ConfigSource ruleConfig, Schema inputSchema) throws ConfigException
86
172
  {
87
173
  Rule rule = ruleConfig.loadConfig(Rule.class);
88
174
  switch (rule.getRule()) {
175
+ case "character_types":
176
+ return applyCharacterTypesRule(inputSchema, ruleConfig.loadConfig(CharacterTypesRule.class));
177
+ case "first_character_types":
178
+ return applyFirstCharacterTypesRule(inputSchema, ruleConfig.loadConfig(FirstCharacterTypesRule.class));
179
+ case "lower_to_upper":
180
+ return applyLowerToUpperRule(inputSchema);
181
+ case "regex_replace":
182
+ return applyRegexReplaceRule(inputSchema, ruleConfig.loadConfig(RegexReplaceRule.class));
183
+ case "truncate":
184
+ return applyTruncateRule(inputSchema, ruleConfig.loadConfig(TruncateRule.class));
185
+ case "upper_to_lower":
186
+ return applyUpperToLowerRule(inputSchema);
187
+ case "unique_number_suffix":
188
+ return applyUniqueNumberSuffixRule(inputSchema, ruleConfig.loadConfig(UniqueNumberSuffixRule.class));
89
189
  default:
90
190
  throw new ConfigException("Renaming rule \"" +rule+ "\" is unknown");
91
191
  }
92
192
  }
193
+
194
+ private Schema applyCharacterTypesRule(Schema inputSchema, CharacterTypesRule rule) {
195
+ final List<String> passTypes = rule.getPassTypes();
196
+ final String passCharacters = rule.getPassCharacters();
197
+ final String replace = rule.getReplace();
198
+
199
+ if (replace.isEmpty()) {
200
+ throw new ConfigException("\"replace\" in \"character_types\" must not be explicitly empty");
201
+ }
202
+ if (replace.length() != 1) {
203
+ throw new ConfigException("\"replace\" in \"character_types\" must contain just 1 character");
204
+ }
205
+ // TODO(dmikurube): Revisit this for better escaping.
206
+ if (passCharacters.contains("\\E")) {
207
+ throw new ConfigException("\"pass_characters\" in \"character_types\" must not contain \"\\E\"");
208
+ }
209
+
210
+ StringBuilder regexBuilder = new StringBuilder();
211
+ regexBuilder.append("[^");
212
+ for (String target : passTypes) {
213
+ if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
214
+ regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
215
+ } else {
216
+ throw new ConfigException("\"" +target+ "\" is an unknown character type keyword");
217
+ }
218
+ }
219
+ if (!passCharacters.isEmpty()) {
220
+ regexBuilder.append("\\Q");
221
+ regexBuilder.append(passCharacters);
222
+ regexBuilder.append("\\E");
223
+ }
224
+ regexBuilder.append("]");
225
+
226
+ Schema.Builder schemaBuilder = Schema.builder();
227
+ for (Column column : inputSchema.getColumns()) {
228
+ schemaBuilder.add(column.getName().replaceAll(regexBuilder.toString(), replace), column.getType());
229
+ }
230
+ return schemaBuilder.build();
231
+ }
232
+
233
+ private Schema applyFirstCharacterTypesRule(Schema inputSchema, FirstCharacterTypesRule rule) {
234
+ final Optional<String> replace = rule.getReplace();
235
+ final List<String> passTypes = rule.getPassTypes();
236
+ final String passCharacters = rule.getPassCharacters();
237
+ final Optional<String> prefix = rule.getPrefix();
238
+
239
+ if (replace.isPresent() && replace.get().length() != 1) {
240
+ throw new ConfigException("\"replace\" in \"first_character_types\" must contain just 1 character if specified");
241
+ }
242
+ if (prefix.isPresent() && prefix.get().length() != 1) {
243
+ throw new ConfigException("\"prefix\" in \"first_character_types\" must contain just 1 character if specified");
244
+ }
245
+ if (prefix.isPresent() && replace.isPresent()) {
246
+ throw new ConfigException("\"replace\" and \"prefix\" in \"first_character_types\" must not be specified together");
247
+ }
248
+ if ((!prefix.isPresent()) && (!replace.isPresent())) {
249
+ throw new ConfigException("Either of \"replace\" or \"prefix\" must be specified in \"first_character_types\"");
250
+ }
251
+ // TODO(dmikurube): Revisit this for better escaping.
252
+ if (passCharacters.contains("\\E")) {
253
+ throw new ConfigException("\"pass_characters\" in \"first_character_types\" must not contain \"\\E\"");
254
+ }
255
+
256
+ StringBuilder regexBuilder = new StringBuilder();
257
+ regexBuilder.append("^[^");
258
+ for (String target : passTypes) {
259
+ if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
260
+ regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
261
+ } else {
262
+ throw new ConfigException("\"" +target+ "\" is an unknown character type keyword");
263
+ }
264
+ }
265
+ if (!passCharacters.isEmpty()) {
266
+ regexBuilder.append("\\Q");
267
+ regexBuilder.append(passCharacters);
268
+ regexBuilder.append("\\E");
269
+ }
270
+ regexBuilder.append("].*");
271
+
272
+ Schema.Builder schemaBuidler = Schema.builder();
273
+ for (Column column : inputSchema.getColumns()) {
274
+ String name = column.getName();
275
+ if (name.matches(regexBuilder.toString())) {
276
+ if (replace.isPresent()) {
277
+ name = replace.get() + name.substring(1);
278
+ }
279
+ else if (prefix.isPresent()) {
280
+ name = prefix.get() + name;
281
+ }
282
+ }
283
+ schemaBuidler.add(name, column.getType());
284
+ }
285
+ return schemaBuidler.build();
286
+ }
287
+
288
+ private Schema applyLowerToUpperRule(Schema inputSchema) {
289
+ Schema.Builder builder = Schema.builder();
290
+ for (Column column : inputSchema.getColumns()) {
291
+ builder.add(column.getName().toUpperCase(Locale.ENGLISH), column.getType());
292
+ }
293
+ return builder.build();
294
+ }
295
+
296
+ private Schema applyTruncateRule(Schema inputSchema, TruncateRule rule) {
297
+ Schema.Builder builder = Schema.builder();
298
+ for (Column column : inputSchema.getColumns()) {
299
+ if (column.getName().length() <= rule.getMaxLength()) {
300
+ builder.add(column.getName(), column.getType());
301
+ }
302
+ else {
303
+ try {
304
+ builder.add(column.getName().substring(0, rule.getMaxLength()), column.getType());
305
+ }
306
+ catch (IndexOutOfBoundsException ex) {
307
+ logger.error("FATAL unexpected error in \"truncate\" rule: substring failed.");
308
+ throw new AssertionError("FATAL unexpected error in \"truncate\" rule: substring failed.", ex);
309
+ }
310
+ }
311
+ }
312
+ return builder.build();
313
+ }
314
+
315
+ private Schema applyUpperToLowerRule(Schema inputSchema) {
316
+ Schema.Builder builder = Schema.builder();
317
+ for (Column column : inputSchema.getColumns()) {
318
+ builder.add(column.getName().toLowerCase(Locale.ENGLISH), column.getType());
319
+ }
320
+ return builder.build();
321
+ }
322
+
323
+ private Schema applyRegexReplaceRule(Schema inputSchema, RegexReplaceRule rule) {
324
+ final String match = rule.getMatch();
325
+ final String replace = rule.getReplace();
326
+
327
+ Schema.Builder builder = Schema.builder();
328
+ for (Column column : inputSchema.getColumns()) {
329
+ // TODO(dmikurube): Check if we need a kind of sanitization?
330
+ try {
331
+ builder.add(column.getName().replaceAll(match, replace), column.getType());
332
+ }
333
+ catch (PatternSyntaxException ex) {
334
+ throw new ConfigException(ex);
335
+ }
336
+ }
337
+ return builder.build();
338
+ }
339
+
340
+ /**
341
+ * Resolves conflicting column names by suffixing numbers.
342
+ *
343
+ * Conflicts are resolved by the following rules. The rules should not be changed casually because changing the
344
+ * rules breaks compatibility.
345
+ *
346
+ * 1. Count all duplicates in the original column names. Indexes are counted up per original column name.
347
+ * 2. Fix new column names from the left to the right
348
+ * - Try to append the current index for the original column name (with truncation if requested (not implemented))
349
+ * - Fix the new name if no duplication is found with fixed column names on the left and original column names
350
+ * - Retry with an index incremented if a duplication is found with fixed column names on the left
351
+ *
352
+ * Examples:
353
+ * [c, c1, c1, c2, c, c3]
354
+ * ==> [c, c1, c1_2, c2, c_2, c3]
355
+ *
356
+ * If a newly suffixed name conflicts with other columns, the index is just skipped. For example:
357
+ * [c, c, c_0, c_1, c_2]
358
+ * ==> [c, c_3, c_0, c_1, c_2]
359
+ *
360
+ * If truncation is requested simultaneously with uniqueness (not implemented), it should work like:
361
+ * [co, c, co , c , co , c , ..., co , c , co , c , co , c ]
362
+ * ==> [co, c, co_2, c_2, co_3, c_3, ..., co_9, c_9, c_10, c_11, c_12, c_13] (max_length:4)
363
+ *
364
+ * [co, co , co , ..., co , c, c , ..., c , co , c , co , c , co , c ]
365
+ * ==> [co, co_2, co_3, ..., co_9, c, c_2, ..., c_7, c_10, c_8, c_11, c_9, c_12, c_13] (max_length:4)
366
+ *
367
+ * Note that a delimiter should not be omitted. Recurring conflicts may confuse users.
368
+ * [c, c, c, ..., c, c, c, c, c1, c1, c1]
369
+ * NG: [c, c2, c3, ..., c10, c11, c12, c13, c1, c12, c13] (not unique!)
370
+ * ==> [c, c2, c3, ..., c10, c11, c12, c13, c1, c14, c15] (confusing)
371
+ */
372
+ private Schema applyUniqueNumberSuffixRule(Schema inputSchema, UniqueNumberSuffixRule rule) {
373
+ final String delimiter = rule.getDelimiter();
374
+ final Optional<Integer> digits = rule.getDigits();
375
+ final Optional<Integer> maxLength = rule.getMaxLength();
376
+ final int offset = rule.getOffset();
377
+
378
+ // |delimiter| must consist of just 1 character to check quickly that it does not contain any digit.
379
+ if (delimiter == null || delimiter.length() != 1 || Character.isDigit(delimiter.charAt(0))) {
380
+ throw new ConfigException("\"delimiter\" in rule \"unique_number_suffix\" must contain just 1 non-digit character");
381
+ }
382
+ if (maxLength.isPresent() && maxLength.get() < minimumMaxLengthInUniqueNumberSuffix) {
383
+ throw new ConfigException("\"max_length\" in rule \"unique_number_suffix\" must be larger than " +(minimumMaxLengthInUniqueNumberSuffix-1));
384
+ }
385
+ if (maxLength.isPresent() && digits.isPresent() && maxLength.get() < digits.get() + delimiter.length()) {
386
+ throw new ConfigException("\"max_length\" in rule \"unique_number_suffix\" must be larger than \"digits\"");
387
+ }
388
+ int digitsOfNumberOfColumns = Integer.toString(inputSchema.getColumnCount() + offset - 1).length();
389
+ if (maxLength.isPresent() && maxLength.get() <= digitsOfNumberOfColumns) {
390
+ throw new ConfigException("\"max_length\" in rule \"unique_number_suffix\" must be larger than digits of ((number of columns) + \"offset\" - 1)");
391
+ }
392
+ if (digits.isPresent() && digits.get() <= digitsOfNumberOfColumns) {
393
+ throw new ConfigException("\"digits\" in rule \"unique_number_suffix\" must be larger than digits of ((number of columns) + \"offset\" - 1)");
394
+ }
395
+
396
+ // Columns should not be truncated here initially. Uniqueness should be identified before truncated.
397
+
398
+ // Iterate for initial states.
399
+ HashSet<String> originalColumnNames = new HashSet<>();
400
+ HashMap<String, Integer> columnNameCountups = new HashMap<>();
401
+ for (Column column : inputSchema.getColumns()) {
402
+ originalColumnNames.add(column.getName());
403
+ columnNameCountups.put(column.getName(), offset);
404
+ }
405
+
406
+ Schema.Builder outputBuilder = Schema.builder();
407
+
408
+ HashSet<String> fixedColumnNames = new HashSet<>();
409
+ for (Column column : inputSchema.getColumns()) {
410
+ String truncatedName = column.getName();
411
+ if (column.getName().length() > maxLength.or(Integer.MAX_VALUE)) {
412
+ truncatedName = column.getName().substring(0, maxLength.get());
413
+ }
414
+
415
+ // Fix with the new name candidate if the new name does not conflict with the fixed names on the left.
416
+ // Conflicts with original names do not matter here.
417
+ if (!fixedColumnNames.contains(truncatedName)) {
418
+ // The original name is counted up.
419
+ columnNameCountups.put(column.getName(), columnNameCountups.get(column.getName()) + 1);
420
+ // The truncated name is fixed.
421
+ fixedColumnNames.add(truncatedName);
422
+ outputBuilder.add(truncatedName, column.getType());
423
+ continue;
424
+ }
425
+
426
+ int index = columnNameCountups.get(column.getName());
427
+ String concatenatedName;
428
+ do {
429
+ // This can be replaced with String#format(Locale.ENGLISH, ...), but Java's String#format does not
430
+ // have variable widths ("%*d" in C's printf). It cannot be very simple with String#format.
431
+ String differentiatorString = Integer.toString(index);
432
+ if (digits.isPresent() && (digits.get() > differentiatorString.length())) {
433
+ differentiatorString =
434
+ Strings.repeat("0", digits.get() - differentiatorString.length()) + differentiatorString;
435
+ }
436
+ differentiatorString = delimiter + differentiatorString;
437
+ concatenatedName = column.getName() + differentiatorString;
438
+ if (concatenatedName.length() > maxLength.or(Integer.MAX_VALUE)) {
439
+ concatenatedName =
440
+ column.getName().substring(0, maxLength.get() - differentiatorString.length())
441
+ + differentiatorString;
442
+ }
443
+ ++index;
444
+ // Conflicts with original names matter when creating new names with suffixes.
445
+ } while (fixedColumnNames.contains(concatenatedName) || originalColumnNames.contains(concatenatedName));
446
+ // The original name is counted up.
447
+ columnNameCountups.put(column.getName(), index);
448
+ // The concatenated&truncated name is fixed.
449
+ fixedColumnNames.add(concatenatedName);
450
+ outputBuilder.add(concatenatedName, column.getType());
451
+ }
452
+ return outputBuilder.build();
453
+ }
454
+
455
+ private static final ImmutableMap<String, String> CHARACTER_TYPE_KEYWORDS =
456
+ new ImmutableMap.Builder<String, String>().put("a-z", "a-z")
457
+ .put("A-Z", "A-Z")
458
+ .put("0-9", "0-9")
459
+ .build();
460
+
461
+ // TODO(dmikurube): Revisit the limitation.
462
+ // It should be practically acceptable to assume any output accepts column names with 8 characters at least...
463
+ private static final int minimumMaxLengthInUniqueNumberSuffix = 8;
464
+
465
+ private final Logger logger = Exec.getLogger(getClass());
93
466
  }
@@ -2,19 +2,28 @@ package org.embulk.standards;
2
2
 
3
3
  import com.google.common.collect.ImmutableList;
4
4
  import com.google.common.collect.ImmutableMap;
5
+ import com.google.common.collect.Iterables;
5
6
  import org.embulk.EmbulkTestRuntime;
6
7
  import org.embulk.config.ConfigException;
7
8
  import org.embulk.config.ConfigSource;
8
9
  import org.embulk.config.TaskSource;
10
+ import org.embulk.config.TaskValidationException;
9
11
  import org.embulk.spi.Column;
10
12
  import org.embulk.spi.FilterPlugin;
11
13
  import org.embulk.spi.Exec;
12
14
  import org.embulk.spi.Schema;
13
15
  import org.embulk.spi.SchemaConfigException;
14
16
  import org.embulk.standards.RenameFilterPlugin.PluginTask;
17
+
15
18
  import org.junit.Before;
16
19
  import org.junit.Rule;
17
20
  import org.junit.Test;
21
+ import org.junit.rules.ExpectedException;
22
+
23
+ import java.util.ArrayList;
24
+ import java.util.Arrays;
25
+ import java.util.HashMap;
26
+ import java.util.regex.PatternSyntaxException;
18
27
 
19
28
  import static org.embulk.spi.type.Types.STRING;
20
29
  import static org.embulk.spi.type.Types.TIMESTAMP;
@@ -27,11 +36,16 @@ public class TestRenameFilterPlugin
27
36
  @Rule
28
37
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
29
38
 
39
+ @Rule
40
+ public ExpectedException exception = ExpectedException.none();
41
+
30
42
  private final Schema SCHEMA = Schema.builder()
31
43
  .add("_c0", STRING)
32
44
  .add("_c1", TIMESTAMP)
33
45
  .build();
34
46
 
47
+ private final String DEFAULT = "__use_default__";
48
+
35
49
  private RenameFilterPlugin filter;
36
50
 
37
51
  @Before
@@ -137,4 +151,862 @@ public class TestRenameFilterPlugin
137
151
  assertTrue(t instanceof ConfigException);
138
152
  }
139
153
  }
154
+
155
+ @Test
156
+ public void checkRuleLowerToUpperRule()
157
+ {
158
+ final String original[] = { "_C0", "_C1", "_c2" };
159
+ final String expected[] = { "_C0", "_C1", "_C2" };
160
+ ConfigSource config = Exec.newConfigSource().set("rules",
161
+ ImmutableList.of(ImmutableMap.of("rule", "lower_to_upper")));
162
+ renameAndCheckSchema(config, original, expected);
163
+ }
164
+
165
+ @Test
166
+ public void checkTruncateRule()
167
+ {
168
+ final String original[] = { "foo", "bar", "gj", "foobar", "foobarbaz" };
169
+ final String expected[] = { "foo", "bar", "gj", "foo", "foo" };
170
+ ConfigSource config = Exec.newConfigSource().set("rules",
171
+ ImmutableList.of(ImmutableMap.of("rule", "truncate", "max_length", "3")));
172
+ renameAndCheckSchema(config, original, expected);
173
+ }
174
+
175
+ @Test
176
+ public void checkTruncateRuleDefault()
177
+ {
178
+ final String original[] = {
179
+ "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" };
180
+ final String expected[] = {
181
+ "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678" };
182
+ ConfigSource config = Exec.newConfigSource().set("rules",
183
+ ImmutableList.of(ImmutableMap.of("rule", "truncate")));
184
+ renameAndCheckSchema(config, original, expected);
185
+ }
186
+
187
+ @Test
188
+ public void checkTruncateRuleNegative()
189
+ {
190
+ final String original[] = { "foo" };
191
+ ConfigSource config = Exec.newConfigSource().set("rules",
192
+ ImmutableList.of(ImmutableMap.of("rule", "truncate", "max_length", -1)));
193
+ exception.expect(TaskValidationException.class);
194
+ // TODO(dmikurube): Expect "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
195
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
196
+ renameAndCheckSchema(config, original, original);
197
+ }
198
+
199
+ @Test
200
+ public void checkRuleUpperToLowerRule()
201
+ {
202
+ final String original[] = { "_C0", "_C1", "_c2" };
203
+ final String expected[] = { "_c0", "_c1", "_c2" };
204
+ ConfigSource config = Exec.newConfigSource().set("rules",
205
+ ImmutableList.of(ImmutableMap.of("rule", "upper_to_lower")));
206
+ renameAndCheckSchema(config, original, expected);
207
+ }
208
+
209
+ @Test
210
+ public void checkCharacterTypesRulePassAlphabet()
211
+ {
212
+ final String original[] = { "Internal$Foo0123--Bar" };
213
+ final String expected[] = { "Internal_Foo______Bar" };
214
+ final String pass_types[] = { "a-z", "A-Z" };
215
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
216
+ }
217
+
218
+ @Test
219
+ public void checkCharacterTypesRulePassAlphanumeric()
220
+ {
221
+ final String original[] = { "Internal$Foo0123--Bar" };
222
+ final String expected[] = { "Internal_Foo0123__Bar" };
223
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
224
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
225
+ }
226
+
227
+ @Test
228
+ public void checkCharacterTypesRulePassLowercase()
229
+ {
230
+ final String original[] = { "Internal$Foo0123--Bar" };
231
+ final String expected[] = { "_nternal__oo_______ar" };
232
+ final String pass_types[] = { "a-z" };
233
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
234
+ }
235
+
236
+ @Test
237
+ public void checkCharacterTypesRulePassLowerwording()
238
+ {
239
+ final String original[] = { "Internal$Foo_0123--Bar" };
240
+ final String expected[] = { "-nternal--oo_0123---ar" };
241
+ final String pass_types[] = { "a-z", "0-9" };
242
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "_", "-");
243
+ }
244
+
245
+ @Test
246
+ public void checkCharacterTypesRulePassNumeric()
247
+ {
248
+ final String original[] = { "Internal$Foo_0123--Bar" };
249
+ final String expected[] = { "_____________0123_____" };
250
+ final String pass_types[] = { "0-9" };
251
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
252
+ }
253
+
254
+ @Test
255
+ public void checkCharacterTypesRulePassUppercase()
256
+ {
257
+ final String original[] = { "Internal$Foo_0123--Bar" };
258
+ final String expected[] = { "I________F_________B__" };
259
+ final String pass_types[] = { "A-Z" };
260
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
261
+ }
262
+
263
+ @Test
264
+ public void checkCharacterTypesRulePassUpperwording()
265
+ {
266
+ final String original[] = { "Internal$Foo_0123--Bar" };
267
+ final String expected[] = { "I--------F--_0123--B--" };
268
+ final String pass_types[] = { "A-Z", "0-9" };
269
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "_", "-");
270
+ }
271
+
272
+ @Test
273
+ public void checkCharacterTypesRulePassWording()
274
+ {
275
+ final String original[] = { "Internal$Foo_0123--Bar" };
276
+ final String expected[] = { "Internal-Foo_0123--Bar" };
277
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
278
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "_", "-");
279
+ }
280
+
281
+ @Test
282
+ public void checkCharacterTypesRulePassCombination()
283
+ {
284
+ final String original[] = { "@Foobar0123_$" };
285
+ final String expected[] = { "__oobar0123__" };
286
+ final String pass_types[] = { "0-9", "a-z" };
287
+ checkCharacterTypesRuleInternal(original, expected, pass_types, "");
288
+ }
289
+
290
+ @Test
291
+ public void checkCharacterTypesRuleLongReplace()
292
+ {
293
+ final String original[] = { "fooBAR" };
294
+ final String pass_types[] = { "a-z" };
295
+ exception.expect(TaskValidationException.class);
296
+ // TODO(dmikurube): Expect "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
297
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
298
+ checkCharacterTypesRuleInternal(original, original, pass_types, "", "___");
299
+ }
300
+
301
+ @Test
302
+ public void checkCharacterTypesRuleEmptyReplace()
303
+ {
304
+ final String original[] = { "fooBAR" };
305
+ final String pass_types[] = { "a-z" };
306
+ exception.expect(TaskValidationException.class);
307
+ // TODO(dmikurube): Expect "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
308
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
309
+ checkCharacterTypesRuleInternal(original, original, pass_types, "", "");
310
+ }
311
+
312
+ // TODO(dmikurube): Test a nil/null replace.
313
+ // - rule: character_types
314
+ // replace:
315
+
316
+ @Test
317
+ public void checkCharacterTypesRuleUnknownType()
318
+ {
319
+ final String original[] = { "fooBAR" };
320
+ final String pass_types[] = { "some_unknown_keyword" };
321
+ exception.expect(ConfigException.class);
322
+ // TODO(dmikurube): Expect "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
323
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
324
+ checkCharacterTypesRuleInternal(original, original, pass_types, "");
325
+ }
326
+
327
+ @Test
328
+ public void checkCharacterTypesRuleForbiddenCharSequence()
329
+ {
330
+ final String original[] = { "fooBAR" };
331
+ final String pass_types[] = {};
332
+ exception.expect(ConfigException.class);
333
+ // TODO(dmikurube): Expect "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
334
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
335
+ checkCharacterTypesRuleInternal(original, original, pass_types, "\\E");
336
+ }
337
+
338
+ private void checkCharacterTypesRuleInternal(
339
+ final String original[],
340
+ final String expected[],
341
+ final String pass_types[],
342
+ final String pass_characters)
343
+ {
344
+ checkCharacterTypesRuleInternal(original, expected, pass_types, pass_characters, DEFAULT);
345
+ }
346
+
347
+ private void checkCharacterTypesRuleInternal(
348
+ final String original[],
349
+ final String expected[],
350
+ final String pass_types[],
351
+ final String pass_characters,
352
+ final String replace)
353
+ {
354
+ HashMap<String, Object> parameters = new HashMap<>();
355
+ parameters.put("rule", "character_types");
356
+ if (pass_types != null) {
357
+ parameters.put("pass_types", ImmutableList.copyOf(pass_types));
358
+ }
359
+ if (!pass_characters.equals(DEFAULT)) {
360
+ parameters.put("pass_characters", pass_characters);
361
+ }
362
+ if (!replace.equals(DEFAULT)) {
363
+ parameters.put("replace", replace);
364
+ }
365
+ ConfigSource config = Exec.newConfigSource().set("rules",
366
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
367
+ renameAndCheckSchema(config, original, expected);
368
+ }
369
+
370
+ @Test
371
+ public void checkRegexReplaceRule1()
372
+ {
373
+ final String original[] = { "foobarbaz" };
374
+ final String expected[] = { "hogebarbaz" };
375
+ checkRegexReplaceRuleInternal(original, expected, "foo", "hoge");
376
+ }
377
+
378
+ @Test
379
+ public void checkRegexReplaceRule2()
380
+ {
381
+ final String original[] = { "200_dollars" };
382
+ final String expected[] = { "USD200" };
383
+ checkRegexReplaceRuleInternal(original, expected, "([0-9]+)_dollars", "USD$1");
384
+ }
385
+
386
+ private void checkRegexReplaceRuleInternal(
387
+ final String original[],
388
+ final String expected[],
389
+ final String match,
390
+ final String replace)
391
+ {
392
+ HashMap<String, Object> parameters = new HashMap<>();
393
+ parameters.put("rule", "regex_replace");
394
+ parameters.put("match", match);
395
+ parameters.put("replace", replace);
396
+ ConfigSource config = Exec.newConfigSource().set("rules",
397
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
398
+ renameAndCheckSchema(config, original, expected);
399
+ }
400
+
401
+ @Test
402
+ public void checkFirstCharacterTypesRuleReplaceSingleHyphen()
403
+ {
404
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
405
+ final String expected[] = { "_oo", "_12foo", "_bar", "_AZ", "_ban", "_jar", "_zip", "-zap" };
406
+ final String pass_types[] = {};
407
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types, "-");
408
+ }
409
+
410
+ @Test
411
+ public void checkFirstCharacterTypesRuleReplaceMultipleSingles()
412
+ {
413
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
414
+ final String expected[] = { "_oo", "_12foo", "@bar", "_AZ", "_ban", "_jar", "*zip", "-zap" };
415
+ final String pass_types[] = {};
416
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types, "-@*");
417
+ }
418
+
419
+ @Test
420
+ public void checkFirstCharacterTypesRuleReplaceAlphabet()
421
+ {
422
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
423
+ final String expected[] = { "foo", "_12foo", "_bar", "BAZ", "_ban", "_jar", "_zip", "_zap" };
424
+ final String pass_types[] = { "a-z", "A-Z" };
425
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
426
+ }
427
+
428
+ @Test
429
+ public void checkFirstCharacterTypesRuleReplaceAlphanumeric()
430
+ {
431
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
432
+ final String expected[] = { "foo", "012foo", "_bar", "BAZ", "_ban", "_jar", "_zip", "_zap" };
433
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
434
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
435
+ }
436
+
437
+ @Test
438
+ public void checkFirstCharacterTypesRuleReplaceLowercase()
439
+ {
440
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
441
+ final String expected[] = { "foo", "_12foo", "_bar", "_AZ", "_ban", "_jar", "_zip", "_zap" };
442
+ final String pass_types[] = { "a-z" };
443
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
444
+ }
445
+
446
+ @Test
447
+ public void checkFirstCharacterTypesRuleReplaceLowerwording()
448
+ {
449
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
450
+ final String expected[] = { "foo", "012foo", "-bar", "-AZ", "-ban", "_jar", "-zip", "-zap" };
451
+ final String pass_types[] = { "a-z", "0-9" };
452
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "-", pass_types, "_");
453
+ }
454
+
455
+ @Test
456
+ public void checkFirstCharacterTypesRuleReplaceNumeric()
457
+ {
458
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
459
+ final String expected[] = { "_oo", "012foo", "_bar", "_AZ", "_ban", "_jar", "_zip", "_zap" };
460
+ final String pass_types[] = { "0-9" };
461
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
462
+ }
463
+
464
+ @Test
465
+ public void checkFirstCharacterTypesRuleReplaceUppercase()
466
+ {
467
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
468
+ final String expected[] = { "_oo", "_12foo", "_bar", "BAZ", "_ban", "_jar", "_zip", "_zap" };
469
+ final String pass_types[] = { "A-Z" };
470
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "_", pass_types);
471
+ }
472
+
473
+ @Test
474
+ public void checkFirstCharacterTypesRuleReplaceUpperwording()
475
+ {
476
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
477
+ final String expected[] = { "-oo", "012foo", "-bar", "BAZ", "-ban", "_jar", "-zip", "-zap" };
478
+ final String pass_types[] = { "A-Z", "0-9" };
479
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "-", pass_types, "_");
480
+ }
481
+
482
+ @Test
483
+ public void checkFirstCharacterTypesRuleReplaceWording()
484
+ {
485
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
486
+ final String expected[] = { "foo", "012foo", "$bar", "BAZ", "$ban", "_jar", "$zip", "$zap" };
487
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
488
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, "$", pass_types, "_");
489
+ }
490
+
491
+ @Test
492
+ public void checkFirstCharacterTypesRuleReplaceUnknownFirst()
493
+ {
494
+ final String original[] = { "foo" };
495
+ final String pass_types[] = { "some_unknown_type" };
496
+ exception.expect(ConfigException.class);
497
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
498
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
499
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "_", pass_types);
500
+ }
501
+
502
+ @Test
503
+ public void checkFirstCharacterTypesRuleReplaceForbiddenCharSequence()
504
+ {
505
+ final String original[] = { "foo" };
506
+ final String pass_types[] = {};
507
+ exception.expect(ConfigException.class);
508
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
509
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
510
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "_", pass_types, "\\E");
511
+ }
512
+
513
+ @Test
514
+ public void checkFirstCharacterTypesRulePrefixSingleHyphen()
515
+ {
516
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
517
+ final String expected[] = { "_foo", "_012foo", "_@bar", "_BAZ", "_&ban", "__jar", "_*zip", "-zap" };
518
+ final String pass_types[] = {};
519
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types, "-");
520
+ }
521
+
522
+ @Test
523
+ public void checkFirstCharacterTypesRulePrefixMultipleSingles()
524
+ {
525
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
526
+ final String expected[] = { "_foo", "_012foo", "@bar", "_BAZ", "_&ban", "__jar", "*zip", "-zap" };
527
+ final String pass_types[] = {};
528
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types, "-@*");
529
+ }
530
+
531
+ @Test
532
+ public void checkFirstCharacterTypesRulePrefixAlphabet()
533
+ {
534
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
535
+ final String expected[] = { "foo", "_012foo", "_@bar", "BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
536
+ final String pass_types[] = { "a-z", "A-Z" };
537
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
538
+ }
539
+
540
+ @Test
541
+ public void checkFirstCharacterTypesRulePrefixAlphanumeric()
542
+ {
543
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
544
+ final String expected[] = { "foo", "012foo", "_@bar", "BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
545
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
546
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
547
+ }
548
+
549
+ @Test
550
+ public void checkFirstCharacterTypesRulePrefixLowercase()
551
+ {
552
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
553
+ final String expected[] = { "foo", "_012foo", "_@bar", "_BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
554
+ final String pass_types[] = { "a-z" };
555
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
556
+ }
557
+
558
+ @Test
559
+ public void checkFirstCharacterTypesRulePrefixLowerwording()
560
+ {
561
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
562
+ final String expected[] = { "foo", "012foo", "-@bar", "-BAZ", "-&ban", "_jar", "-*zip", "--zap" };
563
+ final String pass_types[] = { "a-z", "0-9" };
564
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "-", pass_types, "_");
565
+ }
566
+
567
+ @Test
568
+ public void checkFirstCharacterTypesRulePrefixNumeric()
569
+ {
570
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
571
+ final String expected[] = { "_foo", "012foo", "_@bar", "_BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
572
+ final String pass_types[] = { "0-9" };
573
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
574
+ }
575
+
576
+ @Test
577
+ public void checkFirstCharacterTypesRulePrefixUppercase()
578
+ {
579
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
580
+ final String expected[] = { "_foo", "_012foo", "_@bar", "BAZ", "_&ban", "__jar", "_*zip", "_-zap" };
581
+ final String pass_types[] = { "A-Z" };
582
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "_", pass_types);
583
+ }
584
+
585
+ @Test
586
+ public void checkFirstCharacterTypesRulePrefixUpperwording()
587
+ {
588
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
589
+ final String expected[] = { "-foo", "012foo", "-@bar", "BAZ", "-&ban", "_jar", "-*zip", "--zap" };
590
+ final String pass_types[] = { "A-Z", "0-9" };
591
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "-", pass_types, "_");
592
+ }
593
+
594
+ @Test
595
+ public void checkFirstCharacterTypesRulePrefixWording()
596
+ {
597
+ final String original[] = { "foo", "012foo", "@bar", "BAZ", "&ban", "_jar", "*zip", "-zap" };
598
+ final String expected[] = { "foo", "012foo", "$@bar", "BAZ", "$&ban", "_jar", "$*zip", "$-zap" };
599
+ final String pass_types[] = { "a-z", "A-Z", "0-9" };
600
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, "$", pass_types, "_");
601
+ }
602
+
603
+ @Test
604
+ public void checkFirstCharacterTypesRuleEmptyPrefix()
605
+ {
606
+ final String original[] = { "foo" };
607
+ final String pass_types[] = {};
608
+ exception.expect(ConfigException.class);
609
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
610
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
611
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "", pass_types);
612
+ }
613
+
614
+ @Test
615
+ public void checkFirstCharacterTypesRuleLongPrefix()
616
+ {
617
+ final String original[] = { "foo" };
618
+ final String pass_types[] = {};
619
+ exception.expect(ConfigException.class);
620
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
621
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
622
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "__", pass_types);
623
+ }
624
+
625
+ @Test
626
+ public void checkFirstCharacterTypesRuleEmptyReplace()
627
+ {
628
+ final String original[] = { "foo" };
629
+ final String pass_types[] = {};
630
+ exception.expect(ConfigException.class);
631
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
632
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
633
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "", pass_types);
634
+ }
635
+
636
+ @Test
637
+ public void checkFirstCharacterTypesRuleLongReplace()
638
+ {
639
+ final String original[] = { "foo" };
640
+ final String pass_types[] = {};
641
+ exception.expect(ConfigException.class);
642
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
643
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
644
+ checkFirstCharacterTypesRuleReplaceInternal(original, original, "__", pass_types);
645
+ }
646
+
647
+ @Test
648
+ public void checkFirstCharacterTypesRulePrefixUnknownFirst()
649
+ {
650
+ final String original[] = { "foo" };
651
+ final String pass_types[] = { "some_unknown_type" };
652
+ exception.expect(ConfigException.class);
653
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
654
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
655
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "_", pass_types);
656
+ }
657
+
658
+ @Test
659
+ public void checkFirstCharacterTypesRulePrefixForbiddenCharSequence()
660
+ {
661
+ final String original[] = { "foo" };
662
+ final String pass_types[] = {};
663
+ exception.expect(ConfigException.class);
664
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
665
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
666
+ checkFirstCharacterTypesRulePrefixInternal(original, original, "\\E", pass_types);
667
+ }
668
+
669
+ @Test
670
+ public void checkFirstCharacterTypesRuleBothReplacePrefix()
671
+ {
672
+ HashMap<String, Object> parameters = new HashMap<>();
673
+ parameters.put("rule", "first_character_types");
674
+ parameters.put("replace", "_");
675
+ parameters.put("prefix", "_");
676
+ ConfigSource config = Exec.newConfigSource().set("rules",
677
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
678
+ exception.expect(ConfigException.class);
679
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
680
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
681
+ renameAndCheckSchema(config, new String[0], new String[0]);
682
+ }
683
+
684
+ @Test
685
+ public void checkFirstCharacterTypesRuleNeitherReplacePrefix()
686
+ {
687
+ HashMap<String, Object> parameters = new HashMap<>();
688
+ parameters.put("rule", "first_character_types");
689
+ ConfigSource config = Exec.newConfigSource().set("rules",
690
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
691
+ exception.expect(ConfigException.class);
692
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
693
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
694
+ renameAndCheckSchema(config, new String[0], new String[0]);
695
+ }
696
+
697
+ private void checkFirstCharacterTypesRuleReplaceInternal(
698
+ final String original[],
699
+ final String expected[],
700
+ final String replace,
701
+ final String pass_types[]) {
702
+ checkFirstCharacterTypesRuleReplaceInternal(original, expected, replace, pass_types, DEFAULT);
703
+ }
704
+
705
+ private void checkFirstCharacterTypesRuleReplaceInternal(
706
+ final String original[],
707
+ final String expected[],
708
+ final String replace,
709
+ final String pass_types[],
710
+ final String pass_characters)
711
+ {
712
+ HashMap<String, Object> parameters = new HashMap<>();
713
+ parameters.put("rule", "first_character_types");
714
+ if (pass_types.length > 0) {
715
+ parameters.put("pass_types", Arrays.asList(pass_types));
716
+ }
717
+ if (!pass_characters.equals(DEFAULT)) {
718
+ parameters.put("pass_characters", pass_characters);
719
+ }
720
+ if (!replace.equals(DEFAULT)) {
721
+ parameters.put("replace", replace);
722
+ }
723
+ ConfigSource config = Exec.newConfigSource().set("rules",
724
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
725
+ renameAndCheckSchema(config, original, expected);
726
+ }
727
+
728
+ private void checkFirstCharacterTypesRulePrefixInternal(
729
+ final String original[],
730
+ final String expected[],
731
+ final String prefix,
732
+ final String pass_types[]) {
733
+ checkFirstCharacterTypesRulePrefixInternal(original, expected, prefix, pass_types, DEFAULT);
734
+ }
735
+
736
+ private void checkFirstCharacterTypesRulePrefixInternal(
737
+ final String original[],
738
+ final String expected[],
739
+ final String prefix,
740
+ final String pass_types[],
741
+ final String pass_characters)
742
+ {
743
+ HashMap<String, Object> parameters = new HashMap<>();
744
+ parameters.put("rule", "first_character_types");
745
+ if (pass_types.length > 0) {
746
+ parameters.put("pass_types", Arrays.asList(pass_types));
747
+ }
748
+ if (!pass_characters.equals(DEFAULT)) {
749
+ parameters.put("pass_characters", pass_characters);
750
+ }
751
+ if (!prefix.equals(DEFAULT)) {
752
+ parameters.put("prefix", prefix);
753
+ }
754
+ ConfigSource config = Exec.newConfigSource().set("rules",
755
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
756
+ renameAndCheckSchema(config, original, expected);
757
+ }
758
+
759
+ public void checkUniqueNumberSuffixRuleEmptyDelimiter()
760
+ {
761
+ final String columnNames[] = { "c" };
762
+ try {
763
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, "");
764
+ } catch (Throwable t) {
765
+ assertTrue(t instanceof ConfigException);
766
+ }
767
+ }
768
+
769
+ @Test
770
+ public void checkUniqueNumberSuffixRuleLongDelimiter()
771
+ {
772
+ final String columnNames[] = { "c" };
773
+ try {
774
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, "__");
775
+ } catch (Throwable t) {
776
+ assertTrue(t instanceof ConfigException);
777
+ }
778
+ }
779
+
780
+ @Test
781
+ public void checkUniqueNumberSuffixRuleDigitDelimiter()
782
+ {
783
+ final String columnNames[] = { "c" };
784
+ try {
785
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, "2");
786
+ } catch (Throwable t) {
787
+ assertTrue(t instanceof ConfigException);
788
+ }
789
+ }
790
+
791
+ @Test
792
+ public void checkUniqueNumberSuffixRuleShortMaxLength()
793
+ {
794
+ final String columnNames[] = { "c" };
795
+ try {
796
+ checkUniqueNumberSuffixRuleInternal(columnNames, columnNames, DEFAULT, -1, 7);
797
+ } catch (Throwable t) {
798
+ assertTrue(t instanceof ConfigException);
799
+ }
800
+ }
801
+
802
+ // TODO(dmikurube): Test a nil/null delimiter in "unique".
803
+ // - rule: unique
804
+ // delimiter:
805
+
806
+ @Test
807
+ public void checkUniqueNumberSuffixRule0()
808
+ {
809
+ final String originalColumnNames[] = { "a", "b", "c", "d", "e" };
810
+ final String expectedColumnNames[] = { "a", "b", "c", "d", "e" };
811
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
812
+ }
813
+
814
+ @Test
815
+ public void checkUniqueNumberSuffixRule1()
816
+ {
817
+ final String originalColumnNames[] = { "c", "c", "c1", "c2", "c2" };
818
+ final String expectedColumnNames[] = { "c", "c_2", "c1", "c2", "c2_2" };
819
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
820
+ }
821
+
822
+ @Test
823
+ public void checkUniqueNumberSuffixRule2()
824
+ {
825
+ final String originalColumnNames[] = { "c", "c", "c_1", "c_3", "c" };
826
+ final String expectedColumnNames[] = { "c", "c_2", "c_1", "c_3", "c_4" };
827
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
828
+ }
829
+
830
+ @Test
831
+ public void checkUniqueNumberSuffixRule3()
832
+ {
833
+ final String originalColumnNames[] = {
834
+ "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c_1", "c_1" };
835
+ final String expectedColumnNames[] = {
836
+ "c", "c_2", "c_3", "c_4", "c_5", "c_6", "c_7", "c_8", "c_9", "c_10", "c_1", "c_1_2" };
837
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames);
838
+ }
839
+
840
+ @Test
841
+ public void checkUniqueNumberSuffixRule4DifferentDelimiter()
842
+ {
843
+ final String originalColumnNames[] = { "c", "c", "c1", "c2", "c2" };
844
+ final String expectedColumnNames[] = { "c", "c-2", "c1", "c2", "c2-2" };
845
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, "-");
846
+ }
847
+
848
+ @Test
849
+ public void checkUniqueNumberSuffixRule5Digits()
850
+ {
851
+ final String originalColumnNames[] = { "c", "c", "c1", "c2", "c2" };
852
+ final String expectedColumnNames[] = { "c", "c_0002", "c1", "c2", "c2_0002" };
853
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, 4, -1);
854
+ }
855
+
856
+ @Test
857
+ public void checkUniqueNumberSuffixRule6MaxLength1()
858
+ {
859
+ final String originalColumnNames[] = { "column", "column", "column_1", "column_2", "column_2" };
860
+ final String expectedColumnNames[] = { "column", "column_3", "column_1", "column_2", "column_4" };
861
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
862
+ }
863
+
864
+ @Test
865
+ public void checkUniqueNumberSuffixRule7()
866
+ {
867
+ final String originalColumnNames[] = { "column", "column", "column_2", "column_3" };
868
+ final String expectedColumnNames[] = { "column", "column_4", "column_2", "column_3" };
869
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
870
+ }
871
+
872
+ @Test
873
+ public void checkUniqueNumberSuffixRule8MaxLength2()
874
+ {
875
+ final String originalColumnNames[] = {
876
+ "column", "colum", "column", "colum", "column", "colum", "column", "colum", "column",
877
+ "colum", "column", "colum", "column", "colum", "column", "colum", "column", "colum",
878
+ "column", "colum", "column", "colum" };
879
+ final String expectedColumnNames[] = {
880
+ "column", "colum", "column_2", "colum_2", "column_3", "colum_3", "column_4", "colum_4", "column_5",
881
+ "colum_5", "column_6", "colum_6", "column_7", "colum_7", "column_8", "colum_8", "column_9", "colum_9",
882
+ "colum_10", "colum_11", "colum_12", "colum_13" };
883
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
884
+ }
885
+
886
+ @Test
887
+ public void checkUniqueNumberSuffixRule9MaxLength3()
888
+ {
889
+ final String originalColumnNames[] = {
890
+ "column", "column", "column", "column", "column", "column", "column", "column", "column",
891
+ "colum", "colum", "colum", "colum", "colum", "colum", "colum", "colum",
892
+ "column", "colum", "column", "colum", "column" };
893
+ final String expectedColumnNames[] = {
894
+ "column", "column_2", "column_3", "column_4", "column_5", "column_6", "column_7", "column_8", "column_9",
895
+ "colum", "colum_2", "colum_3", "colum_4", "colum_5", "colum_6", "colum_7", "colum_8",
896
+ "colum_10", "colum_9", "colum_11", "colum_12", "colum_13" };
897
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, 8);
898
+ }
899
+
900
+ @Test
901
+ public void checkUniqueNumberSuffixRule10EsteemOriginalNames()
902
+ {
903
+ final String originalColumnNames[] = { "c", "c", "c_2" };
904
+ final String expectedColumnNames[] = { "c", "c_3", "c_2" };
905
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, expectedColumnNames, DEFAULT, -1, -1);
906
+ }
907
+
908
+ @Test
909
+ public void checkUniqueNumberSuffixRuleNegativeLength()
910
+ {
911
+ final String originalColumnNames[] = { "column" };
912
+ exception.expect(ConfigException.class);
913
+ // TODO(dmikurube): Except "Caused by": exception.expectCause(instanceOf(JsonMappingException.class));
914
+ // Needs to import org.hamcrest.Matchers... in addition to org.junit...
915
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames, originalColumnNames, DEFAULT, -1, -2);
916
+ }
917
+
918
+ private void checkUniqueNumberSuffixRuleInternal(
919
+ final String originalColumnNames[],
920
+ final String expectedColumnNames[]) {
921
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames,
922
+ expectedColumnNames,
923
+ DEFAULT,
924
+ -1,
925
+ -1);
926
+ }
927
+
928
+ private void checkUniqueNumberSuffixRuleInternal(
929
+ final String originalColumnNames[],
930
+ final String expectedColumnNames[],
931
+ String delimiter) {
932
+ checkUniqueNumberSuffixRuleInternal(originalColumnNames,
933
+ expectedColumnNames,
934
+ delimiter,
935
+ -1,
936
+ -1);
937
+ }
938
+
939
+ private void checkUniqueNumberSuffixRuleInternal(
940
+ final String originalColumnNames[],
941
+ final String expectedColumnNames[],
942
+ String delimiter,
943
+ int digits,
944
+ int max_length)
945
+ {
946
+ Schema.Builder originalSchemaBuilder = Schema.builder();
947
+ for (String originalColumnName : originalColumnNames) {
948
+ originalSchemaBuilder.add(originalColumnName, STRING);
949
+ }
950
+ final Schema ORIGINAL_SCHEMA = originalSchemaBuilder.build();
951
+
952
+ HashMap<String, Object> parameters = new HashMap<>();
953
+ parameters.put("rule", "unique_number_suffix");
954
+ if (!delimiter.equals(DEFAULT)) {
955
+ parameters.put("delimiter", delimiter);
956
+ }
957
+ if (digits >= 0) {
958
+ parameters.put("digits", digits);
959
+ }
960
+ if (max_length != -1) {
961
+ parameters.put("max_length", max_length);
962
+ }
963
+ ConfigSource pluginConfig = Exec.newConfigSource().set("rules",
964
+ ImmutableList.of(ImmutableMap.copyOf(parameters)));
965
+
966
+ filter.transaction(pluginConfig, ORIGINAL_SCHEMA, new FilterPlugin.Control() {
967
+ @Override
968
+ public void run(TaskSource task, Schema newSchema)
969
+ {
970
+ ArrayList<String> resolvedColumnNamesList = new ArrayList<>(newSchema.size());
971
+ for (Column resolvedColumn : newSchema.getColumns()) {
972
+ resolvedColumnNamesList.add(resolvedColumn.getName());
973
+ }
974
+ String[] resolvedColumnNames = Iterables.toArray(resolvedColumnNamesList, String.class);
975
+ assertEquals(expectedColumnNames, resolvedColumnNames);
976
+ for (int i = 0; i < expectedColumnNames.length; ++i) {
977
+ Column original = ORIGINAL_SCHEMA.getColumn(i);
978
+ Column resolved = newSchema.getColumn(i);
979
+ assertEquals(original.getType(), resolved.getType());
980
+ }
981
+ }
982
+ });
983
+ }
984
+
985
+ private Schema makeSchema(final String columnNames[])
986
+ {
987
+ Schema.Builder builder = new Schema.Builder();
988
+ for (String columnName : columnNames) {
989
+ builder.add(columnName, STRING);
990
+ }
991
+ return builder.build();
992
+ }
993
+
994
+ private void renameAndCheckSchema(ConfigSource config,
995
+ final String original[],
996
+ final String expected[])
997
+ {
998
+ final Schema originalSchema = makeSchema(original);
999
+ filter.transaction(config, originalSchema, new FilterPlugin.Control() {
1000
+ @Override
1001
+ public void run(TaskSource task, Schema renamedSchema)
1002
+ {
1003
+ assertEquals(originalSchema.getColumnCount(), renamedSchema.getColumnCount());
1004
+ assertEquals(expected.length, renamedSchema.getColumnCount());
1005
+ for (int i = 0; i < renamedSchema.getColumnCount(); ++i) {
1006
+ assertEquals(originalSchema.getColumnType(i), renamedSchema.getColumnType(i));
1007
+ assertEquals(expected[i], renamedSchema.getColumnName(i));
1008
+ }
1009
+ }
1010
+ });
1011
+ }
140
1012
  }