embulk 0.8.15 → 0.8.16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -1
  3. data/appveyor.yml +8 -0
  4. data/build.gradle +86 -45
  5. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
  6. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
  7. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
  8. data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
  9. data/embulk-docs/src/built-in.rst +34 -0
  10. data/embulk-docs/src/release.rst +1 -0
  11. data/embulk-docs/src/release/release-0.8.16.rst +43 -0
  12. data/embulk-standards/build.gradle +1 -0
  13. data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
  14. data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
  15. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
  16. data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
  17. data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
  18. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
  19. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
  20. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
  21. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
  22. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
  23. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
  24. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
  25. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
  26. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
  27. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
  28. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
  29. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
  30. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
  31. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
  32. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
  33. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
  34. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
  35. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
  36. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
  37. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
  38. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
  39. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
  40. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
  41. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
  42. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
  43. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
  44. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
  45. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
  46. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
  47. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
  48. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
  49. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
  50. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
  51. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
  52. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
  53. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
  54. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
  55. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
  56. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
  57. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
  58. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
  59. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
  60. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
  61. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
  62. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
  63. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
  64. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
  65. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
  66. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
  67. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
  68. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
  69. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
  70. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
  71. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
  72. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
  73. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
  74. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
  75. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
  76. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
  77. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
  78. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
  79. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
  80. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
  81. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
  82. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
  83. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
  84. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
  85. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
  86. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
  87. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
  88. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
  89. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
  90. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
  91. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
  92. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
  93. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
  94. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
  95. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
  96. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
  97. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
  98. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
  99. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
  100. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
  101. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
  102. data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
  103. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  104. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  105. data/gradlew +30 -21
  106. data/gradlew.bat +4 -10
  107. data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
  108. data/lib/embulk/data/new/java/build.gradle.erb +5 -3
  109. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  110. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  111. data/lib/embulk/data/new/java/gradlew +30 -21
  112. data/lib/embulk/data/new/java/gradlew.bat +4 -10
  113. data/lib/embulk/guess/csv.rb +44 -22
  114. data/lib/embulk/guess/newline.rb +10 -4
  115. data/lib/embulk/guess_plugin.rb +3 -1
  116. data/lib/embulk/java/time_helper.rb +2 -2
  117. data/lib/embulk/version.rb +1 -1
  118. metadata +92 -5
@@ -835,6 +835,40 @@ Columns: not recommended
835
835
  .. hint::
836
836
  ``columns`` are applied before ``rules`` if ``columns`` and ``rules`` are specified together. (It is discouraged to specify them together, though.)
837
837
 
838
+
839
+ Remove columns filter plugin
840
+ -----------------------------
841
+
842
+ The ``remove_columns`` filter plugin removes columns from schema.
843
+
844
+ Options
845
+ ~~~~~~~~
846
+
847
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
848
+ | name | type | description | required? |
849
+ +==========================+==========+============================================================+=======================+
850
+ | remove | array | An array of names of columns that it removes from schema. | ``[]`` by default |
851
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
852
+ | keep | array | An array of names of columns that it keeps in schema. | ``[]`` by default |
853
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
854
+ | accept_unmatched_columns | boolean | If true, skip columns that aren't included in schemas. | ``false`` by default |
855
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
856
+
857
+
858
+ The ``remove`` and ``keep`` options are mutually exclusive: exactly one of them must be specified.
859
+
860
+ Example
861
+ ~~~~~~~~
862
+
863
+ .. code-block:: yaml
864
+
865
+ # This configuration removes "_c0" and "_c1" named columns from schema.
866
+ filters:
867
+ ...
868
+ - type: remove_columns
869
+ remove: ["_c0", "_c1"]
870
+
871
+
838
872
  Local executor plugin
839
873
  ----------------------
840
874
 
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.16
7
8
  release/release-0.8.15
8
9
  release/release-0.8.14
9
10
  release/release-0.8.13
@@ -0,0 +1,43 @@
1
+ Release 0.8.16
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Added remove_columns filter plugin [#530]
8
+
9
+ * http://www.embulk.org/docs/built-in.html#remove_columns-filter-plugin
10
+
11
+ * Supported timestamp format "%Q". (@hiroyuki-sato) [#468, #531]
12
+
13
+ * Improved csv guess plugin:
14
+
15
+ * Added semicolon as a delimiter candidate suggested by the csv guess plugin. [#527]
16
+
17
+ * Enabled suggesting for a few rows [#533]
18
+
19
+ * Enabled suggesting for a single column [#540]
20
+
21
+ * Changed guessing to remove the minimum size limit of 40 bytes. [#518]
22
+
23
+ * Refactored and introduced TestingEmbulk#{Input,Parser,Output}Builder to embulk-test. [#513, #514, #526]
24
+
25
+ * Fixed PageBuilder to avoid NullPointerException. [#535]
26
+
27
+ * Fixed ResumableInputStream to avoid NullPointerException. [#472]
28
+
29
+ * Fixed TaskValidationException to inherit ConfigException. [#520]
30
+
31
+ * Fixed build.gradle to use Task.doLast instead of Task.leftShift. [#536]
32
+
33
+ * Fixed build failure on AppVeyor by FileNotFoundException. [#537]
34
+
35
+ * Added updateJRuby task to make it easy to upgrade version of JRuby. [#538]
36
+
37
+ * Upgraded Gradle to v3.2.1. [#528]
38
+
39
+ * Release notes: https://docs.gradle.org/3.2.1/release-notes
40
+
41
+ Release Date
42
+ ------------------
43
+ 2017-01-27
@@ -3,4 +3,5 @@ dependencies {
3
3
  compile 'org.apache.commons:commons-compress:1.10'
4
4
 
5
5
  testCompile project(':embulk-core').sourceSets.test.output
6
+ testCompile project(':embulk-test')
6
7
  }
@@ -0,0 +1,268 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.google.common.collect.ImmutableList;
5
+ import com.google.common.collect.ImmutableMap;
6
+ import com.google.inject.Inject;
7
+ import org.embulk.config.Config;
8
+ import org.embulk.config.ConfigDefault;
9
+ import org.embulk.config.ConfigException;
10
+ import org.embulk.config.ConfigSource;
11
+ import org.embulk.config.Task;
12
+ import org.embulk.config.TaskSource;
13
+ import org.embulk.spi.Column;
14
+ import org.embulk.spi.ColumnVisitor;
15
+ import org.embulk.spi.Exec;
16
+ import org.embulk.spi.FilterPlugin;
17
+ import org.embulk.spi.Page;
18
+ import org.embulk.spi.PageBuilder;
19
+ import org.embulk.spi.PageOutput;
20
+ import org.embulk.spi.PageReader;
21
+ import org.embulk.spi.Schema;
22
+ import org.embulk.spi.SchemaConfigException;
23
+ import org.slf4j.Logger;
24
+
25
+ import java.util.List;
26
+ import java.util.Map;
27
+ import java.util.HashMap;
28
+
29
+ import static java.util.Locale.ENGLISH;
30
+ import static org.embulk.spi.Exec.getBufferAllocator;
31
+
32
+ public class RemoveColumnsFilterPlugin
33
+ implements FilterPlugin
34
+ {
35
+ public interface PluginTask
36
+ extends Task
37
+ {
38
+ @Config("remove")
39
+ @ConfigDefault("null")
40
+ public Optional<List<String>> getRemove();
41
+
42
+ // TODO remove_pattern option
43
+
44
+ @Config("keep")
45
+ @ConfigDefault("null")
46
+ public Optional<List<String>> getKeep();
47
+
48
+ // TODO keep_pattern option
49
+
50
+ @Config("accept_unmatched_columns")
51
+ @ConfigDefault("false")
52
+ public boolean getAcceptUnmatchedColumns();
53
+
54
+ public void setIndexMapping(int[] mapping);
55
+ public int[] getIndexMapping();
56
+ }
57
+
58
+ private final Logger LOG;
59
+
60
+ @Inject
61
+ public RemoveColumnsFilterPlugin()
62
+ {
63
+ LOG = Exec.getLogger(getClass());
64
+ }
65
+
66
+ @Override
67
+ public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control)
68
+ {
69
+ PluginTask task = config.loadConfig(PluginTask.class);
70
+
71
+ // validate remove: and keep:
72
+ if (task.getRemove().isPresent() && task.getKeep().isPresent()) {
73
+ throw new ConfigException("remove: and keep: must not be multi-select");
74
+ }
75
+ if (!task.getRemove().isPresent() && !task.getKeep().isPresent()) {
76
+ throw new ConfigException("Must require remove: or keep:");
77
+ }
78
+
79
+ boolean acceptUnmatchedColumns = task.getAcceptUnmatchedColumns();
80
+
81
+ ImmutableList.Builder<Column> outputColumns = ImmutableList.builder();
82
+ int index = 0;
83
+ int[] indexMapping = new int[inputSchema.size()];
84
+ for (int i = 0; i < indexMapping.length; i++) {
85
+ indexMapping[i] = -1;
86
+ }
87
+ if (task.getRemove().isPresent()) { // specify remove:
88
+ List<String> removeColumns = getExistentColumns(inputSchema, task.getRemove().get(), acceptUnmatchedColumns);
89
+ for (Column column : inputSchema.getColumns()) {
90
+ if (!removeColumns.contains(column.getName())) {
91
+ outputColumns.add(new Column(index, column.getName(), column.getType()));
92
+ indexMapping[column.getIndex()] = index;
93
+ index++;
94
+ }
95
+ }
96
+ }
97
+ else { // specify keep:
98
+ List<String> keepColumns = getExistentColumns(inputSchema, task.getKeep().get(), acceptUnmatchedColumns);
99
+ for (Column column : inputSchema.getColumns()) {
100
+ if (keepColumns.contains(column.getName())) {
101
+ outputColumns.add(new Column(index, column.getName(), column.getType()));
102
+ indexMapping[column.getIndex()] = index;
103
+ index++;
104
+ }
105
+ }
106
+ }
107
+
108
+ task.setIndexMapping(indexMapping);
109
+ control.run(task.dump(), new Schema(outputColumns.build()));
110
+ }
111
+
112
+ private List<String> getExistentColumns(Schema schema, List<String> specifiedColumns, boolean acceptUnmatch)
113
+ {
114
+ ImmutableList.Builder<String> existentColumns = ImmutableList.builder();
115
+ for (String column : specifiedColumns) {
116
+ try {
117
+ schema.lookupColumn(column);
118
+ existentColumns.add(column);
119
+ }
120
+ catch (SchemaConfigException e) {
121
+ if (!acceptUnmatch) {
122
+ throw new ConfigException(String.format(ENGLISH, "Column '%s' doesn't exist in the schema", column));
123
+ }
124
+ }
125
+ }
126
+ return existentColumns.build();
127
+ }
128
+
129
+ @Override
130
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
131
+ Schema outputSchema, PageOutput output)
132
+ {
133
+ PluginTask task = taskSource.loadTask(PluginTask.class);
134
+ PageReader pageReader = new PageReader(inputSchema);
135
+ PageBuilder pageBuilder = new PageBuilder(getBufferAllocator(), outputSchema, output);
136
+ return new PageConverter(pageReader, pageBuilder, task.getIndexMapping());
137
+ }
138
+
139
+ static class PageConverter
140
+ implements PageOutput
141
+ {
142
+ private final PageReader pageReader;
143
+ private final PageBuilder pageBuilder;
144
+ private final int[] indexMapping;
145
+
146
+ PageConverter(PageReader pageReader, PageBuilder pageBuilder, int[] indexMapping)
147
+ {
148
+ this.pageReader = pageReader;
149
+ this.pageBuilder = pageBuilder;
150
+ this.indexMapping = indexMapping;
151
+ }
152
+
153
+ @Override
154
+ public void add(Page page)
155
+ {
156
+ pageReader.setPage(page);
157
+ while (pageReader.nextRecord()) {
158
+ pageReader.getSchema().visitColumns(new ColumnVisitor() {
159
+ @Override
160
+ public void booleanColumn(Column inputColumn)
161
+ {
162
+ int index = indexMapping[inputColumn.getIndex()];
163
+ if (index >= 0) {
164
+ if (pageReader.isNull(inputColumn)) {
165
+ pageBuilder.setNull(index);
166
+ }
167
+ else {
168
+ pageBuilder.setBoolean(index, pageReader.getBoolean(inputColumn));
169
+ }
170
+ }
171
+ }
172
+
173
+ @Override
174
+ public void longColumn(Column inputColumn)
175
+ {
176
+ int index = indexMapping[inputColumn.getIndex()];
177
+ if (index >= 0) {
178
+ if (pageReader.isNull(inputColumn)) {
179
+ pageBuilder.setNull(index);
180
+ }
181
+ else {
182
+ pageBuilder.setLong(index, pageReader.getLong(inputColumn));
183
+ }
184
+ }
185
+ }
186
+
187
+ @Override
188
+ public void doubleColumn(Column inputColumn)
189
+ {
190
+ int index = indexMapping[inputColumn.getIndex()];
191
+ if (index >= 0) {
192
+ if (pageReader.isNull(inputColumn)) {
193
+ pageBuilder.setNull(index);
194
+ }
195
+ else {
196
+ pageBuilder.setDouble(index, pageReader.getDouble(inputColumn));
197
+ }
198
+ }
199
+ }
200
+
201
+ @Override
202
+ public void stringColumn(Column inputColumn)
203
+ {
204
+ int index = indexMapping[inputColumn.getIndex()];
205
+ if (index >= 0) {
206
+ if (pageReader.isNull(inputColumn)) {
207
+ pageBuilder.setNull(index);
208
+ }
209
+ else {
210
+ pageBuilder.setString(index, pageReader.getString(inputColumn));
211
+ }
212
+ }
213
+ }
214
+
215
+ @Override
216
+ public void timestampColumn(Column inputColumn)
217
+ {
218
+ int index = indexMapping[inputColumn.getIndex()];
219
+ if (index >= 0) {
220
+ if (pageReader.isNull(inputColumn)) {
221
+ pageBuilder.setNull(index);
222
+ }
223
+ else {
224
+ pageBuilder.setTimestamp(index, pageReader.getTimestamp(inputColumn));
225
+ }
226
+ }
227
+ }
228
+
229
+ @Override
230
+ public void jsonColumn(Column inputColumn)
231
+ {
232
+ int index = indexMapping[inputColumn.getIndex()];
233
+ if (index >= 0) {
234
+ if (pageReader.isNull(inputColumn)) {
235
+ pageBuilder.setNull(index);
236
+ }
237
+ else {
238
+ pageBuilder.setJson(index, pageReader.getJson(inputColumn));
239
+ }
240
+ }
241
+ }
242
+ });
243
+ pageBuilder.addRecord();
244
+ }
245
+ }
246
+
247
+ private Map<String, Integer> newColumnIndex(Schema schema)
248
+ {
249
+ ImmutableMap.Builder<String, Integer> builder = ImmutableMap.builder();
250
+ for (Column column : schema.getColumns()) {
251
+ builder.put(column.getName(), column.getIndex());
252
+ }
253
+ return builder.build();
254
+ }
255
+
256
+ @Override
257
+ public void finish()
258
+ {
259
+ pageBuilder.finish();
260
+ }
261
+
262
+ @Override
263
+ public void close()
264
+ {
265
+ pageBuilder.close();
266
+ }
267
+ }
268
+ }
@@ -30,6 +30,19 @@ import java.util.regex.PatternSyntaxException;
30
30
  import javax.validation.constraints.Min;
31
31
  import javax.validation.constraints.Size;
32
32
 
33
+ /**
34
+ * |RenameFilterPlugin| renames column names.
35
+ *
36
+ * NOTE: This filter should always behave in the same way for the same configuration.
37
+ * Changes in its behavior confuse users who are working with the same configuration.
38
+ *
39
+ * Even when a buggy behavior is found, fix it by:
40
+ * 1) Adding a new option, and
41
+ * 2) Implementing a new behavior in the new option.
42
+ *
43
+ * Keep the buggy behavior with the old configuration except for fatal failures so
44
+ * that users are not confused.
45
+ */
33
46
  public class RenameFilterPlugin
34
47
  implements FilterPlugin
35
48
  {
@@ -47,6 +47,7 @@ public class StandardPluginModule
47
47
 
48
48
  // filter plugins
49
49
  registerPluginTo(binder, FilterPlugin.class, "rename", RenameFilterPlugin.class);
50
+ registerPluginTo(binder, FilterPlugin.class, "remove_columns", RemoveColumnsFilterPlugin.class);
50
51
 
51
52
  // default guess plugins
52
53
  registerDefaultGuessPluginTo(binder, new PluginType("gzip"));
@@ -0,0 +1,121 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import org.embulk.config.ConfigException;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.exec.PartialExecutionException;
7
+ import org.embulk.test.TestingEmbulk;
8
+ import org.junit.Rule;
9
+ import org.junit.Test;
10
+
11
+ import java.io.IOException;
12
+ import java.nio.file.Path;
13
+
14
+ import static org.embulk.test.EmbulkTests.copyResource;
15
+ import static org.embulk.test.EmbulkTests.readResource;
16
+ import static org.embulk.test.EmbulkTests.readSortedFile;
17
+ import static org.hamcrest.Matchers.is;
18
+ import static org.junit.Assert.assertThat;
19
+ import static org.junit.Assert.assertTrue;
20
+ import static org.junit.Assert.fail;
21
+
22
+ public class TestRemoveColumnsFilterPlugin
23
+ {
24
+ private static final String RESOURCE_NAME_PREFIX = "org/embulk/standards/remove_columns/test/";
25
+
26
+ @Rule
27
+ public TestingEmbulk embulk = TestingEmbulk.builder().build();
28
+
29
+ @Test
30
+ public void useKeepOption()
31
+ throws Exception
32
+ {
33
+ assertRecordsByResource(embulk, "test_keep_in.yml", "test_keep_filter.yml",
34
+ "test_keep.csv", "test_keep_expected.csv");
35
+ }
36
+
37
+ @Test
38
+ public void useKeepWithAcceptUnmatched()
39
+ throws Exception
40
+ {
41
+ assertRecordsByResource(embulk, "test_keep_in.yml", "test_keep_with_unmatched_filter.yml",
42
+ "test_keep.csv", "test_keep_expected.csv");
43
+ }
44
+
45
+ @Test
46
+ public void useKeepWithoutAcceptUnmatched()
47
+ throws Exception
48
+ {
49
+ try {
50
+ assertRecordsByResource(embulk, "test_keep_in.yml", "test_keep_without_unmatched_filter.yml",
51
+ "test_keep.csv", "test_keep_expected.csv");
52
+ fail();
53
+ }
54
+ catch (PartialExecutionException ex) {
55
+ assertTrue(ex.getCause() instanceof ConfigException);
56
+ }
57
+ }
58
+
59
+ @Test
60
+ public void useKeepWithDuplicatedColumnNames()
61
+ throws Exception
62
+ {
63
+ assertRecordsByResource(embulk, "test_keep_with_duplicated_column_names_in.yml", "test_keep_with_duplicated_column_names.yml",
64
+ "test_keep_with_duplicated_column_names.csv", "test_keep_with_duplicated_column_names_expected.csv");
65
+ }
66
+
67
+ @Test
68
+ public void useRemove()
69
+ throws Exception
70
+ {
71
+ assertRecordsByResource(embulk, "test_remove_in.yml", "test_remove_filter.yml",
72
+ "test_remove.csv", "test_remove_expected.csv");
73
+ }
74
+
75
+ @Test
76
+ public void useRemoveWithAcceptUnmatched()
77
+ throws Exception
78
+ {
79
+ assertRecordsByResource(embulk, "test_remove_in.yml", "test_remove_with_unmatched_filter.yml",
80
+ "test_remove.csv", "test_remove_expected.csv");
81
+ }
82
+
83
+ @Test
84
+ public void useRemoveWithoutAcceptUnmatched()
85
+ throws Exception
86
+ {
87
+ try {
88
+ assertRecordsByResource(embulk, "test_remove_in.yml", "test_remove_without_unmatched_filter.yml",
89
+ "test_remove.csv", "test_remove_expected.csv");
90
+ fail();
91
+ }
92
+ catch (PartialExecutionException ex) {
93
+ assertTrue(ex.getCause() instanceof ConfigException);
94
+ }
95
+ }
96
+
97
+ static void assertRecordsByResource(TestingEmbulk embulk,
98
+ String inConfigYamlResourceName, String filterConfigYamlResourceName,
99
+ String sourceCsvResourceName, String resultCsvResourceName)
100
+ throws IOException
101
+ {
102
+ Path inputPath = embulk.createTempFile("csv");
103
+ Path outputPath = embulk.createTempFile("csv");
104
+
105
+ // in: config
106
+ copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
107
+ ConfigSource inConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
108
+ .set("path_prefix", inputPath.toAbsolutePath().toString());
109
+
110
+ // remove_columns filter config
111
+ ConfigSource filterConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
112
+
113
+ TestingEmbulk.RunResult result = embulk.inputBuilder()
114
+ .in(inConfig)
115
+ .filters(ImmutableList.of(filterConfig))
116
+ .outputPath(outputPath)
117
+ .run();
118
+
119
+ assertThat(readSortedFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
120
+ }
121
+ }