embulk 0.8.15-java → 0.8.16-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -1
  3. data/appveyor.yml +8 -0
  4. data/build.gradle +86 -45
  5. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -1
  6. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +43 -4
  7. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +15 -0
  8. data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +38 -1
  9. data/embulk-docs/src/built-in.rst +34 -0
  10. data/embulk-docs/src/release.rst +1 -0
  11. data/embulk-docs/src/release/release-0.8.16.rst +43 -0
  12. data/embulk-standards/build.gradle +1 -0
  13. data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +268 -0
  14. data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +13 -0
  15. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +1 -0
  16. data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +121 -0
  17. data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +8 -0
  18. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +38 -0
  19. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +229 -0
  20. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +1 -0
  21. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +2 -0
  22. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +12 -0
  23. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +1 -0
  24. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +12 -0
  25. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +1 -0
  26. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +1 -0
  27. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +2 -0
  28. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +16 -0
  29. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +1 -0
  30. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +2 -0
  31. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +16 -0
  32. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +1 -0
  33. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +16 -0
  34. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +1 -0
  35. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +1 -0
  36. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +16 -0
  37. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +1 -0
  38. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +1 -0
  39. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +2 -0
  40. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +12 -0
  41. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +1 -0
  42. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +12 -0
  43. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +1 -0
  44. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +2 -0
  45. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +12 -0
  46. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +1 -0
  47. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +2 -0
  48. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +3 -0
  49. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +16 -0
  50. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +1 -0
  51. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +16 -0
  52. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +1 -0
  53. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +2 -0
  54. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +12 -0
  55. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +1 -0
  56. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +5 -0
  57. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +17 -0
  58. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +1 -0
  59. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +4 -0
  60. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +12 -0
  61. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +1 -0
  62. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +5 -0
  63. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +12 -0
  64. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +1 -0
  65. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +5 -0
  66. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +17 -0
  67. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +1 -0
  68. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +5 -0
  69. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +17 -0
  70. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +1 -0
  71. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +5 -0
  72. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +17 -0
  73. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +1 -0
  74. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +4 -0
  75. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +12 -0
  76. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +1 -0
  77. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +5 -0
  78. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +12 -0
  79. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +1 -0
  80. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +4 -0
  81. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +16 -0
  82. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +1 -0
  83. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +5 -0
  84. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +17 -0
  85. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +1 -0
  86. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +5 -0
  87. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +4 -0
  88. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +2 -0
  89. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +18 -0
  90. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +5 -0
  91. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +2 -0
  92. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +4 -0
  93. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +17 -0
  94. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +3 -0
  95. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +2 -0
  96. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +5 -0
  97. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +4 -0
  98. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +2 -0
  99. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +18 -0
  100. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +3 -0
  101. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +2 -0
  102. data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +458 -28
  103. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  104. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  105. data/gradlew +30 -21
  106. data/gradlew.bat +4 -10
  107. data/lib/embulk/command/embulk_migrate_plugin.rb +2 -2
  108. data/lib/embulk/data/new/java/build.gradle.erb +5 -3
  109. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  110. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  111. data/lib/embulk/data/new/java/gradlew +30 -21
  112. data/lib/embulk/data/new/java/gradlew.bat +4 -10
  113. data/lib/embulk/guess/csv.rb +44 -22
  114. data/lib/embulk/guess/newline.rb +10 -4
  115. data/lib/embulk/guess_plugin.rb +3 -1
  116. data/lib/embulk/java/time_helper.rb +2 -2
  117. data/lib/embulk/version.rb +1 -1
  118. metadata +92 -5
@@ -835,6 +835,40 @@ Columns: not recommended
835
835
  .. hint::
836
836
  ``columns`` are applied before ``rules`` if ``columns`` and ``rules`` are specified together. (It is discouraged to specify them together, though.)
837
837
 
838
+
839
+ Remove columns filter plugin
840
+ -----------------------------
841
+
842
+ The ``remove_columns`` filter plugin removes columns from schema.
843
+
844
+ Options
845
+ ~~~~~~~~
846
+
847
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
848
+ | name | type | description | required? |
849
+ +==========================+==========+============================================================+=======================+
850
+ | remove | array | An array of names of columns that it removes from schema. | ``[]`` by default |
851
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
852
+ | keep | array | An array of names of columns that it keeps in schema. | ``[]`` by default |
853
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
854
+ | accept_unmatched_columns | boolean | If true, ignore specified columns missing from the schema. | ``false`` by default |
855
+ +--------------------------+----------+------------------------------------------------------------+-----------------------+
856
+
857
+
858
+ The ``remove:`` and ``keep:`` options are mutually exclusive: specify exactly one of them.
859
+
860
+ Example
861
+ ~~~~~~~~
862
+
863
+ .. code-block:: yaml
864
+
865
+ # This configuration removes "_c0" and "_c1" named columns from schema.
866
+ filters:
867
+ ...
868
+ - type: remove_columns
869
+ remove: ["_c0", "_c1"]
870
+
871
+
838
872
  Local executor plugin
839
873
  ----------------------
840
874
 
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.8.16
7
8
  release/release-0.8.15
8
9
  release/release-0.8.14
9
10
  release/release-0.8.13
@@ -0,0 +1,43 @@
1
+ Release 0.8.16
2
+ ==================================
3
+
4
+ General Changes
5
+ ------------------
6
+
7
+ * Added remove_columns filter plugin [#530]
8
+
9
+ * http://www.embulk.org/docs/built-in.html#remove_columns-filter-plugin
10
+
11
+ * Supported timestamp format "%Q". (@hiroyuki-sato) [#468, #531]
12
+
13
+ * Improved csv guess plugin:
14
+
15
+ * Added semicolon as a delimiter candidate suggested by the csv guess plugin. [#527]
16
+
17
+ * Enabled suggesting for a few rows [#533]
18
+
19
+ * Enabled suggesting for a single column [#540]
20
+
21
+ * Changed guessing to remove the minimum size limit of 40 bytes. [#518]
22
+
23
+ * Refactored and introduced TestingEmbulk#{Input,Parser,Output}Builder to embulk-test. [#513, #514, #526]
24
+
25
+ * Fixed PageBuilder to avoid NullPointerException. [#535]
26
+
27
+ * Fixed ResumableInputStream to avoid NullPointerException. [#472]
28
+
29
+ * Fixed TaskValidationException to inherit ConfigException. [#520]
30
+
31
+ * Fixed build.gradle to use Task.doLast instead of Task.leftShift. [#536]
32
+
33
+ * Fixed build failure on AppVeyor by FileNotFoundException. [#537]
34
+
35
+ * Added updateJRuby task to make it easy to upgrade version of JRuby. [#538]
36
+
37
+ * Upgraded Gradle to v3.2.1. [#528]
38
+
39
+ * Release notes: https://docs.gradle.org/3.2.1/release-notes
40
+
41
+ Release Date
42
+ ------------------
43
+ 2017-01-27
@@ -3,4 +3,5 @@ dependencies {
3
3
  compile 'org.apache.commons:commons-compress:1.10'
4
4
 
5
5
  testCompile project(':embulk-core').sourceSets.test.output
6
+ testCompile project(':embulk-test')
6
7
  }
@@ -0,0 +1,268 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.google.common.collect.ImmutableList;
5
+ import com.google.common.collect.ImmutableMap;
6
+ import com.google.inject.Inject;
7
+ import org.embulk.config.Config;
8
+ import org.embulk.config.ConfigDefault;
9
+ import org.embulk.config.ConfigException;
10
+ import org.embulk.config.ConfigSource;
11
+ import org.embulk.config.Task;
12
+ import org.embulk.config.TaskSource;
13
+ import org.embulk.spi.Column;
14
+ import org.embulk.spi.ColumnVisitor;
15
+ import org.embulk.spi.Exec;
16
+ import org.embulk.spi.FilterPlugin;
17
+ import org.embulk.spi.Page;
18
+ import org.embulk.spi.PageBuilder;
19
+ import org.embulk.spi.PageOutput;
20
+ import org.embulk.spi.PageReader;
21
+ import org.embulk.spi.Schema;
22
+ import org.embulk.spi.SchemaConfigException;
23
+ import org.slf4j.Logger;
24
+
25
+ import java.util.List;
26
+ import java.util.Map;
27
+ import java.util.HashMap;
28
+
29
+ import static java.util.Locale.ENGLISH;
30
+ import static org.embulk.spi.Exec.getBufferAllocator;
31
+
32
+ public class RemoveColumnsFilterPlugin
33
+ implements FilterPlugin
34
+ {
35
+ public interface PluginTask
36
+ extends Task
37
+ {
38
+ @Config("remove")
39
+ @ConfigDefault("null")
40
+ public Optional<List<String>> getRemove();
41
+
42
+ // TODO remove_pattern option
43
+
44
+ @Config("keep")
45
+ @ConfigDefault("null")
46
+ public Optional<List<String>> getKeep();
47
+
48
+ // TODO keep_pattern option
49
+
50
+ @Config("accept_unmatched_columns")
51
+ @ConfigDefault("false")
52
+ public boolean getAcceptUnmatchedColumns();
53
+
54
+ public void setIndexMapping(int[] mapping);
55
+ public int[] getIndexMapping();
56
+ }
57
+
58
+ private final Logger LOG;
59
+
60
+ @Inject
61
+ public RemoveColumnsFilterPlugin()
62
+ {
63
+ LOG = Exec.getLogger(getClass());
64
+ }
65
+
66
+ @Override
67
+ public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control)
68
+ {
69
+ PluginTask task = config.loadConfig(PluginTask.class);
70
+
71
+ // validate remove: and keep:
72
+ if (task.getRemove().isPresent() && task.getKeep().isPresent()) {
73
+ throw new ConfigException("remove: and keep: must not be multi-select");
74
+ }
75
+ if (!task.getRemove().isPresent() && !task.getKeep().isPresent()) {
76
+ throw new ConfigException("Must require remove: or keep:");
77
+ }
78
+
79
+ boolean acceptUnmatchedColumns = task.getAcceptUnmatchedColumns();
80
+
81
+ ImmutableList.Builder<Column> outputColumns = ImmutableList.builder();
82
+ int index = 0;
83
+ int[] indexMapping = new int[inputSchema.size()];
84
+ for (int i = 0; i < indexMapping.length; i++) {
85
+ indexMapping[i] = -1;
86
+ }
87
+ if (task.getRemove().isPresent()) { // specify remove:
88
+ List<String> removeColumns = getExistentColumns(inputSchema, task.getRemove().get(), acceptUnmatchedColumns);
89
+ for (Column column : inputSchema.getColumns()) {
90
+ if (!removeColumns.contains(column.getName())) {
91
+ outputColumns.add(new Column(index, column.getName(), column.getType()));
92
+ indexMapping[column.getIndex()] = index;
93
+ index++;
94
+ }
95
+ }
96
+ }
97
+ else { // specify keep:
98
+ List<String> keepColumns = getExistentColumns(inputSchema, task.getKeep().get(), acceptUnmatchedColumns);
99
+ for (Column column : inputSchema.getColumns()) {
100
+ if (keepColumns.contains(column.getName())) {
101
+ outputColumns.add(new Column(index, column.getName(), column.getType()));
102
+ indexMapping[column.getIndex()] = index;
103
+ index++;
104
+ }
105
+ }
106
+ }
107
+
108
+ task.setIndexMapping(indexMapping);
109
+ control.run(task.dump(), new Schema(outputColumns.build()));
110
+ }
111
+
112
+ private List<String> getExistentColumns(Schema schema, List<String> specifiedColumns, boolean acceptUnmatch)
113
+ {
114
+ ImmutableList.Builder<String> existentColumns = ImmutableList.builder();
115
+ for (String column : specifiedColumns) {
116
+ try {
117
+ schema.lookupColumn(column);
118
+ existentColumns.add(column);
119
+ }
120
+ catch (SchemaConfigException e) {
121
+ if (!acceptUnmatch) {
122
+ throw new ConfigException(String.format(ENGLISH, "Column '%s' doesn't exist in the schema", column));
123
+ }
124
+ }
125
+ }
126
+ return existentColumns.build();
127
+ }
128
+
129
+ @Override
130
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
131
+ Schema outputSchema, PageOutput output)
132
+ {
133
+ PluginTask task = taskSource.loadTask(PluginTask.class);
134
+ PageReader pageReader = new PageReader(inputSchema);
135
+ PageBuilder pageBuilder = new PageBuilder(getBufferAllocator(), outputSchema, output);
136
+ return new PageConverter(pageReader, pageBuilder, task.getIndexMapping());
137
+ }
138
+
139
+ static class PageConverter
140
+ implements PageOutput
141
+ {
142
+ private final PageReader pageReader;
143
+ private final PageBuilder pageBuilder;
144
+ private final int[] indexMapping;
145
+
146
+ PageConverter(PageReader pageReader, PageBuilder pageBuilder, int[] indexMapping)
147
+ {
148
+ this.pageReader = pageReader;
149
+ this.pageBuilder = pageBuilder;
150
+ this.indexMapping = indexMapping;
151
+ }
152
+
153
+ @Override
154
+ public void add(Page page)
155
+ {
156
+ pageReader.setPage(page);
157
+ while (pageReader.nextRecord()) {
158
+ pageReader.getSchema().visitColumns(new ColumnVisitor() {
159
+ @Override
160
+ public void booleanColumn(Column inputColumn)
161
+ {
162
+ int index = indexMapping[inputColumn.getIndex()];
163
+ if (index >= 0) {
164
+ if (pageReader.isNull(inputColumn)) {
165
+ pageBuilder.setNull(index);
166
+ }
167
+ else {
168
+ pageBuilder.setBoolean(index, pageReader.getBoolean(inputColumn));
169
+ }
170
+ }
171
+ }
172
+
173
+ @Override
174
+ public void longColumn(Column inputColumn)
175
+ {
176
+ int index = indexMapping[inputColumn.getIndex()];
177
+ if (index >= 0) {
178
+ if (pageReader.isNull(inputColumn)) {
179
+ pageBuilder.setNull(index);
180
+ }
181
+ else {
182
+ pageBuilder.setLong(index, pageReader.getLong(inputColumn));
183
+ }
184
+ }
185
+ }
186
+
187
+ @Override
188
+ public void doubleColumn(Column inputColumn)
189
+ {
190
+ int index = indexMapping[inputColumn.getIndex()];
191
+ if (index >= 0) {
192
+ if (pageReader.isNull(inputColumn)) {
193
+ pageBuilder.setNull(index);
194
+ }
195
+ else {
196
+ pageBuilder.setDouble(index, pageReader.getDouble(inputColumn));
197
+ }
198
+ }
199
+ }
200
+
201
+ @Override
202
+ public void stringColumn(Column inputColumn)
203
+ {
204
+ int index = indexMapping[inputColumn.getIndex()];
205
+ if (index >= 0) {
206
+ if (pageReader.isNull(inputColumn)) {
207
+ pageBuilder.setNull(index);
208
+ }
209
+ else {
210
+ pageBuilder.setString(index, pageReader.getString(inputColumn));
211
+ }
212
+ }
213
+ }
214
+
215
+ @Override
216
+ public void timestampColumn(Column inputColumn)
217
+ {
218
+ int index = indexMapping[inputColumn.getIndex()];
219
+ if (index >= 0) {
220
+ if (pageReader.isNull(inputColumn)) {
221
+ pageBuilder.setNull(index);
222
+ }
223
+ else {
224
+ pageBuilder.setTimestamp(index, pageReader.getTimestamp(inputColumn));
225
+ }
226
+ }
227
+ }
228
+
229
+ @Override
230
+ public void jsonColumn(Column inputColumn)
231
+ {
232
+ int index = indexMapping[inputColumn.getIndex()];
233
+ if (index >= 0) {
234
+ if (pageReader.isNull(inputColumn)) {
235
+ pageBuilder.setNull(index);
236
+ }
237
+ else {
238
+ pageBuilder.setJson(index, pageReader.getJson(inputColumn));
239
+ }
240
+ }
241
+ }
242
+ });
243
+ pageBuilder.addRecord();
244
+ }
245
+ }
246
+
247
+ private Map<String, Integer> newColumnIndex(Schema schema)
248
+ {
249
+ ImmutableMap.Builder<String, Integer> builder = ImmutableMap.builder();
250
+ for (Column column : schema.getColumns()) {
251
+ builder.put(column.getName(), column.getIndex());
252
+ }
253
+ return builder.build();
254
+ }
255
+
256
+ @Override
257
+ public void finish()
258
+ {
259
+ pageBuilder.finish();
260
+ }
261
+
262
+ @Override
263
+ public void close()
264
+ {
265
+ pageBuilder.close();
266
+ }
267
+ }
268
+ }
@@ -30,6 +30,19 @@ import java.util.regex.PatternSyntaxException;
30
30
  import javax.validation.constraints.Min;
31
31
  import javax.validation.constraints.Size;
32
32
 
33
+ /**
34
+ * |RenameFilterPlugin| renames column names.
35
+ *
36
+ * NOTE: This filter should always behave in the same way for the same configuration.
37
+ * Changes in its behavior confuse users who are working with the same configuration.
38
+ *
39
+ * Even when a buggy behavior is found, fix it by:
40
+ * 1) Adding a new option, and
41
+ * 2) Implementing a new behavior in the new option.
42
+ *
43
+ * Keep the buggy behavior with the old configuration except for fatal failures so
44
+ * that users are not confused.
45
+ */
33
46
  public class RenameFilterPlugin
34
47
  implements FilterPlugin
35
48
  {
@@ -47,6 +47,7 @@ public class StandardPluginModule
47
47
 
48
48
  // filter plugins
49
49
  registerPluginTo(binder, FilterPlugin.class, "rename", RenameFilterPlugin.class);
50
+ registerPluginTo(binder, FilterPlugin.class, "remove_columns", RemoveColumnsFilterPlugin.class);
50
51
 
51
52
  // default guess plugins
52
53
  registerDefaultGuessPluginTo(binder, new PluginType("gzip"));
@@ -0,0 +1,121 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import org.embulk.config.ConfigException;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.exec.PartialExecutionException;
7
+ import org.embulk.test.TestingEmbulk;
8
+ import org.junit.Rule;
9
+ import org.junit.Test;
10
+
11
+ import java.io.IOException;
12
+ import java.nio.file.Path;
13
+
14
+ import static org.embulk.test.EmbulkTests.copyResource;
15
+ import static org.embulk.test.EmbulkTests.readResource;
16
+ import static org.embulk.test.EmbulkTests.readSortedFile;
17
+ import static org.hamcrest.Matchers.is;
18
+ import static org.junit.Assert.assertThat;
19
+ import static org.junit.Assert.assertTrue;
20
+ import static org.junit.Assert.fail;
21
+
22
+ public class TestRemoveColumnsFilterPlugin
23
+ {
24
+ private static final String RESOURCE_NAME_PREFIX = "org/embulk/standards/remove_columns/test/";
25
+
26
+ @Rule
27
+ public TestingEmbulk embulk = TestingEmbulk.builder().build();
28
+
29
+ @Test
30
+ public void useKeepOption()
31
+ throws Exception
32
+ {
33
+ assertRecordsByResource(embulk, "test_keep_in.yml", "test_keep_filter.yml",
34
+ "test_keep.csv", "test_keep_expected.csv");
35
+ }
36
+
37
+ @Test
38
+ public void useKeepWithAcceptUnmatched()
39
+ throws Exception
40
+ {
41
+ assertRecordsByResource(embulk, "test_keep_in.yml", "test_keep_with_unmatched_filter.yml",
42
+ "test_keep.csv", "test_keep_expected.csv");
43
+ }
44
+
45
+ @Test
46
+ public void useKeepWithoutAcceptUnmatched()
47
+ throws Exception
48
+ {
49
+ try {
50
+ assertRecordsByResource(embulk, "test_keep_in.yml", "test_keep_without_unmatched_filter.yml",
51
+ "test_keep.csv", "test_keep_expected.csv");
52
+ fail();
53
+ }
54
+ catch (PartialExecutionException ex) {
55
+ assertTrue(ex.getCause() instanceof ConfigException);
56
+ }
57
+ }
58
+
59
+ @Test
60
+ public void useKeepWithDuplicatedColumnNames()
61
+ throws Exception
62
+ {
63
+ assertRecordsByResource(embulk, "test_keep_with_duplicated_column_names_in.yml", "test_keep_with_duplicated_column_names.yml",
64
+ "test_keep_with_duplicated_column_names.csv", "test_keep_with_duplicated_column_names_expected.csv");
65
+ }
66
+
67
+ @Test
68
+ public void useRemove()
69
+ throws Exception
70
+ {
71
+ assertRecordsByResource(embulk, "test_remove_in.yml", "test_remove_filter.yml",
72
+ "test_remove.csv", "test_remove_expected.csv");
73
+ }
74
+
75
+ @Test
76
+ public void useRemoveWithAcceptUnmatched()
77
+ throws Exception
78
+ {
79
+ assertRecordsByResource(embulk, "test_remove_in.yml", "test_remove_with_unmatched_filter.yml",
80
+ "test_remove.csv", "test_remove_expected.csv");
81
+ }
82
+
83
+ @Test
84
+ public void useRemoveWithoutAcceptUnmatched()
85
+ throws Exception
86
+ {
87
+ try {
88
+ assertRecordsByResource(embulk, "test_remove_in.yml", "test_remove_without_unmatched_filter.yml",
89
+ "test_remove.csv", "test_remove_expected.csv");
90
+ fail();
91
+ }
92
+ catch (PartialExecutionException ex) {
93
+ assertTrue(ex.getCause() instanceof ConfigException);
94
+ }
95
+ }
96
+
97
+ static void assertRecordsByResource(TestingEmbulk embulk,
98
+ String inConfigYamlResourceName, String filterConfigYamlResourceName,
99
+ String sourceCsvResourceName, String resultCsvResourceName)
100
+ throws IOException
101
+ {
102
+ Path inputPath = embulk.createTempFile("csv");
103
+ Path outputPath = embulk.createTempFile("csv");
104
+
105
+ // in: config
106
+ copyResource(RESOURCE_NAME_PREFIX + sourceCsvResourceName, inputPath);
107
+ ConfigSource inConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + inConfigYamlResourceName)
108
+ .set("path_prefix", inputPath.toAbsolutePath().toString());
109
+
110
+ // remove_columns filter config
111
+ ConfigSource filterConfig = embulk.loadYamlResource(RESOURCE_NAME_PREFIX + filterConfigYamlResourceName);
112
+
113
+ TestingEmbulk.RunResult result = embulk.inputBuilder()
114
+ .in(inConfig)
115
+ .filters(ImmutableList.of(filterConfig))
116
+ .outputPath(outputPath)
117
+ .run();
118
+
119
+ assertThat(readSortedFile(outputPath), is(readResource(RESOURCE_NAME_PREFIX + resultCsvResourceName)));
120
+ }
121
+ }