embulk 0.6.10 → 0.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f9bbe21037e6186e386b39176e66b8336ce8d20
4
- data.tar.gz: 29fef3403d44ff6dc571d3e3c050efc23f77b770
3
+ metadata.gz: d45b70ffc7ebef537906a3f471d74f83d40f18ab
4
+ data.tar.gz: 792c390284e3b949d84b4b9860941620ba6d3f2a
5
5
  SHA512:
6
- metadata.gz: 1481af06ce9c9aa2497ee25ee2c7b4f3be2578acf33e2be01ba06a0727cdc20523af16a4e7bae64658e286b2060e118e6eedd97cec085186843063f5246a6001
7
- data.tar.gz: a3ab3e1c0e0f8477127dd30c5484210ae09845410ba0a747be942868061a002ad35e27e4deed4b63c6d49bca6477103728bb35378d12ae8363e40b24d8bea94b
6
+ metadata.gz: c297663fe08c2c1a83d68942facc2c00801fceab5acdf90a12dcef36b37eac272e10964ddf4f2dd6d869adab6cdeca9bc8925d763f51a6b27feac5c43b78ea65
7
+ data.tar.gz: ea1a1b4a7c3df3a6f61cb52f2487050d1f2b2a8abf1bf2616b9ee97b2f1d493e2a3b6135f1e32e3d5a0edf40d2a135615be309294e2bc7bdfd6a15c1808a8238
data/build.gradle CHANGED
@@ -11,7 +11,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
11
 
12
12
  allprojects {
13
13
  group = 'org.embulk'
14
- version = '0.6.10'
14
+ version = '0.6.11'
15
15
 
16
16
  ext {
17
17
  jrubyVersion = '1.7.19'
@@ -2,9 +2,103 @@
2
2
  : <<BAT
3
3
  @echo off
4
4
 
5
- java -jar %~f0 %*
5
+ setlocal
6
+
7
+ set this=%~f0
8
+ set java_args=
9
+ set jruby_args=
10
+ set default_optimize=
11
+ set overwrite_optimize=
12
+ set status=
13
+ set error=
14
+ set args=
15
+
16
+ rem In jar file, cannot goto ahead for some reason.
17
+
18
+ for %%a in ( %* ) do (
19
+ call :check_arg %%a
20
+ )
21
+
22
+ if "%error%" == "true" exit /b 1
23
+
24
+ set optimize=false
25
+ if "%overwrite_optimize%" == "true" (
26
+ set optimize=true
27
+ ) else (
28
+ if "%default_optimize%" == "true" (
29
+ if not "%overwrite_optimize%" == "false" (
30
+ set optimize=true
31
+ )
32
+ )
33
+ )
34
+
35
+ if "%optimize%" == "true" (
36
+ set java_args=-XX:+AggressiveOpts -XX:+UseConcMarkSweepGC %java_args%
37
+ ) else (
38
+ set java_args=-XX:+AggressiveOpts -XX:+TieredCompilation -XX:TieredStopAtLevel=1 -Xverify:none %java_args%
39
+ )
40
+
41
+ java %java_args% -jar %this% %jruby_args% %args%
42
+
43
+ endlocal
44
+
45
+ exit /b
46
+
47
+ :check_arg
48
+ set arg=%*
49
+
50
+ rem Remove double quotations
51
+ set p1=%arg:~0,1%
52
+ set p1=%p1:"=%
53
+ set p2=%arg:~-1,1%
54
+ set p2=%p2:"=%
55
+ set arg=%p1%%arg:~1,-1%%p2%
56
+
57
+ if "%status%" == "rest" (
58
+ set args=%args% %arg%
59
+
60
+ ) else if "%status%" == "read" (
61
+ call :read_file %arg%
62
+
63
+ ) else if "%arg%" == "-J+O" (
64
+ set overwrite_optimize=true
65
+ set status=rest
66
+
67
+ ) else if "%arg%" == "-J-O" (
68
+ set overwrite_optimize=false
69
+ set status=rest
70
+
71
+ ) else if "%arg:~0,2%" == "-J" (
72
+ if not "%arg:~2%" == "" (
73
+ set java_args=%java_args% %arg:~2%
74
+ ) else (
75
+ set status=read
76
+ )
77
+
78
+ ) else if "%arg:~0,2%" == "-R" (
79
+ set jruby_args=%jruby_args% %arg:~2%
80
+
81
+ ) else if "%arg%" == "run" (
82
+ set default_optimize=true
83
+ set args=%args% %arg%
84
+ set status=rest
85
+
86
+ ) else (
87
+ set args=%args% %arg%
88
+ set status=rest
89
+ )
90
+ exit /b
91
+
92
+ :read_file
93
+ if not exist "%~1" (
94
+ echo "failed to load java argument file."
95
+ set error=true
96
+ ) else (
97
+ for /f "delims=" %%i in (%~1) do set java_args=%java_args% %%i
98
+ )
99
+ set status=
100
+ exit /b
6
101
 
7
- exit /B
8
102
  BAT
9
103
 
10
104
  java_args=""
@@ -0,0 +1,23 @@
1
+ package org.embulk.cli;
2
+
3
+ import java.io.BufferedWriter;
4
+ import java.io.File;
5
+ import java.io.FileOutputStream;
6
+ import java.io.OutputStreamWriter;
7
+ import java.nio.charset.Charset;
8
+ import java.util.Arrays;
9
+
10
+ public class DummyMain {
11
+
12
+ public static void main(String[] args) throws Exception {
13
+ System.out.println(Arrays.asList(args));
14
+ File thisFolder = new File(SelfrunTest.class.getResource("/org/embulk/cli/DummyMain.class").toURI()).getParentFile();
15
+ try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(thisFolder, "args.txt")), Charset.defaultCharset()))) {
16
+ for (String arg : args) {
17
+ writer.write(arg);
18
+ writer.newLine();
19
+ }
20
+ }
21
+ }
22
+
23
+ }
@@ -0,0 +1,281 @@
1
+ package org.embulk.cli;
2
+
3
+ import static org.junit.Assert.assertEquals;
4
+
5
+ import java.io.BufferedReader;
6
+ import java.io.BufferedWriter;
7
+ import java.io.File;
8
+ import java.io.FileOutputStream;
9
+ import java.io.IOException;
10
+ import java.io.InputStreamReader;
11
+ import java.io.OutputStreamWriter;
12
+ import java.nio.charset.Charset;
13
+ import java.nio.file.FileSystem;
14
+ import java.nio.file.FileSystems;
15
+ import java.nio.file.Files;
16
+ import java.nio.file.StandardOpenOption;
17
+ import java.util.Arrays;
18
+ import java.util.List;
19
+
20
+ import org.junit.BeforeClass;
21
+ import org.junit.Test;
22
+
23
+
24
+ public class SelfrunTest {
25
+
26
+ private static File testSelfrun;
27
+
28
+ @BeforeClass
29
+ public static void prepare() throws Exception {
30
+ File selfrun = findSelfrun();
31
+ FileSystem fs = FileSystems.getDefault();
32
+ String line = new String(Files.readAllBytes(fs.getPath(selfrun.getAbsolutePath())), Charset.defaultCharset());
33
+
34
+ File thisFolder = new File(SelfrunTest.class.getResource("/org/embulk/cli/SelfrunTest.class").toURI()).getParentFile();
35
+ testSelfrun = new File(thisFolder, System.getProperty("file.separator").equals("\\") ? "selfrun.bat" : "selfrun.sh");
36
+
37
+ File classpath = thisFolder.getParentFile().getParentFile().getParentFile();
38
+ line = line.replaceAll("java ", "java -classpath " + classpath.getAbsolutePath().replaceAll("\\\\", "\\\\\\\\") + " org.embulk.cli.DummyMain ");
39
+
40
+ // Modify selfrun so that the arguments it receives are written to 'args.txt'.
41
+ Files.write(fs.getPath(testSelfrun.getAbsolutePath()), line.getBytes(Charset.defaultCharset()), StandardOpenOption.CREATE);
42
+ if (!testSelfrun.setExecutable(true)) {
43
+ throw new Exception("Cannot se executable.");
44
+ }
45
+ }
46
+
47
+
48
+ @Test
49
+ public void testNoArgument() throws Exception {
50
+ List<String> args = execute();
51
+ assertEquals(Arrays.asList(
52
+ "-XX:+AggressiveOpts",
53
+ "-XX:+TieredCompilation",
54
+ "-XX:TieredStopAtLevel=1",
55
+ "-Xverify:none",
56
+ "-jar",
57
+ testSelfrun.getAbsolutePath()),
58
+ args);
59
+ }
60
+
61
+ @Test
62
+ public void testArguments() throws Exception {
63
+ List<String> args = execute("a1", "a2", "\"a3=v3\"");
64
+ assertEquals(Arrays.asList(
65
+ "-XX:+AggressiveOpts",
66
+ "-XX:+TieredCompilation",
67
+ "-XX:TieredStopAtLevel=1",
68
+ "-Xverify:none",
69
+ "-jar",
70
+ testSelfrun.getAbsolutePath(),
71
+ "a1",
72
+ "a2",
73
+ "a3=v3"),
74
+ args);
75
+ }
76
+
77
+ @Test
78
+ public void testRun() throws Exception {
79
+ List<String> args = execute("run", "a1");
80
+ assertEquals(Arrays.asList(
81
+ "-XX:+AggressiveOpts",
82
+ "-XX:+UseConcMarkSweepGC",
83
+ "-jar",
84
+ testSelfrun.getAbsolutePath(),
85
+ "run",
86
+ "a1"),
87
+ args);
88
+ }
89
+
90
+ @Test
91
+ public void testJpO() throws Exception {
92
+ List<String> args = execute("-J+O", "a1", "a2");
93
+ assertEquals(Arrays.asList(
94
+ "-XX:+AggressiveOpts",
95
+ "-XX:+UseConcMarkSweepGC",
96
+ "-jar",
97
+ testSelfrun.getAbsolutePath(),
98
+ "a1",
99
+ "a2"),
100
+ args);
101
+ }
102
+
103
+ @Test
104
+ public void testJmO() throws Exception {
105
+ List<String> args = execute("-J-O", "a1", "a2");
106
+ assertEquals(Arrays.asList(
107
+ "-XX:+AggressiveOpts",
108
+ "-XX:+TieredCompilation",
109
+ "-XX:TieredStopAtLevel=1",
110
+ "-Xverify:none",
111
+ "-jar",
112
+ testSelfrun.getAbsolutePath(),
113
+ "a1",
114
+ "a2"),
115
+ args);
116
+ }
117
+
118
+ @Test
119
+ public void testR1() throws Exception {
120
+ List<String> args = execute("-Rr1", "a1", "a2");
121
+ assertEquals(Arrays.asList(
122
+ "-XX:+AggressiveOpts",
123
+ "-XX:+TieredCompilation",
124
+ "-XX:TieredStopAtLevel=1",
125
+ "-Xverify:none",
126
+ "-jar",
127
+ testSelfrun.getAbsolutePath(),
128
+ "r1",
129
+ "a1",
130
+ "a2"),
131
+ args);
132
+ }
133
+
134
+ @Test
135
+ public void testR2() throws Exception {
136
+ List<String> args = execute("\"-Rr1=v1\"", "\"-Rr2=v2\"", "a1", "a2");
137
+ assertEquals(Arrays.asList(
138
+ "-XX:+AggressiveOpts",
139
+ "-XX:+TieredCompilation",
140
+ "-XX:TieredStopAtLevel=1",
141
+ "-Xverify:none",
142
+ "-jar",
143
+ testSelfrun.getAbsolutePath(),
144
+ "r1=v1",
145
+ "r2=v2",
146
+ "a1",
147
+ "a2"),
148
+ args);
149
+ }
150
+
151
+ @Test
152
+ public void testRRun() throws Exception {
153
+ List<String> args = execute("-Rr1", "run", "a1");
154
+ assertEquals(Arrays.asList(
155
+ "-XX:+AggressiveOpts",
156
+ "-XX:+UseConcMarkSweepGC",
157
+ "-jar",
158
+ testSelfrun.getAbsolutePath(),
159
+ "r1",
160
+ "run",
161
+ "a1"),
162
+ args);
163
+ }
164
+
165
+ @Test
166
+ public void testJ1() throws Exception {
167
+ List<String> args = execute("-J-Dj1", "a1", "a2");
168
+ assertEquals(Arrays.asList(
169
+ "-XX:+AggressiveOpts",
170
+ "-XX:+TieredCompilation",
171
+ "-XX:TieredStopAtLevel=1",
172
+ "-Xverify:none",
173
+ "-Dj1",
174
+ "-jar",
175
+ testSelfrun.getAbsolutePath(),
176
+ "a1",
177
+ "a2"),
178
+ args);
179
+ }
180
+
181
+ @Test
182
+ public void testJ2() throws Exception {
183
+ List<String> args = execute("\"-J-Dj1=v1\"", "\"-J-Dj2=v2\"", "a1", "a2");
184
+ assertEquals(Arrays.asList(
185
+ "-XX:+AggressiveOpts",
186
+ "-XX:+TieredCompilation",
187
+ "-XX:TieredStopAtLevel=1",
188
+ "-Xverify:none",
189
+ "-Dj1=v1",
190
+ "-Dj2=v2",
191
+ "-jar",
192
+ testSelfrun.getAbsolutePath(),
193
+ "a1",
194
+ "a2"),
195
+ args);
196
+ }
197
+
198
+ @Test
199
+ public void testJR() throws Exception {
200
+ List<String> args = execute("-Jj1", "-Rr1", "a1", "a2");
201
+ assertEquals(Arrays.asList(
202
+ "-XX:+AggressiveOpts",
203
+ "-XX:+TieredCompilation",
204
+ "-XX:TieredStopAtLevel=1",
205
+ "-Xverify:none",
206
+ "j1",
207
+ "-jar",
208
+ testSelfrun.getAbsolutePath(),
209
+ "r1",
210
+ "a1",
211
+ "a2"),
212
+ args);
213
+ }
214
+
215
+ @Test
216
+ public void testJFile() throws Exception {
217
+ File javaArgsFile = new File(testSelfrun.getParentFile(), "java_args.txt");
218
+ FileSystem fs = FileSystems.getDefault();
219
+ Files.write(fs.getPath(javaArgsFile.getAbsolutePath()), "j1 j2 j3".getBytes(Charset.defaultCharset()), StandardOpenOption.CREATE);
220
+
221
+ List<String> args = execute("-J", javaArgsFile.getAbsolutePath(), "a1", "a2");
222
+ assertEquals(Arrays.asList(
223
+ "-XX:+AggressiveOpts",
224
+ "-XX:+TieredCompilation",
225
+ "-XX:TieredStopAtLevel=1",
226
+ "-Xverify:none",
227
+ "j1",
228
+ "j2",
229
+ "j3",
230
+ "-jar",
231
+ testSelfrun.getAbsolutePath(),
232
+ "a1",
233
+ "a2"),
234
+ args);
235
+ }
236
+
237
+ private List<String> execute(String... arguments) throws Exception {
238
+ File temp = new File(testSelfrun.getParentFile(), "call-" + testSelfrun.getName());
239
+ try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(temp), Charset.defaultCharset()))) {
240
+ writer.write(testSelfrun.getAbsolutePath());
241
+ for (String argument : arguments) {
242
+ writer.write(" ");
243
+ writer.write(argument);
244
+ }
245
+ }
246
+ if (!temp.setExecutable(true)) {
247
+ throw new Exception("Cannot se executable.");
248
+ }
249
+
250
+ File argsFile = new File(testSelfrun.getParentFile(), "args.txt");
251
+ if (argsFile.exists()) {
252
+ if (!argsFile.delete()) {
253
+ throw new IOException("Cannot delete " + argsFile);
254
+ }
255
+ }
256
+
257
+ Process process = Runtime.getRuntime().exec(temp.getAbsolutePath());
258
+ int exitCode = process.waitFor();
259
+ if (exitCode != 0 || !argsFile.exists()) {
260
+ StringBuilder builder = new StringBuilder();
261
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream(), Charset.defaultCharset()))) {
262
+ builder.append(reader.readLine());
263
+ builder.append(System.getProperty("line.separator"));
264
+ }
265
+ throw new Exception(builder.toString());
266
+ }
267
+
268
+ FileSystem fs = FileSystems.getDefault();
269
+ List<String> args = Files.readAllLines(fs.getPath(argsFile.getAbsolutePath()), Charset.defaultCharset());
270
+ return args;
271
+ }
272
+
273
+ private static File findSelfrun() {
274
+ File folder = new File(".");
275
+ if (new File(folder, "embulk-cli").exists()) {
276
+ folder = new File(folder, "embulk-cli");
277
+ }
278
+ return new File(new File(new File(new File(folder, "src"), "main"), "sh"), "selfrun.sh");
279
+ }
280
+
281
+ }
@@ -143,6 +143,8 @@ Options
143
143
  +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
144
144
  | trim\_if\_not\_quoted | boolean | If true, remove spaces of a value if the value is not surrounded by the quote character | ``false`` by default |
145
145
  +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
146
+ | comment\_line\_marker | string | Skip a line if the line begins with this string | null by default |
147
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
146
148
  | allow\_optional\_columns | boolean | If true, set null to insufficient columns. Otherwise, skip the row in case of insufficient number of columns | ``false`` by default |
147
149
  +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
148
150
  | allow\_extra\_columns | boolean | If true, ignore too many columns. Otherwise, skip the row in case of too many columns | ``false`` by default |
@@ -204,6 +206,7 @@ Example
204
206
  escape: ''
205
207
  null_string: 'NULL'
206
208
  skip_header_lines: 1
209
+ comment_line_marker: '#'
207
210
  columns:
208
211
  - {name: id, type: long}
209
212
  - {name: account, type: long}
@@ -0,0 +1,19 @@
1
+ Release 0.6.11
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``input-file`` plugin keeps the previous ``last_path`` value (if any) when there are no input files.
8
+ * ``parser-csv`` supports the **comment_line_marker** option to skip lines starting with a comment marker such as '#'.
9
+ * Fixed a bug where ``guess-csv`` guessed the timestamp format incorrectly when the date order is month-day-year.
10
+
11
+ General Changes
12
+ ------------------
13
+
14
+ * Command line execution supports ``-J``, ``-R``, ``-J+O``, and ``-J-O`` arguments on Windows (@hito4++)
15
+
16
+
17
+ Release Date
18
+ ------------------
19
+ 2015-05-30
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.6.11
7
8
  release/release-0.6.10
8
9
  release/release-0.6.9
9
10
  release/release-0.6.8
@@ -75,6 +75,10 @@ public class CsvParserPlugin
75
75
  @ConfigDefault("131072") //128kB
76
76
  public long getMaxQuotedSizeLimit();
77
77
 
78
+ @Config("comment_line_marker")
79
+ @ConfigDefault("null")
80
+ public Optional<String> getCommentLineMarker();
81
+
78
82
  @Config("allow_optional_columns")
79
83
  @ConfigDefault("false")
80
84
  public boolean getAllowOptionalColumns();
@@ -27,6 +27,7 @@ public class CsvTokenizer
27
27
  private final String newline;
28
28
  private final boolean trimIfNotQuoted;
29
29
  private final long maxQuotedSizeLimit;
30
+ private final String commentLineMarker;
30
31
  private final LineDecoder input;
31
32
 
32
33
  private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
@@ -46,6 +47,7 @@ public class CsvTokenizer
46
47
  newline = task.getNewline().getString();
47
48
  trimIfNotQuoted = task.getTrimIfNotQuoted();
48
49
  maxQuotedSizeLimit = task.getMaxQuotedSizeLimit();
50
+ commentLineMarker = task.getCommentLineMarker().orNull();
49
51
  this.input = input;
50
52
  }
51
53
 
@@ -93,7 +95,7 @@ public class CsvTokenizer
93
95
  }
94
96
  }
95
97
 
96
- private boolean nextLine(boolean ignoreEmptyLine)
98
+ private boolean nextLine(boolean skipEmptyLine)
97
99
  {
98
100
  while (true) {
99
101
  if (!unreadLines.isEmpty()) {
@@ -107,7 +109,10 @@ public class CsvTokenizer
107
109
  linePos = 0;
108
110
  lineNumber++;
109
111
 
110
- if (!line.isEmpty() || !ignoreEmptyLine) {
112
+ boolean skip = skipEmptyLine && (
113
+ line.isEmpty() ||
114
+ (commentLineMarker != null && line.startsWith(commentLineMarker)));
115
+ if (!skip) {
111
116
  return true;
112
117
  }
113
118
  }
@@ -79,10 +79,22 @@ public class LocalFileInputPlugin
79
79
 
80
80
  control.run(taskSource, taskCount);
81
81
 
82
- List<String> files = new ArrayList<String>(task.getFiles());
83
- Collections.sort(files);
84
- return Exec.newConfigDiff().
85
- set("last_path", files.get(files.size() - 1));
82
+ // build next config
83
+ ConfigDiff configDiff = Exec.newConfigDiff();
84
+
85
+ // last_path
86
+ if (task.getFiles().isEmpty()) {
87
+ // keep the last value
88
+ if (task.getLastPath().isPresent()) {
89
+ configDiff.set("last_path", task.getLastPath().get());
90
+ }
91
+ } else {
92
+ List<String> files = new ArrayList<String>(task.getFiles());
93
+ Collections.sort(files);
94
+ configDiff.set("last_path", files.get(files.size() - 1));
95
+ }
96
+
97
+ return configDiff;
86
98
  }
87
99
 
88
100
  @Override
@@ -226,6 +226,20 @@ public class TestCsvTokenizer
226
226
  "\n\"a\\\"aa\",\"b,bb\\\"\"\n\n\"cc\"\"c\",\"\"\"ddd\"\n,\"\"\n")));
227
227
  }
228
228
 
229
+ @Test
230
+ public void testCommentLineMarker() throws Exception
231
+ {
232
+ config.set("comment_line_marker", JsonNodeFactory.instance.textNode("#"));
233
+ reloadPluginTask();
234
+ assertEquals(expectedRecords(2,
235
+ "aaa", "bbb",
236
+ "eee", "fff"),
237
+ parse(task,
238
+ "aaa,bbb",
239
+ "#ccc,ddd",
240
+ "eee,fff"));
241
+ }
242
+
229
243
  @Test
230
244
  public void trimNonQuotedValues() throws Exception
231
245
  {
@@ -93,7 +93,7 @@ out:
93
93
  (If guess supported) you don't have to write `<%= category %>:` section in the configuration file. After writing `in:` section, you can let embulk guess `<%= category %>:` section using this command:
94
94
 
95
95
  ```
96
- $ embulk install <%= project_name %>
96
+ $ embulk gem install <%= project_name %>
97
97
  $ embulk guess -g <%= name %> config.yml -o guessed.yml
98
98
  ```
99
99
  %end
@@ -24,6 +24,11 @@ module Embulk
24
24
  "\\N", # MySQL LOAD, Hive STORED AS TEXTFILE
25
25
  ]
26
26
 
27
+ COMMENT_LINE_MARKER_CANDIDATES = [
28
+ "#",
29
+ "//",
30
+ ]
31
+
27
32
  MAX_SKIP_LINES = 10
28
33
  NO_SKIP_DETECT_LINES = 10
29
34
 
@@ -56,9 +61,12 @@ module Embulk
56
61
  end
57
62
 
58
63
  sample_records = split_lines(parser_guessed, sample_lines, delim)
64
+
59
65
  skip_header_lines = guess_skip_header_lines(sample_records)
60
66
  sample_records = sample_records[skip_header_lines..-1]
61
67
 
68
+ comment_line_marker, sample_records = guess_comment_line_marker(sample_records)
69
+
62
70
  first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
63
71
  other_types = SchemaGuess.types_from_array_records(sample_records[1..-1])
64
72
 
@@ -75,6 +83,8 @@ module Embulk
75
83
  parser_guessed["skip_header_lines"] = skip_header_lines
76
84
  end
77
85
 
86
+ parser_guessed["comment_line_marker"] = comment_line_marker # always set comment_line_marker even if it's null
87
+
78
88
  parser_guessed["allow_extra_columns"] = false
79
89
  parser_guessed["allow_optional_columns"] = false
80
90
 
@@ -113,7 +123,10 @@ module Embulk
113
123
  columns = []
114
124
  while true
115
125
  begin
116
- columns << tokenizer.nextColumn
126
+ column = tokenizer.nextColumn
127
+ quoted = tokenizer.wasQuotedColumn
128
+ column.define_singleton_method(:quoted?) { quoted }
129
+ columns << column
117
130
  rescue org.embulk.standards.CsvTokenizer::TooFewColumnsException
118
131
  rows << columns
119
132
  break
@@ -200,21 +213,38 @@ module Embulk
200
213
  count = counts.inject(0) {|r,c| r + c }
201
214
  [str, count]
202
215
  end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
203
- found = guessed.first
204
- return found ? found[0] : nil
216
+ found_str, found_count = guessed.first
217
+ return found_str ? found_str : nil
205
218
  end
206
219
 
207
220
  def guess_skip_header_lines(sample_records)
208
221
  counts = sample_records.map {|records| records.size }
209
222
  (1..[MAX_SKIP_LINES, counts.length - 1].min).each do |i|
210
223
  check_row_count = counts[i-1]
211
- if counts[i, NO_SKIP_DETECT_LINES].all? {|c| c == check_row_count }
224
+ if counts[i, NO_SKIP_DETECT_LINES].all? {|c| c <= check_row_count }
212
225
  return i - 1
213
226
  end
214
227
  end
215
228
  return 0
216
229
  end
217
230
 
231
+ def guess_comment_line_marker(sample_records)
232
+ guessed = COMMENT_LINE_MARKER_CANDIDATES.map do |str|
233
+ regexp = /^#{Regexp.quote(str)}/
234
+ records = sample_records.reject do |records|
235
+ !records[0].quoted? && !NULL_STRING_CANDIDATES.include?(records[0]) && records[0] =~ regexp
236
+ end
237
+ count = sample_records.size - records.size
238
+ [str, count, records]
239
+ end.select {|str,count,records| count > 0 }.sort_by {|str,count,records| -count }
240
+ found_str, found_count, found_records = guessed.first
241
+ if found_str
242
+ return found_str, found_records
243
+ else
244
+ return nil, sample_records
245
+ end
246
+ end
247
+
218
248
  def array_sum(array)
219
249
  array.inject(0) {|r,i| r += i }
220
250
  end
@@ -196,12 +196,12 @@ module Embulk::Guess
196
196
 
197
197
  parts << :year
198
198
  part_options << nil
199
- delimiters << date_delim
200
199
 
200
+ delimiters << date_delim
201
201
  parts << :month
202
202
  part_options << part_heading_option(dm["month"])
203
- delimiters << date_delim
204
203
 
204
+ delimiters << date_delim
205
205
  parts << :day
206
206
  part_options << part_heading_option(dm["day"])
207
207
 
@@ -210,30 +210,28 @@ module Embulk::Guess
210
210
 
211
211
  parts << :month
212
212
  part_options << part_heading_option(dm["month"])
213
- delimiters << date_delim
214
213
 
214
+ delimiters << date_delim
215
215
  parts << :day
216
216
  part_options << part_heading_option(dm["day"])
217
- delimiters << date_delim
218
217
 
218
+ delimiters << date_delim
219
219
  parts << :year
220
220
  part_options << nil
221
- delimiters << date_delim
222
221
 
223
222
  elsif dm = (/^#{DMY}(?<rest>.*?)$/.match(text) or /^#{DMY_NODELIM}(?<rest>.*?)$/.match(text))
224
223
  date_delim = dm["date_delim"] rescue ""
225
224
 
226
225
  parts << :day
227
226
  part_options << part_heading_option(dm["day"])
228
- delimiters << date_delim
229
227
 
228
+ delimiters << date_delim
230
229
  parts << :month
231
230
  part_options << part_heading_option(dm["month"])
232
- delimiters << date_delim
233
231
 
232
+ delimiters << date_delim
234
233
  parts << :year
235
234
  part_options << nil
236
- delimiters << date_delim
237
235
 
238
236
  else
239
237
  date_delim = ""
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.6.10'
2
+ VERSION = '0.6.11'
3
3
  end
@@ -45,6 +45,13 @@ class TimeFormatGuessTest < ::Test::Unit::TestCase
45
45
  assert_guess "%m.%d.%Y", "01.01.2014"
46
46
  assert_guess "%d/%m/%Y", "13/01/2014"
47
47
  assert_guess "%d/%m/%Y", "21/01/2014"
48
+
49
+ assert_guess "%d/%m/%Y %H-%M-%S,%N", "21/01/2014 01-01-01,000000001"
50
+ assert_guess "%d/%m/%Y %H-%M-%S,%N", "21/01/2014 01-01-01,000001"
51
+ assert_guess "%d/%m/%Y %H-%M-%S,%L", "21/01/2014 01-01-01,001"
52
+ assert_guess "%d/%m/%Y %H-%M-%S", "21/01/2014 01-01-01"
53
+ assert_guess "%d/%m/%Y %H-%M", "21/01/2014 01-01"
54
+ assert_guess "%d/%m/%Y", "21/01/2014"
48
55
  end
49
56
 
50
57
  def test_format_borders
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.10
4
+ version: 0.6.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-22 00:00:00.000000000 Z
11
+ date: 2015-05-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,6 +100,8 @@ files:
100
100
  - embulk-cli/build.gradle
101
101
  - embulk-cli/src/main/java/org/embulk/cli/Main.java
102
102
  - embulk-cli/src/main/sh/selfrun.sh
103
+ - embulk-cli/src/test/java/org/embulk/cli/DummyMain.java
104
+ - embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java
103
105
  - embulk-core/build.gradle
104
106
  - embulk-core/src/main/java/org/embulk/EmbulkService.java
105
107
  - embulk-core/src/main/java/org/embulk/command/PreviewPrinter.java
@@ -294,6 +296,7 @@ files:
294
296
  - embulk-docs/src/release/release-0.6.0.rst
295
297
  - embulk-docs/src/release/release-0.6.1.rst
296
298
  - embulk-docs/src/release/release-0.6.10.rst
299
+ - embulk-docs/src/release/release-0.6.11.rst
297
300
  - embulk-docs/src/release/release-0.6.2.rst
298
301
  - embulk-docs/src/release/release-0.6.3.rst
299
302
  - embulk-docs/src/release/release-0.6.4.rst
@@ -409,8 +412,8 @@ files:
409
412
  - classpath/bval-jsr303-0.5.jar
410
413
  - classpath/commons-beanutils-core-1.8.3.jar
411
414
  - classpath/commons-lang3-3.1.jar
412
- - classpath/embulk-core-0.6.10.jar
413
- - classpath/embulk-standards-0.6.10.jar
415
+ - classpath/embulk-core-0.6.11.jar
416
+ - classpath/embulk-standards-0.6.11.jar
414
417
  - classpath/guava-18.0.jar
415
418
  - classpath/guice-4.0.jar
416
419
  - classpath/guice-multibindings-4.0.jar