embulk 0.6.10 → 0.6.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f9bbe21037e6186e386b39176e66b8336ce8d20
4
- data.tar.gz: 29fef3403d44ff6dc571d3e3c050efc23f77b770
3
+ metadata.gz: d45b70ffc7ebef537906a3f471d74f83d40f18ab
4
+ data.tar.gz: 792c390284e3b949d84b4b9860941620ba6d3f2a
5
5
  SHA512:
6
- metadata.gz: 1481af06ce9c9aa2497ee25ee2c7b4f3be2578acf33e2be01ba06a0727cdc20523af16a4e7bae64658e286b2060e118e6eedd97cec085186843063f5246a6001
7
- data.tar.gz: a3ab3e1c0e0f8477127dd30c5484210ae09845410ba0a747be942868061a002ad35e27e4deed4b63c6d49bca6477103728bb35378d12ae8363e40b24d8bea94b
6
+ metadata.gz: c297663fe08c2c1a83d68942facc2c00801fceab5acdf90a12dcef36b37eac272e10964ddf4f2dd6d869adab6cdeca9bc8925d763f51a6b27feac5c43b78ea65
7
+ data.tar.gz: ea1a1b4a7c3df3a6f61cb52f2487050d1f2b2a8abf1bf2616b9ee97b2f1d493e2a3b6135f1e32e3d5a0edf40d2a135615be309294e2bc7bdfd6a15c1808a8238
data/build.gradle CHANGED
@@ -11,7 +11,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
11
 
12
12
  allprojects {
13
13
  group = 'org.embulk'
14
- version = '0.6.10'
14
+ version = '0.6.11'
15
15
 
16
16
  ext {
17
17
  jrubyVersion = '1.7.19'
@@ -2,9 +2,103 @@
2
2
  : <<BAT
3
3
  @echo off
4
4
 
5
- java -jar %~f0 %*
5
+ setlocal
6
+
7
+ set this=%~f0
8
+ set java_args=
9
+ set jruby_args=
10
+ set default_optimize=
11
+ set overwrite_optimize=
12
+ set status=
13
+ set error=
14
+ set args=
15
+
16
+ rem In a jar file, cannot goto ahead for some reason.
17
+
18
+ for %%a in ( %* ) do (
19
+ call :check_arg %%a
20
+ )
21
+
22
+ if "%error%" == "true" exit /b 1
23
+
24
+ set optimize=false
25
+ if "%overwrite_optimize%" == "true" (
26
+ set optimize=true
27
+ ) else (
28
+ if "%default_optimize%" == "true" (
29
+ if not "%overwrite_optimize%" == "false" (
30
+ set optimize=true
31
+ )
32
+ )
33
+ )
34
+
35
+ if "%optimize%" == "true" (
36
+ set java_args=-XX:+AggressiveOpts -XX:+UseConcMarkSweepGC %java_args%
37
+ ) else (
38
+ set java_args=-XX:+AggressiveOpts -XX:+TieredCompilation -XX:TieredStopAtLevel=1 -Xverify:none %java_args%
39
+ )
40
+
41
+ java %java_args% -jar %this% %jruby_args% %args%
42
+
43
+ endlocal
44
+
45
+ exit /b
46
+
47
+ :check_arg
48
+ set arg=%*
49
+
50
+ rem Remove double quotations
51
+ set p1=%arg:~0,1%
52
+ set p1=%p1:"=%
53
+ set p2=%arg:~-1,1%
54
+ set p2=%p2:"=%
55
+ set arg=%p1%%arg:~1,-1%%p2%
56
+
57
+ if "%status%" == "rest" (
58
+ set args=%args% %arg%
59
+
60
+ ) else if "%status%" == "read" (
61
+ call :read_file %arg%
62
+
63
+ ) else if "%arg%" == "-J+O" (
64
+ set overwrite_optimize=true
65
+ set status=rest
66
+
67
+ ) else if "%arg%" == "-J-O" (
68
+ set overwrite_optimize=false
69
+ set status=rest
70
+
71
+ ) else if "%arg:~0,2%" == "-J" (
72
+ if not "%arg:~2%" == "" (
73
+ set java_args=%java_args% %arg:~2%
74
+ ) else (
75
+ set status=read
76
+ )
77
+
78
+ ) else if "%arg:~0,2%" == "-R" (
79
+ set jruby_args=%jruby_args% %arg:~2%
80
+
81
+ ) else if "%arg%" == "run" (
82
+ set default_optimize=true
83
+ set args=%args% %arg%
84
+ set status=rest
85
+
86
+ ) else (
87
+ set args=%args% %arg%
88
+ set status=rest
89
+ )
90
+ exit /b
91
+
92
+ :read_file
93
+ if not exist "%~1" (
94
+ echo "failed to load java argument file."
95
+ set error=true
96
+ ) else (
97
+ for /f "delims=" %%i in (%~1) do set java_args=%java_args% %%i
98
+ )
99
+ set status=
100
+ exit /b
6
101
 
7
- exit /B
8
102
  BAT
9
103
 
10
104
  java_args=""
@@ -0,0 +1,23 @@
1
+ package org.embulk.cli;
2
+
3
+ import java.io.BufferedWriter;
4
+ import java.io.File;
5
+ import java.io.FileOutputStream;
6
+ import java.io.OutputStreamWriter;
7
+ import java.nio.charset.Charset;
8
+ import java.util.Arrays;
9
+
10
+ public class DummyMain {
11
+
12
+ public static void main(String[] args) throws Exception {
13
+ System.out.println(Arrays.asList(args));
14
+ File thisFolder = new File(SelfrunTest.class.getResource("/org/embulk/cli/DummyMain.class").toURI()).getParentFile();
15
+ try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(thisFolder, "args.txt")), Charset.defaultCharset()))) {
16
+ for (String arg : args) {
17
+ writer.write(arg);
18
+ writer.newLine();
19
+ }
20
+ }
21
+ }
22
+
23
+ }
@@ -0,0 +1,281 @@
1
+ package org.embulk.cli;
2
+
3
+ import static org.junit.Assert.assertEquals;
4
+
5
+ import java.io.BufferedReader;
6
+ import java.io.BufferedWriter;
7
+ import java.io.File;
8
+ import java.io.FileOutputStream;
9
+ import java.io.IOException;
10
+ import java.io.InputStreamReader;
11
+ import java.io.OutputStreamWriter;
12
+ import java.nio.charset.Charset;
13
+ import java.nio.file.FileSystem;
14
+ import java.nio.file.FileSystems;
15
+ import java.nio.file.Files;
16
+ import java.nio.file.StandardOpenOption;
17
+ import java.util.Arrays;
18
+ import java.util.List;
19
+
20
+ import org.junit.BeforeClass;
21
+ import org.junit.Test;
22
+
23
+
24
+ public class SelfrunTest {
25
+
26
+ private static File testSelfrun;
27
+
28
+ @BeforeClass
29
+ public static void prepare() throws Exception {
30
+ File selfrun = findSelfrun();
31
+ FileSystem fs = FileSystems.getDefault();
32
+ String line = new String(Files.readAllBytes(fs.getPath(selfrun.getAbsolutePath())), Charset.defaultCharset());
33
+
34
+ File thisFolder = new File(SelfrunTest.class.getResource("/org/embulk/cli/SelfrunTest.class").toURI()).getParentFile();
35
+ testSelfrun = new File(thisFolder, System.getProperty("file.separator").equals("\\") ? "selfrun.bat" : "selfrun.sh");
36
+
37
+ File classpath = thisFolder.getParentFile().getParentFile().getParentFile();
38
+ line = line.replaceAll("java ", "java -classpath " + classpath.getAbsolutePath().replaceAll("\\\\", "\\\\\\\\") + " org.embulk.cli.DummyMain ");
39
+
40
+ // Modify selfrun so that arguments are written in 'args.txt' .
41
+ Files.write(fs.getPath(testSelfrun.getAbsolutePath()), line.getBytes(Charset.defaultCharset()), StandardOpenOption.CREATE);
42
+ if (!testSelfrun.setExecutable(true)) {
43
+ throw new Exception("Cannot se executable.");
44
+ }
45
+ }
46
+
47
+
48
+ @Test
49
+ public void testNoArgument() throws Exception {
50
+ List<String> args = execute();
51
+ assertEquals(Arrays.asList(
52
+ "-XX:+AggressiveOpts",
53
+ "-XX:+TieredCompilation",
54
+ "-XX:TieredStopAtLevel=1",
55
+ "-Xverify:none",
56
+ "-jar",
57
+ testSelfrun.getAbsolutePath()),
58
+ args);
59
+ }
60
+
61
+ @Test
62
+ public void testArguments() throws Exception {
63
+ List<String> args = execute("a1", "a2", "\"a3=v3\"");
64
+ assertEquals(Arrays.asList(
65
+ "-XX:+AggressiveOpts",
66
+ "-XX:+TieredCompilation",
67
+ "-XX:TieredStopAtLevel=1",
68
+ "-Xverify:none",
69
+ "-jar",
70
+ testSelfrun.getAbsolutePath(),
71
+ "a1",
72
+ "a2",
73
+ "a3=v3"),
74
+ args);
75
+ }
76
+
77
+ @Test
78
+ public void testRun() throws Exception {
79
+ List<String> args = execute("run", "a1");
80
+ assertEquals(Arrays.asList(
81
+ "-XX:+AggressiveOpts",
82
+ "-XX:+UseConcMarkSweepGC",
83
+ "-jar",
84
+ testSelfrun.getAbsolutePath(),
85
+ "run",
86
+ "a1"),
87
+ args);
88
+ }
89
+
90
+ @Test
91
+ public void testJpO() throws Exception {
92
+ List<String> args = execute("-J+O", "a1", "a2");
93
+ assertEquals(Arrays.asList(
94
+ "-XX:+AggressiveOpts",
95
+ "-XX:+UseConcMarkSweepGC",
96
+ "-jar",
97
+ testSelfrun.getAbsolutePath(),
98
+ "a1",
99
+ "a2"),
100
+ args);
101
+ }
102
+
103
+ @Test
104
+ public void testJmO() throws Exception {
105
+ List<String> args = execute("-J-O", "a1", "a2");
106
+ assertEquals(Arrays.asList(
107
+ "-XX:+AggressiveOpts",
108
+ "-XX:+TieredCompilation",
109
+ "-XX:TieredStopAtLevel=1",
110
+ "-Xverify:none",
111
+ "-jar",
112
+ testSelfrun.getAbsolutePath(),
113
+ "a1",
114
+ "a2"),
115
+ args);
116
+ }
117
+
118
+ @Test
119
+ public void testR1() throws Exception {
120
+ List<String> args = execute("-Rr1", "a1", "a2");
121
+ assertEquals(Arrays.asList(
122
+ "-XX:+AggressiveOpts",
123
+ "-XX:+TieredCompilation",
124
+ "-XX:TieredStopAtLevel=1",
125
+ "-Xverify:none",
126
+ "-jar",
127
+ testSelfrun.getAbsolutePath(),
128
+ "r1",
129
+ "a1",
130
+ "a2"),
131
+ args);
132
+ }
133
+
134
+ @Test
135
+ public void testR2() throws Exception {
136
+ List<String> args = execute("\"-Rr1=v1\"", "\"-Rr2=v2\"", "a1", "a2");
137
+ assertEquals(Arrays.asList(
138
+ "-XX:+AggressiveOpts",
139
+ "-XX:+TieredCompilation",
140
+ "-XX:TieredStopAtLevel=1",
141
+ "-Xverify:none",
142
+ "-jar",
143
+ testSelfrun.getAbsolutePath(),
144
+ "r1=v1",
145
+ "r2=v2",
146
+ "a1",
147
+ "a2"),
148
+ args);
149
+ }
150
+
151
+ @Test
152
+ public void testRRun() throws Exception {
153
+ List<String> args = execute("-Rr1", "run", "a1");
154
+ assertEquals(Arrays.asList(
155
+ "-XX:+AggressiveOpts",
156
+ "-XX:+UseConcMarkSweepGC",
157
+ "-jar",
158
+ testSelfrun.getAbsolutePath(),
159
+ "r1",
160
+ "run",
161
+ "a1"),
162
+ args);
163
+ }
164
+
165
+ @Test
166
+ public void testJ1() throws Exception {
167
+ List<String> args = execute("-J-Dj1", "a1", "a2");
168
+ assertEquals(Arrays.asList(
169
+ "-XX:+AggressiveOpts",
170
+ "-XX:+TieredCompilation",
171
+ "-XX:TieredStopAtLevel=1",
172
+ "-Xverify:none",
173
+ "-Dj1",
174
+ "-jar",
175
+ testSelfrun.getAbsolutePath(),
176
+ "a1",
177
+ "a2"),
178
+ args);
179
+ }
180
+
181
+ @Test
182
+ public void testJ2() throws Exception {
183
+ List<String> args = execute("\"-J-Dj1=v1\"", "\"-J-Dj2=v2\"", "a1", "a2");
184
+ assertEquals(Arrays.asList(
185
+ "-XX:+AggressiveOpts",
186
+ "-XX:+TieredCompilation",
187
+ "-XX:TieredStopAtLevel=1",
188
+ "-Xverify:none",
189
+ "-Dj1=v1",
190
+ "-Dj2=v2",
191
+ "-jar",
192
+ testSelfrun.getAbsolutePath(),
193
+ "a1",
194
+ "a2"),
195
+ args);
196
+ }
197
+
198
+ @Test
199
+ public void testJR() throws Exception {
200
+ List<String> args = execute("-Jj1", "-Rr1", "a1", "a2");
201
+ assertEquals(Arrays.asList(
202
+ "-XX:+AggressiveOpts",
203
+ "-XX:+TieredCompilation",
204
+ "-XX:TieredStopAtLevel=1",
205
+ "-Xverify:none",
206
+ "j1",
207
+ "-jar",
208
+ testSelfrun.getAbsolutePath(),
209
+ "r1",
210
+ "a1",
211
+ "a2"),
212
+ args);
213
+ }
214
+
215
+ @Test
216
+ public void testJFile() throws Exception {
217
+ File javaArgsFile = new File(testSelfrun.getParentFile(), "java_args.txt");
218
+ FileSystem fs = FileSystems.getDefault();
219
+ Files.write(fs.getPath(javaArgsFile.getAbsolutePath()), "j1 j2 j3".getBytes(Charset.defaultCharset()), StandardOpenOption.CREATE);
220
+
221
+ List<String> args = execute("-J", javaArgsFile.getAbsolutePath(), "a1", "a2");
222
+ assertEquals(Arrays.asList(
223
+ "-XX:+AggressiveOpts",
224
+ "-XX:+TieredCompilation",
225
+ "-XX:TieredStopAtLevel=1",
226
+ "-Xverify:none",
227
+ "j1",
228
+ "j2",
229
+ "j3",
230
+ "-jar",
231
+ testSelfrun.getAbsolutePath(),
232
+ "a1",
233
+ "a2"),
234
+ args);
235
+ }
236
+
237
+ private List<String> execute(String... arguments) throws Exception {
238
+ File temp = new File(testSelfrun.getParentFile(), "call-" + testSelfrun.getName());
239
+ try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(temp), Charset.defaultCharset()))) {
240
+ writer.write(testSelfrun.getAbsolutePath());
241
+ for (String argument : arguments) {
242
+ writer.write(" ");
243
+ writer.write(argument);
244
+ }
245
+ }
246
+ if (!temp.setExecutable(true)) {
247
+ throw new Exception("Cannot se executable.");
248
+ }
249
+
250
+ File argsFile = new File(testSelfrun.getParentFile(), "args.txt");
251
+ if (argsFile.exists()) {
252
+ if (!argsFile.delete()) {
253
+ throw new IOException("Cannot delete " + argsFile);
254
+ }
255
+ }
256
+
257
+ Process process = Runtime.getRuntime().exec(temp.getAbsolutePath());
258
+ int exitCode = process.waitFor();
259
+ if (exitCode != 0 || !argsFile.exists()) {
260
+ StringBuilder builder = new StringBuilder();
261
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream(), Charset.defaultCharset()))) {
262
+ builder.append(reader.readLine());
263
+ builder.append(System.getProperty("line.separator"));
264
+ }
265
+ throw new Exception(builder.toString());
266
+ }
267
+
268
+ FileSystem fs = FileSystems.getDefault();
269
+ List<String> args = Files.readAllLines(fs.getPath(argsFile.getAbsolutePath()), Charset.defaultCharset());
270
+ return args;
271
+ }
272
+
273
+ private static File findSelfrun() {
274
+ File folder = new File(".");
275
+ if (new File(folder, "embulk-cli").exists()) {
276
+ folder = new File(folder, "embulk-cli");
277
+ }
278
+ return new File(new File(new File(new File(folder, "src"), "main"), "sh"), "selfrun.sh");
279
+ }
280
+
281
+ }
@@ -143,6 +143,8 @@ Options
143
143
  +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
144
144
  | trim\_if\_not\_quoted | boolean | If true, remove spaces of a value if the value is not surrounded by the quote character | ``false`` by default |
145
145
  +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
146
+ | comment\_line\_marker | string | Skip a line if the line begins with this string | null by default |
147
+ +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
146
148
  | allow\_optional\_columns | boolean | If true, set null to insufficient columns. Otherwise, skip the row in case of insufficient number of columns | ``false`` by default |
147
149
  +----------------------------+----------+----------------------------------------------------------------------------------------------------------------+------------------------+
148
150
  | allow\_extra\_columns | boolean | If true, ignore too many columns. Otherwise, skip the row in case of too many columns | ``false`` by default |
@@ -204,6 +206,7 @@ Example
204
206
  escape: ''
205
207
  null_string: 'NULL'
206
208
  skip_header_lines: 1
209
+ comment_line_marker: '#'
207
210
  columns:
208
211
  - {name: id, type: long}
209
212
  - {name: account, type: long}
@@ -0,0 +1,19 @@
1
+ Release 0.6.11
2
+ ==================================
3
+
4
+ Built-in plugins
5
+ ------------------
6
+
7
+ * ``input-file`` plugin keeps the previous ``last_path`` value (if any) when there are no input files.
8
+ * ``parser-csv`` supports **comment_line_marker** option to skip lines starting with comment characters such as '#'.
9
+ * Fixed a bug where ``guess-csv`` guessed the timestamp format incorrectly when the date order is month-day-year.
10
+
11
+ General Changes
12
+ ------------------
13
+
14
+ * Command line execution supports ``-J``, ``-R``, ``-J+O``, and ``-J-O`` arguments on Windows (@hito4++)
15
+
16
+
17
+ Release Date
18
+ ------------------
19
+ 2015-05-30
@@ -4,6 +4,7 @@ Release Notes
4
4
  .. toctree::
5
5
  :maxdepth: 1
6
6
 
7
+ release/release-0.6.11
7
8
  release/release-0.6.10
8
9
  release/release-0.6.9
9
10
  release/release-0.6.8
@@ -75,6 +75,10 @@ public class CsvParserPlugin
75
75
  @ConfigDefault("131072") //128kB
76
76
  public long getMaxQuotedSizeLimit();
77
77
 
78
+ @Config("comment_line_marker")
79
+ @ConfigDefault("null")
80
+ public Optional<String> getCommentLineMarker();
81
+
78
82
  @Config("allow_optional_columns")
79
83
  @ConfigDefault("false")
80
84
  public boolean getAllowOptionalColumns();
@@ -27,6 +27,7 @@ public class CsvTokenizer
27
27
  private final String newline;
28
28
  private final boolean trimIfNotQuoted;
29
29
  private final long maxQuotedSizeLimit;
30
+ private final String commentLineMarker;
30
31
  private final LineDecoder input;
31
32
 
32
33
  private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
@@ -46,6 +47,7 @@ public class CsvTokenizer
46
47
  newline = task.getNewline().getString();
47
48
  trimIfNotQuoted = task.getTrimIfNotQuoted();
48
49
  maxQuotedSizeLimit = task.getMaxQuotedSizeLimit();
50
+ commentLineMarker = task.getCommentLineMarker().orNull();
49
51
  this.input = input;
50
52
  }
51
53
 
@@ -93,7 +95,7 @@ public class CsvTokenizer
93
95
  }
94
96
  }
95
97
 
96
- private boolean nextLine(boolean ignoreEmptyLine)
98
+ private boolean nextLine(boolean skipEmptyLine)
97
99
  {
98
100
  while (true) {
99
101
  if (!unreadLines.isEmpty()) {
@@ -107,7 +109,10 @@ public class CsvTokenizer
107
109
  linePos = 0;
108
110
  lineNumber++;
109
111
 
110
- if (!line.isEmpty() || !ignoreEmptyLine) {
112
+ boolean skip = skipEmptyLine && (
113
+ line.isEmpty() ||
114
+ (commentLineMarker != null && line.startsWith(commentLineMarker)));
115
+ if (!skip) {
111
116
  return true;
112
117
  }
113
118
  }
@@ -79,10 +79,22 @@ public class LocalFileInputPlugin
79
79
 
80
80
  control.run(taskSource, taskCount);
81
81
 
82
- List<String> files = new ArrayList<String>(task.getFiles());
83
- Collections.sort(files);
84
- return Exec.newConfigDiff().
85
- set("last_path", files.get(files.size() - 1));
82
+ // build next config
83
+ ConfigDiff configDiff = Exec.newConfigDiff();
84
+
85
+ // last_path
86
+ if (task.getFiles().isEmpty()) {
87
+ // keep the last value
88
+ if (task.getLastPath().isPresent()) {
89
+ configDiff.set("last_path", task.getLastPath().get());
90
+ }
91
+ } else {
92
+ List<String> files = new ArrayList<String>(task.getFiles());
93
+ Collections.sort(files);
94
+ configDiff.set("last_path", files.get(files.size() - 1));
95
+ }
96
+
97
+ return configDiff;
86
98
  }
87
99
 
88
100
  @Override
@@ -226,6 +226,20 @@ public class TestCsvTokenizer
226
226
  "\n\"a\\\"aa\",\"b,bb\\\"\"\n\n\"cc\"\"c\",\"\"\"ddd\"\n,\"\"\n")));
227
227
  }
228
228
 
229
+ @Test
230
+ public void testCommentLineMarker() throws Exception
231
+ {
232
+ config.set("comment_line_marker", JsonNodeFactory.instance.textNode("#"));
233
+ reloadPluginTask();
234
+ assertEquals(expectedRecords(2,
235
+ "aaa", "bbb",
236
+ "eee", "fff"),
237
+ parse(task,
238
+ "aaa,bbb",
239
+ "#ccc,ddd",
240
+ "eee,fff"));
241
+ }
242
+
229
243
  @Test
230
244
  public void trimNonQuotedValues() throws Exception
231
245
  {
@@ -93,7 +93,7 @@ out:
93
93
  (If guess supported) you don't have to write `<%= category %>:` section in the configuration file. After writing `in:` section, you can let embulk guess `<%= category %>:` section using this command:
94
94
 
95
95
  ```
96
- $ embulk install <%= project_name %>
96
+ $ embulk gem install <%= project_name %>
97
97
  $ embulk guess -g <%= name %> config.yml -o guessed.yml
98
98
  ```
99
99
  %end
@@ -24,6 +24,11 @@ module Embulk
24
24
  "\\N", # MySQL LOAD, Hive STORED AS TEXTFILE
25
25
  ]
26
26
 
27
+ COMMENT_LINE_MARKER_CANDIDATES = [
28
+ "#",
29
+ "//",
30
+ ]
31
+
27
32
  MAX_SKIP_LINES = 10
28
33
  NO_SKIP_DETECT_LINES = 10
29
34
 
@@ -56,9 +61,12 @@ module Embulk
56
61
  end
57
62
 
58
63
  sample_records = split_lines(parser_guessed, sample_lines, delim)
64
+
59
65
  skip_header_lines = guess_skip_header_lines(sample_records)
60
66
  sample_records = sample_records[skip_header_lines..-1]
61
67
 
68
+ comment_line_marker, sample_records = guess_comment_line_marker(sample_records)
69
+
62
70
  first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
63
71
  other_types = SchemaGuess.types_from_array_records(sample_records[1..-1])
64
72
 
@@ -75,6 +83,8 @@ module Embulk
75
83
  parser_guessed["skip_header_lines"] = skip_header_lines
76
84
  end
77
85
 
86
+ parser_guessed["comment_line_marker"] = comment_line_marker # always set comment_line_marker even if it's null
87
+
78
88
  parser_guessed["allow_extra_columns"] = false
79
89
  parser_guessed["allow_optional_columns"] = false
80
90
 
@@ -113,7 +123,10 @@ module Embulk
113
123
  columns = []
114
124
  while true
115
125
  begin
116
- columns << tokenizer.nextColumn
126
+ column = tokenizer.nextColumn
127
+ quoted = tokenizer.wasQuotedColumn
128
+ column.define_singleton_method(:quoted?) { quoted }
129
+ columns << column
117
130
  rescue org.embulk.standards.CsvTokenizer::TooFewColumnsException
118
131
  rows << columns
119
132
  break
@@ -200,21 +213,38 @@ module Embulk
200
213
  count = counts.inject(0) {|r,c| r + c }
201
214
  [str, count]
202
215
  end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
203
- found = guessed.first
204
- return found ? found[0] : nil
216
+ found_str, found_count = guessed.first
217
+ return found_str ? found_str : nil
205
218
  end
206
219
 
207
220
  def guess_skip_header_lines(sample_records)
208
221
  counts = sample_records.map {|records| records.size }
209
222
  (1..[MAX_SKIP_LINES, counts.length - 1].min).each do |i|
210
223
  check_row_count = counts[i-1]
211
- if counts[i, NO_SKIP_DETECT_LINES].all? {|c| c == check_row_count }
224
+ if counts[i, NO_SKIP_DETECT_LINES].all? {|c| c <= check_row_count }
212
225
  return i - 1
213
226
  end
214
227
  end
215
228
  return 0
216
229
  end
217
230
 
231
+ def guess_comment_line_marker(sample_records)
232
+ guessed = COMMENT_LINE_MARKER_CANDIDATES.map do |str|
233
+ regexp = /^#{Regexp.quote(str)}/
234
+ records = sample_records.reject do |records|
235
+ !records[0].quoted? && !NULL_STRING_CANDIDATES.include?(records[0]) && records[0] =~ regexp
236
+ end
237
+ count = sample_records.size - records.size
238
+ [str, count, records]
239
+ end.select {|str,count,records| count > 0 }.sort_by {|str,count,records| -count }
240
+ found_str, found_count, found_records = guessed.first
241
+ if found_str
242
+ return found_str, found_records
243
+ else
244
+ return nil, sample_records
245
+ end
246
+ end
247
+
218
248
  def array_sum(array)
219
249
  array.inject(0) {|r,i| r += i }
220
250
  end
@@ -196,12 +196,12 @@ module Embulk::Guess
196
196
 
197
197
  parts << :year
198
198
  part_options << nil
199
- delimiters << date_delim
200
199
 
200
+ delimiters << date_delim
201
201
  parts << :month
202
202
  part_options << part_heading_option(dm["month"])
203
- delimiters << date_delim
204
203
 
204
+ delimiters << date_delim
205
205
  parts << :day
206
206
  part_options << part_heading_option(dm["day"])
207
207
 
@@ -210,30 +210,28 @@ module Embulk::Guess
210
210
 
211
211
  parts << :month
212
212
  part_options << part_heading_option(dm["month"])
213
- delimiters << date_delim
214
213
 
214
+ delimiters << date_delim
215
215
  parts << :day
216
216
  part_options << part_heading_option(dm["day"])
217
- delimiters << date_delim
218
217
 
218
+ delimiters << date_delim
219
219
  parts << :year
220
220
  part_options << nil
221
- delimiters << date_delim
222
221
 
223
222
  elsif dm = (/^#{DMY}(?<rest>.*?)$/.match(text) or /^#{DMY_NODELIM}(?<rest>.*?)$/.match(text))
224
223
  date_delim = dm["date_delim"] rescue ""
225
224
 
226
225
  parts << :day
227
226
  part_options << part_heading_option(dm["day"])
228
- delimiters << date_delim
229
227
 
228
+ delimiters << date_delim
230
229
  parts << :month
231
230
  part_options << part_heading_option(dm["month"])
232
- delimiters << date_delim
233
231
 
232
+ delimiters << date_delim
234
233
  parts << :year
235
234
  part_options << nil
236
- delimiters << date_delim
237
235
 
238
236
  else
239
237
  date_delim = ""
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.6.10'
2
+ VERSION = '0.6.11'
3
3
  end
@@ -45,6 +45,13 @@ class TimeFormatGuessTest < ::Test::Unit::TestCase
45
45
  assert_guess "%m.%d.%Y", "01.01.2014"
46
46
  assert_guess "%d/%m/%Y", "13/01/2014"
47
47
  assert_guess "%d/%m/%Y", "21/01/2014"
48
+
49
+ assert_guess "%d/%m/%Y %H-%M-%S,%N", "21/01/2014 01-01-01,000000001"
50
+ assert_guess "%d/%m/%Y %H-%M-%S,%N", "21/01/2014 01-01-01,000001"
51
+ assert_guess "%d/%m/%Y %H-%M-%S,%L", "21/01/2014 01-01-01,001"
52
+ assert_guess "%d/%m/%Y %H-%M-%S", "21/01/2014 01-01-01"
53
+ assert_guess "%d/%m/%Y %H-%M", "21/01/2014 01-01"
54
+ assert_guess "%d/%m/%Y", "21/01/2014"
48
55
  end
49
56
 
50
57
  def test_format_borders
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.10
4
+ version: 0.6.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-22 00:00:00.000000000 Z
11
+ date: 2015-05-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,6 +100,8 @@ files:
100
100
  - embulk-cli/build.gradle
101
101
  - embulk-cli/src/main/java/org/embulk/cli/Main.java
102
102
  - embulk-cli/src/main/sh/selfrun.sh
103
+ - embulk-cli/src/test/java/org/embulk/cli/DummyMain.java
104
+ - embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java
103
105
  - embulk-core/build.gradle
104
106
  - embulk-core/src/main/java/org/embulk/EmbulkService.java
105
107
  - embulk-core/src/main/java/org/embulk/command/PreviewPrinter.java
@@ -294,6 +296,7 @@ files:
294
296
  - embulk-docs/src/release/release-0.6.0.rst
295
297
  - embulk-docs/src/release/release-0.6.1.rst
296
298
  - embulk-docs/src/release/release-0.6.10.rst
299
+ - embulk-docs/src/release/release-0.6.11.rst
297
300
  - embulk-docs/src/release/release-0.6.2.rst
298
301
  - embulk-docs/src/release/release-0.6.3.rst
299
302
  - embulk-docs/src/release/release-0.6.4.rst
@@ -409,8 +412,8 @@ files:
409
412
  - classpath/bval-jsr303-0.5.jar
410
413
  - classpath/commons-beanutils-core-1.8.3.jar
411
414
  - classpath/commons-lang3-3.1.jar
412
- - classpath/embulk-core-0.6.10.jar
413
- - classpath/embulk-standards-0.6.10.jar
415
+ - classpath/embulk-core-0.6.11.jar
416
+ - classpath/embulk-standards-0.6.11.jar
414
417
  - classpath/guava-18.0.jar
415
418
  - classpath/guice-4.0.jar
416
419
  - classpath/guice-multibindings-4.0.jar