embulk 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d63d08592793b691be7f691d842f73d18a202d45
4
- data.tar.gz: 5a14e328213b2a97fe6e0abaa044428bb5caa0fd
3
+ metadata.gz: b24d1b7fd55459a318f9dbce2fff9b976f40738b
4
+ data.tar.gz: 88573c322b4d3e32f4fb11775122db1c1b28f3d2
5
5
  SHA512:
6
- metadata.gz: 8e447c2ae251e6ef3309c0862344d10074797389ea2c55bce2e2302d82eea2425345316efdf7dae9adcd9f08de5b0132c04e6cad43edaef29ac3cf635ad640f8
7
- data.tar.gz: 0b33b0a4f2dc7911d1432828e303c464c8b25bda00dc9eb3df8c9fb905c85011c30132292c581163fb557d2e2eaeb3a32a8bbd61bd486509361b4464aa2477ed
6
+ metadata.gz: 3179a8555d542ae38a785ad308a9b44560ae66889f263c932675e4ef5050b719636126ec0a960c6dba098030e269c3b0dd7b34f841d278857464c143ea0a2940
7
+ data.tar.gz: 20772970a71e622473f119ebc4ee9316fdbfff48e96948345fb6a001342d727a70ecd525e217fac98eb0838f2e20ccebcfd6d48603c945a0d9b096f7070ea92a
data/.gitignore CHANGED
@@ -1,5 +1,6 @@
1
1
  target/
2
2
  build/
3
+ pkg/
3
4
  *.iml
4
5
  *~
5
6
  ._*
data/README.md CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
24
24
  The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
25
25
 
26
26
  ```
27
- wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.2.jar -O embulk.jar
27
+ wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.3.jar -O embulk.jar
28
28
  java -jar embulk.jar --help
29
29
  ```
30
30
 
data/bin/embulk CHANGED
@@ -13,7 +13,7 @@ while i = ARGV.find_index {|arg| arg =~ /^\-D/ }
13
13
  end
14
14
  ARGV.slice!(i, 2)
15
15
  else
16
- java_args << ARGV[i]
16
+ java_args << ARGV[i][2..-1]
17
17
  ARGV.slice!(i)
18
18
  end
19
19
  end
@@ -4,19 +4,22 @@ plugins {
4
4
  id 'com.github.ben-manes.versions' version '0.7'
5
5
  id 'com.github.jruby-gradle.base' version '0.1.5'
6
6
  id 'com.github.johnrengelman.shadow' version '1.2.0'
7
- id 'java'
8
7
  }
9
8
 
9
+ def java_projects = [project(":embulk-core"), project(":embulk-standards"), project(":embulk-cli")]
10
+ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
+
10
12
  allprojects {
11
13
  group = 'org.embulk'
12
- version = '0.4.2'
14
+ version = '0.4.3'
13
15
 
14
- apply plugin: 'maven' // install jar files to the local repo: $ gradle install
16
+ apply plugin: 'java'
15
17
  apply plugin: 'maven-publish'
16
18
  apply plugin: 'com.jfrog.bintray'
17
- apply plugin: 'java'
18
19
 
19
- // upload artifacts to Bintray: $ gradle bintrayUpload
20
+ //
21
+ // bintrayUpload task
22
+ //
20
23
  bintray {
21
24
  // write at your bintray user name and api key to ~/.gradle/gradle.properties file:
22
25
  // bintray_user=frsyuki
@@ -24,7 +27,13 @@ allprojects {
24
27
  user = project.hasProperty('bintray_user') ? bintray_user : ''
25
28
  key = project.hasProperty('bintray_api_key') ? bintray_api_key : ''
26
29
 
27
- publications = ['mavenJava']
30
+ publications = ['bintrayMavenRelease']
31
+
32
+ filesSpec {
33
+ // include embulk-<version>.jar built by 'cli' task
34
+ from "pkg/embulk-${project.version}.jar"
35
+ into "embulk-${project.version}.jar"
36
+ }
28
37
 
29
38
  dryRun = false
30
39
  publish = false // TODO automate uploading embulk.jar and make this true
@@ -56,54 +65,24 @@ allprojects {
56
65
  }
57
66
 
58
67
  subprojects {
59
- apply plugin: 'findbugs'
60
- apply plugin: 'jacoco'
61
-
62
- repositories {
63
- mavenCentral()
64
- jcenter()
65
- }
66
-
67
- compileJava.options.encoding = 'UTF-8' // source encoding
68
- sourceCompatibility = 1.7
69
- targetCompatibility = 1.7
68
+ if (java_projects.contains(project)) {
69
+ apply plugin: 'maven'
70
+ apply plugin: 'findbugs'
71
+ apply plugin: 'jacoco'
72
+
73
+ repositories {
74
+ mavenCentral()
75
+ jcenter()
76
+ }
70
77
 
71
- configurations {
72
- // guice depends on asm and cglib but version of the libraries conflict
73
- // with ones bundled in jruby-complete and cause bytecode compatibility error
74
- compile.exclude group: 'asm', module: 'asm'
75
- compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
76
- }
78
+ compileJava.options.encoding = 'UTF-8' // source encoding
79
+ sourceCompatibility = 1.7
80
+ targetCompatibility = 1.7
77
81
 
78
- // determine which dependencies have updates: $ gradle dependencyUpdates
79
- dependencies {
80
- compile 'com.google.guava:guava:18.0'
81
- compile 'com.google.inject:guice:3.0'
82
- compile 'com.google.inject.extensions:guice-multibindings:3.0'
83
- compile 'javax.inject:javax.inject:1'
84
- compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
85
- compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
86
- compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
87
- compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
88
- compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
89
- compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
90
- compile 'log4j:log4j:1.2.17'
91
- compile 'org.slf4j:slf4j-api:1.7.10'
92
- compile 'org.slf4j:slf4j-log4j12:1.7.10'
93
- compile 'org.jruby:jruby-complete:1.7.19'
94
- compile 'com.google.code.findbugs:annotations:3.0.0'
95
- compile 'org.yaml:snakeyaml:1.14'
96
- compile 'javax.validation:validation-api:1.1.0.Final'
97
- compile 'org.apache.bval:bval-jsr303:0.5'
98
- compile 'io.airlift:slice:0.9'
99
- compile 'joda-time:joda-time:2.7'
100
- compile 'io.netty:netty-buffer:5.0.0.Alpha1'
101
- compile 'com.ibm.icu:icu4j:54.1.1'
102
-
103
- testCompile 'junit:junit:4.12'
104
- }
82
+ dependencies {
83
+ testCompile 'junit:junit:4.12'
84
+ }
105
85
 
106
- gradle.projectsEvaluated {
107
86
  tasks.withType(JavaCompile) {
108
87
  options.compilerArgs << "-Xlint:unchecked" //<< "-Xlint:deprecation"
109
88
  }
@@ -113,39 +92,36 @@ subprojects {
113
92
  html.enabled = true
114
93
  }
115
94
  }
116
- }
117
95
 
118
- findbugs {
119
- ignoreFailures = true
120
- }
96
+ findbugs {
97
+ ignoreFailures = true
98
+ }
121
99
 
122
- javadoc {
123
- options {
124
- locale = 'en_US'
125
- encoding = 'UTF-8'
100
+ javadoc {
101
+ options {
102
+ locale = 'en_US'
103
+ encoding = 'UTF-8'
104
+ }
126
105
  }
127
- }
128
106
 
129
- // add javadoc/source jar tasks as artifacts to be released
130
- task sourcesJar(type: Jar, dependsOn: classes) {
131
- classifier = 'sources'
132
- from sourceSets.main.allSource
133
- }
134
- task javadocJar(type: Jar, dependsOn: javadoc) {
135
- classifier = 'javadoc'
136
- from javadoc.destinationDir
137
- }
138
- artifacts {
139
- archives sourcesJar, javadocJar
107
+ // add javadoc/source jar tasks as artifacts to be released
108
+ task sourcesJar(type: Jar, dependsOn: classes) {
109
+ classifier = 'sources'
110
+ from sourceSets.main.allSource
111
+ }
112
+ task javadocJar(type: Jar, dependsOn: javadoc) {
113
+ classifier = 'javadoc'
114
+ from javadoc.destinationDir
115
+ }
116
+ artifacts {
117
+ archives sourcesJar, javadocJar
118
+ }
140
119
  }
141
120
 
142
121
  publishing {
143
122
  publications {
144
- if (!project.name.equals("embulk-docs")) {
145
- mavenJava(MavenPublication) {
146
- groupId project.group
147
- artifactId project.name
148
- version project.version
123
+ if (release_projects.contains(project)) {
124
+ bintrayMavenRelease(MavenPublication) {
149
125
  from components.java
150
126
  artifact sourcesJar
151
127
  artifact javadocJar
@@ -155,6 +131,28 @@ subprojects {
155
131
  }
156
132
  }
157
133
 
134
+ //
135
+ // classpath task
136
+ //
137
+ task classpath(dependsOn: ['build', ':embulk-cli:classpath']) << { }
138
+ clean { delete 'classpath' }
139
+
140
+ //
141
+ // cli task
142
+ //
143
+ task cli(dependsOn: ':embulk-cli:shadowJar') << {
144
+ file('pkg').mkdirs()
145
+ File f = file("pkg/embulk-${project.version}.jar")
146
+ f.write('''\
147
+ #!/bin/sh
148
+ exec java -jar "$0" "$@"
149
+ exit 127
150
+ ''')
151
+ f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
152
+ f.setExecutable(true)
153
+ }
154
+ bintrayUpload.dependsOn(['cli'])
155
+
158
156
  project(':embulk-cli') {
159
157
  apply plugin: 'com.github.johnrengelman.shadow'
160
158
 
@@ -167,31 +165,19 @@ project(':embulk-cli') {
167
165
  'Specification-Version': project.version,
168
166
  'Main-Class': 'org.embulk.cli.Main'
169
167
  }
170
- append("${parent.projectDir}/COPYING")
168
+ append("${rootProject.projectDir}/COPYING")
171
169
  }
172
170
 
173
171
  task classpath(type: Copy) {
174
- doFirst { file("${parent.projectDir}/classpath").mkdirs() }
172
+ doFirst { file("${rootProject.projectDir}/classpath").mkdirs() }
175
173
  from configurations.runtime
176
- into "${parent.projectDir}/classpath"
174
+ into "${rootProject.projectDir}/classpath"
177
175
  }
178
176
  }
179
177
 
180
- task classpath(dependsOn: ['build', ':embulk-cli:classpath']) << { }
181
- clean { delete 'classpath' }
182
-
183
- task cli(dependsOn: ':embulk-cli:shadowJar') << {
184
- file('pkg').mkdirs()
185
- File f = file("pkg/embulk-${project.version}.jar")
186
- f.write('''\
187
- #!/bin/sh
188
- exec java -jar "$0" "$@"
189
- exit 127
190
- ''')
191
- f.append(file("embulk-cli/build/libs/embulk-cli-${project.version}-all.jar").readBytes())
192
- f.setExecutable(true)
193
- }
194
-
178
+ //
179
+ // gem task
180
+ //
195
181
  import com.github.jrubygradle.JRubyExec
196
182
  task gem(type: JRubyExec) {
197
183
  jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'build'
@@ -201,6 +187,18 @@ task gem(type: JRubyExec) {
201
187
  gem.dependsOn('gemspec')
202
188
  gem.dependsOn('classpath')
203
189
 
190
+ //
191
+ // rubyGemsUpload task
192
+ //
193
+ task rubyGemsUpload(type: JRubyExec, dependsOn: ["gem"]) {
194
+ jrubyArgs '-rrubygems/gem_runner', '-eGem::GemRunner.new.run(ARGV)', 'push'
195
+ script "pkg/embulk-${project.version}.gem"
196
+ }
197
+ gem.dependsOn('gemspec')
198
+
199
+ //
200
+ // releaseCheck and release tasks
201
+ //
204
202
  task releaseCheck << {
205
203
  if (!file("lib/embulk/version.rb").getText().contains("${project.version}")) {
206
204
  throw new GradleException("lib/embulk/version.rb doesn't include ${project.version}")
@@ -215,21 +213,21 @@ task releaseCheck << {
215
213
  if (!file("embulk-docs/src/release/release-${project.version}.rst").getText().contains(date)) {
216
214
  throw new GradleException("embulk-docs/src/release/release-${project.version}.rst doesn't include today's release date")
217
215
  }
218
- println "Ready. Run ./gradlew release"
216
+ // TODO check git-ls-files includes release-<version>.rst file
217
+ println "Ready. Run 'release' task."
219
218
  }
220
219
 
221
- task release(dependsOn: ["cli", "gem"]) << {
220
+ task release(dependsOn: ["releaseCheck", "bintrayUpload", "rubyGemsUpload"]) << {
222
221
  println """
223
- manual operations:
222
+ Manual operations:
224
223
 
225
- git commit -a -m v${project.version}
224
+ git commit -am v${project.version}
226
225
  git tag v${project.version}
227
- gem push pkg/embulk-${project.version}.gem"
228
- ./gradlew bintrayUpload
229
- open "https://bintray.com/embulk/maven/embulk/${project.version}/view" # and upload pkg/embulk-${project.version}.jar
230
226
 
231
227
  """
232
228
  }
229
+ bintrayUpload.mustRunAfter('releaseCheck')
230
+ rubyGemsUpload.mustRunAfter('releaseCheck')
233
231
 
234
232
  task gemspec << {
235
233
  file('build').mkdirs()
@@ -1,6 +1,41 @@
1
+ // include ruby scripts
1
2
  sourceSets {
2
3
  main.resources {
3
- srcDirs 'src/main/resources'
4
- srcDirs "${parent.projectDir}/lib"
4
+ srcDirs "${rootProject.projectDir}/lib"
5
5
  }
6
6
  }
7
+
8
+ configurations {
9
+ // com.google.inject:guice depends on asm and cglib but version of the libraries conflict
10
+ // with ones bundled in jruby-complete and cause bytecode compatibility error
11
+ compile.exclude group: 'asm', module: 'asm'
12
+ compile.exclude group: 'org.sonatype.sisu.inject', module: 'cglib'
13
+ }
14
+
15
+ // determine which dependencies have updates: $ gradle dependencyUpdates
16
+ dependencies {
17
+ compile 'com.google.guava:guava:18.0'
18
+ compile 'com.google.inject:guice:3.0'
19
+ compile 'com.google.inject.extensions:guice-multibindings:3.0'
20
+ compile 'javax.inject:javax.inject:1'
21
+ compile 'com.fasterxml.jackson.core:jackson-annotations:2.5.0'
22
+ compile 'com.fasterxml.jackson.core:jackson-core:2.5.0'
23
+ compile 'com.fasterxml.jackson.core:jackson-databind:2.5.0'
24
+ compile 'com.fasterxml.jackson.datatype:jackson-datatype-guava:2.5.0'
25
+ compile 'com.fasterxml.jackson.datatype:jackson-datatype-joda:2.5.0'
26
+ compile 'com.fasterxml.jackson.module:jackson-module-guice:2.5.0'
27
+ compile 'log4j:log4j:1.2.17'
28
+ compile 'org.slf4j:slf4j-api:1.7.10'
29
+ compile 'org.slf4j:slf4j-log4j12:1.7.10'
30
+ compile 'org.jruby:jruby-complete:1.7.19'
31
+ compile 'com.google.code.findbugs:annotations:3.0.0'
32
+ compile 'org.yaml:snakeyaml:1.14'
33
+ compile 'javax.validation:validation-api:1.1.0.Final'
34
+ compile 'org.apache.bval:bval-jsr303:0.5'
35
+ compile 'io.airlift:slice:0.9'
36
+ compile 'joda-time:joda-time:2.7'
37
+ compile 'io.netty:netty-buffer:5.0.0.Alpha1'
38
+
39
+ // for embulk/guess/charset.rb
40
+ compile 'com.ibm.icu:icu4j:54.1.1'
41
+ }
@@ -169,7 +169,11 @@ public class Runner
169
169
 
170
170
  String yml = writeNextConfig(options.getNextConfigOutputPath(), config, configDiff);
171
171
  System.err.println(yml);
172
- System.out.println("Created "+options.getNextConfigOutputPath());
172
+ if (options.getNextConfigOutputPath() == null) {
173
+ System.out.println("Use -o PATH option to write the guessed config file to a file.");
174
+ } else {
175
+ System.out.println("Created '"+options.getNextConfigOutputPath()+"' file.");
176
+ }
173
177
  }
174
178
 
175
179
  private void checkFileWritable(String path)
@@ -13,4 +13,5 @@ Release Notes
13
13
  release/release-0.4.0
14
14
  release/release-0.4.1
15
15
  release/release-0.4.2
16
+ release/release-0.4.3
16
17
 
@@ -0,0 +1,34 @@
1
+ Release 0.4.3
2
+ ==================================
3
+
4
+ CLI
5
+ ------------------
6
+
7
+ * All subcommands show the current time with timezone and Embulk's version number at the beginning.
8
+
9
+ Plugin API Changes
10
+ ------------------
11
+
12
+ * ``Thread.currentThread().getContextClassLoader()`` no longer returns JRuby's classloader. It returns null so that dependent libraries fall back to the appropriate ``this.getClass().getClassLoader()`` call.
13
+
14
+ Built-in plugins
15
+ ------------------
16
+
17
+ * ``guess/csv`` guesses ``escape`` and ``null_string`` options.
18
+ * Fixed ``guess/csv`` failing when the CSV file includes a timestamp value with a timezone (@kinyuka++).
19
+ * Fixed a memory leak in ``output/file`` (@akirakw++).
20
+ * Fixed ``input/file`` loading unnecessary files when it lists files from ``.``.
21
+
22
+ General Changes
23
+ ------------------
24
+
25
+ * embulk-cli artifact is no longer released.
26
+ * embulk-standards artifact doesn't directly depend on dependencies of embulk-core.
27
+ * Updated the build script.
28
+
29
+ * ``bintrayUpload`` task uploads embulk-<version>.jar.
30
+ * ``release`` task actually releases gem to RubyGems and publishes jar files to Bintray.
31
+
32
+ Release Date
33
+ ------------------
34
+ 2015-02-17
@@ -51,6 +51,8 @@ public class LocalFileInputPlugin
51
51
 
52
52
  private final Logger log = Exec.getLogger(getClass());
53
53
 
54
+ private final static Path CURRENT_DIR = Paths.get(".").normalize();
55
+
54
56
  @Override
55
57
  public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
56
58
  {
@@ -92,27 +94,58 @@ public class LocalFileInputPlugin
92
94
  } else {
93
95
  fileNamePrefix = pathPrefix.getFileName().toString();
94
96
  Path d = pathPrefix.getParent();
95
- directory = (d == null ? Paths.get(".") : d);
97
+ directory = (d == null ? CURRENT_DIR : d);
96
98
  }
97
99
 
98
100
  final ImmutableList.Builder<String> builder = ImmutableList.builder();
99
101
  final String lastPath = task.getLastPath().orNull();
100
102
  try {
101
- log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory, fileNamePrefix);
103
+ log.info("Listing local files at directory '{}' filtering filename by prefix '{}'", directory.equals(CURRENT_DIR) ? "." : directory.toString(), fileNamePrefix);
102
104
  Files.walkFileTree(directory, new SimpleFileVisitor<Path>() {
103
105
  @Override
104
- public FileVisitResult visitFile(Path path, BasicFileAttributes aAttrs)
106
+ public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs)
107
+ {
108
+ if (path.equals(directory)) {
109
+ return FileVisitResult.CONTINUE;
110
+ } else if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
111
+ return FileVisitResult.SKIP_SUBTREE;
112
+ } else {
113
+ Path parent = path.getParent();
114
+ if (parent == null) {
115
+ parent = CURRENT_DIR;
116
+ }
117
+ if (parent.equals(directory)) {
118
+ if (path.getFileName().toString().startsWith(fileNamePrefix)) {
119
+ return FileVisitResult.CONTINUE;
120
+ } else {
121
+ return FileVisitResult.SKIP_SUBTREE;
122
+ }
123
+ } else {
124
+ return FileVisitResult.CONTINUE;
125
+ }
126
+ }
127
+ }
128
+
129
+ @Override
130
+ public FileVisitResult visitFile(Path path, BasicFileAttributes attrs)
105
131
  {
106
- if (lastPath == null || path.toString().compareTo(lastPath) > 0) {
107
- if (path.getParent().equals(directory)) {
132
+ if (lastPath != null && path.toString().compareTo(lastPath) <= 0) {
133
+ return FileVisitResult.CONTINUE;
134
+ } else {
135
+ Path parent = path.getParent();
136
+ if (parent == null) {
137
+ parent = CURRENT_DIR;
138
+ }
139
+ if (parent.equals(directory)) {
108
140
  if (path.getFileName().toString().startsWith(fileNamePrefix)) {
109
141
  builder.add(path.toString());
142
+ return FileVisitResult.CONTINUE;
110
143
  }
111
144
  } else {
112
145
  builder.add(path.toString());
113
146
  }
147
+ return FileVisitResult.CONTINUE;
114
148
  }
115
- return FileVisitResult.CONTINUE;
116
149
  }
117
150
  });
118
151
  } catch (IOException ex) {
@@ -112,6 +112,8 @@ public class LocalFileOutputPlugin
112
112
  output.write(buffer.array(), buffer.offset(), buffer.limit());
113
113
  } catch (IOException ex) {
114
114
  throw new RuntimeException(ex);
115
+ } finally {
116
+ buffer.release();
115
117
  }
116
118
  }
117
119
 
@@ -1,3 +1,7 @@
1
+ # reset context class loader set by org.jruby.Main.main to nil. embulk manages
2
+ # multiple classloaders. default classloader should be Plugin.class.getClassLoader().
3
+ java.lang.Thread.current_thread.set_context_class_loader(nil)
4
+
1
5
  bundle_path = ENV['EMBULK_BUNDLE_PATH'].to_s
2
6
  bundle_path = nil if bundle_path.empty?
3
7
 
@@ -13,8 +13,8 @@ module Embulk
13
13
  id,account,time,purchase,comment
14
14
  1,32864,2015-01-27 19:23:49,20150127,embulk
15
15
  2,14824,2015-01-27 19:01:23,20150127,embulk jruby
16
- 3,27559,2015-01-28 02:20:02,20150128,embulk core
17
- 4,11270,2015-01-29 11:54:36,20150129,"Embulk ""csv"" parser plugin"
16
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
17
+ 4,11270,2015-01-29 11:54:36,20150129,NULL
18
18
  EOF
19
19
  end
20
20
 
@@ -17,8 +17,9 @@ module Embulk
17
17
  # to make sure org.embulk.jruby.JRubyScriptingModule can require 'embulk/java/bootstrap'
18
18
  $LOAD_PATH << Embulk.home('lib')
19
19
 
20
+ require 'embulk/version'
21
+
20
22
  if argv.include?('--version')
21
- require 'embulk/version'
22
23
  puts "embulk #{Embulk::VERSION}"
23
24
  exit 0
24
25
  end
@@ -31,6 +32,8 @@ module Embulk
31
32
  require 'optparse'
32
33
  op = OptionParser.new
33
34
 
35
+ puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S %Z")}: Embulk v#{Embulk::VERSION}"
36
+
34
37
  load_paths = []
35
38
  classpaths = []
36
39
  classpath_separator = java.io.File.pathSeparator
@@ -4,7 +4,7 @@ TODO: Write short description here
4
4
 
5
5
  ## Overview
6
6
 
7
- * **Plugin type**: <%= embulk_category %>
7
+ * **Plugin type**: <%= display_category %>
8
8
  * **Load all or nothing**: yes
9
9
  * **Resume supported**: no
10
10
 
@@ -13,6 +13,17 @@ module Embulk
13
13
  "\"", "'"
14
14
  ]
15
15
 
16
+ ESCAPE_CANDIDATES = [
17
+ "\\"
18
+ ]
19
+
20
+ NULL_STRING_CANDIDATES = [
21
+ "null",
22
+ "NULL",
23
+ "#N/A",
24
+ "\\N", # MySQL LOAD, Hive STORED AS TEXTFILE
25
+ ]
26
+
16
27
  # CsvParserPlugin.TRUE_STRINGS
17
28
  TRUE_STRINGS = Hash[*%w[
18
29
  true True TRUE
@@ -35,6 +46,13 @@ module Embulk
35
46
  quote = guess_quote(sample_lines, delim)
36
47
  parser_guessed["quote"] = quote ? quote : ''
37
48
 
49
+ escape = guess_escape(sample_lines, delim, quote)
50
+ parser_guessed["escape"] = escape ? escape : ''
51
+
52
+ null_string = guess_null_string(sample_lines, delim)
53
+ parser_guessed["null_string"] = null_string if null_string
54
+ # don't even set null_string to avoid confusion of null and 'null' in YAML format
55
+
38
56
  sample_records = sample_lines.map {|line| line.split(delim) } # TODO use CsvTokenizer
39
57
  first_types = guess_field_types(sample_records[0, 1])
40
58
  other_types = guess_field_types(sample_records[1..-1])
@@ -119,6 +137,32 @@ module Embulk
119
137
  end
120
138
  end
121
139
 
140
+ def guess_escape(sample_lines, delim, optional_quote)
141
+ guessed = ESCAPE_CANDIDATES.map do |str|
142
+ if optional_quote
143
+ regexp = /#{Regexp.quote(str)}(?:#{Regexp.quote(delim)}|#{Regexp.quote(optional_quote)})/
144
+ else
145
+ regexp = /#{Regexp.quote(str)}#{Regexp.quote(delim)}/
146
+ end
147
+ counts = sample_lines.map {|line| line.scan(regexp).count }
148
+ count = counts.inject(0) {|r,c| r + c }
149
+ [str, count]
150
+ end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
151
+ found = guessed.first
152
+ return found ? found[0] : nil
153
+ end
154
+
155
+ def guess_null_string(sample_lines, delim)
156
+ guessed = NULL_STRING_CANDIDATES.map do |str|
157
+ regexp = /(?:^|#{Regexp.quote(delim)})#{Regexp.quote(str)}(?:$|#{Regexp.quote(delim)})/
158
+ counts = sample_lines.map {|line| line.scan(regexp).count }
159
+ count = counts.inject(0) {|r,c| r + c }
160
+ [str, count]
161
+ end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
162
+ found = guessed.first
163
+ return found ? found[0] : nil
164
+ end
165
+
122
166
  def guess_field_types(field_lines)
123
167
  column_lines = []
124
168
  field_lines.each do |fields|
@@ -231,7 +231,7 @@ module Embulk::Guess
231
231
  end
232
232
 
233
233
  if zm = /^#{TZ}$/.match(rest)
234
- delimiters << zm["zone_space"] || ''
234
+ delimiters << (zm["zone_space"] || '')
235
235
  if zm["z"]
236
236
  # TODO ISO 8601
237
237
  parts << :zone_off
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = "0.4.2"
2
+ VERSION = "0.4.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-16 00:00:00.000000000 Z
11
+ date: 2015-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -161,7 +161,6 @@ files:
161
161
  - embulk-core/src/main/java/org/embulk/plugin/PluginSource.java
162
162
  - embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java
163
163
  - embulk-core/src/main/java/org/embulk/plugin/PluginType.java
164
- - embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java
165
164
  - embulk-core/src/main/java/org/embulk/spi/Buffer.java
166
165
  - embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java
167
166
  - embulk-core/src/main/java/org/embulk/spi/Column.java
@@ -268,6 +267,7 @@ files:
268
267
  - embulk-docs/src/release/release-0.4.0.rst
269
268
  - embulk-docs/src/release/release-0.4.1.rst
270
269
  - embulk-docs/src/release/release-0.4.2.rst
270
+ - embulk-docs/src/release/release-0.4.3.rst
271
271
  - embulk-standards/build.gradle
272
272
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
273
273
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -362,8 +362,8 @@ files:
362
362
  - classpath/bval-jsr303-0.5.jar
363
363
  - classpath/commons-beanutils-core-1.8.3.jar
364
364
  - classpath/commons-lang3-3.1.jar
365
- - classpath/embulk-core-0.4.2.jar
366
- - classpath/embulk-standards-0.4.2.jar
365
+ - classpath/embulk-core-0.4.3.jar
366
+ - classpath/embulk-standards-0.4.3.jar
367
367
  - classpath/guava-18.0.jar
368
368
  - classpath/guice-3.0.jar
369
369
  - classpath/guice-multibindings-3.0.jar
@@ -1,19 +0,0 @@
1
- package org.embulk.plugin;
2
-
3
- public class SetThreadContextClassLoader
4
- implements AutoCloseable
5
- {
6
- private final ClassLoader original;
7
-
8
- public SetThreadContextClassLoader(ClassLoader classLoader)
9
- {
10
- this.original = Thread.currentThread().getContextClassLoader();
11
- Thread.currentThread().setContextClassLoader(classLoader);
12
- }
13
-
14
- @Override
15
- public void close()
16
- {
17
- Thread.currentThread().setContextClassLoader(original);
18
- }
19
- }