embulk-guess-csv_verify 0.10.29-java → 0.10.30-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 560fef03cacdda19b9365be2a7352d5a00c47b415fb89cd1f7ae6664d0603091
4
- data.tar.gz: 79ab9de8f15c812cd3093207329674370d37563bdc05360f2652072a15afe12c
3
+ metadata.gz: c980881f2e59cd43b69eebe56691108f9a9ff2436bd9d2446b2b22330bf8f88c
4
+ data.tar.gz: da1cf6e385c0b77d8b3d1aca9076e5ab5f2a44e81ceb357207837bcfd255de0f
5
5
  SHA512:
6
- metadata.gz: ced3fb4071fa98eb20d4411272184217e9fc28e2c19e29192830d9fbcc158b67cdb87cfa7191c3fdf78794017009feef25982f60ae34bbae7cee08dc9257eafb
7
- data.tar.gz: 357413c30817f29b0d2bdd1e22f093387374d4b1affff93e4ff88463f3468dfcfba4d1115da4f0f8daf48eb7f3a511ac012b0173af16a867fb1f2c63d9272bb6
6
+ metadata.gz: a3c223339d066fd2a4241232cdd6362e0836d051eb7a209353bbb4879c800fcc561ede18005b4c257e062119271e000f36cf3972afbce164f37fb7b6e51fad86
7
+ data.tar.gz: 61a444758472772ff192f932dabe6ed9fea417c413ec03203782931a0efe0d44f270686cb546870ec7bdc6a732edd89df27d97605e23a2d81838c08eb4e95511
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -25,13 +25,13 @@ module Embulk
25
25
  CONFIG_MAPPER_FACTORY_CLASS = CLASSLOADER.loadClass("org.embulk.util.config.ConfigMapperFactory").ruby_class
26
26
  TYPE_MODULE_CLASS = CLASSLOADER.loadClass("org.embulk.util.config.modules.TypeModule").ruby_class
27
27
  CONFIG_MAPPER_FACTORY = CONFIG_MAPPER_FACTORY_CLASS.builder.addDefaultModules.addModule(TYPE_MODULE_CLASS.new).build
28
- PLUGIN_TASK_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvParserPlugin$PluginTask")
28
+ LEGACY_PLUGIN_TASK_CLASS = CLASSLOADER.loadClass("org.embulk.standards.CsvParserPlugin$PluginTask")
29
29
  LIST_FILE_INPUT_CLASS = CLASSLOADER.loadClass("org.embulk.util.file.ListFileInput").ruby_class
30
30
  LINE_DECODER_CLASS = CLASSLOADER.loadClass("org.embulk.util.text.LineDecoder").ruby_class
31
31
  CSV_GUESS_PLUGIN_CLASS = CLASSLOADER.loadClass("org.embulk.guess.csv.CsvGuessPlugin").ruby_class
32
- CSV_TOKENIZER_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer").ruby_class
33
- TOO_FEW_COLUMNS_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$TooFewColumnsException").ruby_class
34
- INVALID_VALUE_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$InvalidValueException").ruby_class
32
+ LEGACY_CSV_TOKENIZER_CLASS = CLASSLOADER.loadClass("org.embulk.standards.CsvTokenizer").ruby_class
33
+ LEGACY_TOO_FEW_COLUMNS_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$TooFewColumnsException").ruby_class
34
+ LEGACY_INVALID_VALUE_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$InvalidValueException").ruby_class
35
35
 
36
36
  DELIMITER_CANDIDATES = [
37
37
  ",", "\t", "|", ";"
@@ -71,7 +71,9 @@ module Embulk
71
71
  end
72
72
  guessed_ruby_converted = config_to_java(guessed_ruby)
73
73
  if !guessed_java.equals(guessed_ruby_converted)
74
- raise_and_log_guess_diff(guessed_ruby, guessed_java)
74
+ log_guess_diff(guessed_ruby, guessed_java, "decoders")
75
+ log_guess_diff(guessed_ruby, guessed_java, "parser")
76
+ raise "embulk-guess-csv has difference between Java/Ruby."
75
77
  end
76
78
  rescue Exception => e
77
79
  # Any error from the Java-based guess plugin should pass-through just with logging.
@@ -237,34 +239,23 @@ module Embulk
237
239
 
238
240
  private
239
241
 
240
- def raise_and_log_guess_diff(guessed_ruby_entire, guessed_java_entire)
241
- guessed_ruby = guessed_ruby_entire["parser"] || {}
242
- guessed_java = guessed_java_entire.getNestedOrGetEmpty("parser")
243
-
244
- require 'set'
245
- keys = Set.new(guessed_ruby.keys) + Set.new(guessed_java.getAttributeNames)
242
+ def log_guess_diff(guessed_ruby_entire, guessed_java_entire, key)
243
+ guessed_ruby = guessed_ruby_entire[key] || {}
244
+ guessed_java = guessed_java_entire.getNestedOrGetEmpty(key)
246
245
 
247
246
  begin
248
247
  require 'json'
249
248
  rescue LoadError
250
- Embulk.logger.warn "The 'json' gem is not installed. No details compared."
251
- guessed_java_hash = nil
249
+ raise "The 'json' gem is not installed. No details compared."
252
250
  else
253
251
  guessed_java_hash = JSON.parse(guessed_java.toJson)
254
252
  end
255
253
 
256
- diffs = []
257
- keys.each do |key|
258
- if !guessed_ruby.has_key?(key)
259
- diffs << "Only embulk-guess-csv (Java) has: \"#{key}\""
260
- elsif !guessed_java.has(key.to_java)
261
- diffs << "Only embulk-guess-csv (Ruby) has: \"#{key}\""
262
- elsif guessed_java_hash && guessed_ruby[key] != guessed_java_hash[key]
263
- diffs << "embulk-guess-csv has difference between Java/Ruby: \"#{key}\""
264
- end
254
+ if guessed_java_hash && guessed_ruby != guessed_java_hash
255
+ Embulk.logger.error "[Embulk CSV guess verify] '#{key}' has difference."
256
+ Embulk.logger.error "[Embulk CSV guess verify] Java => #{guessed_java_hash.to_json}"
257
+ Embulk.logger.error "[Embulk CSV guess verify] Ruby => #{guessed_ruby.to_json}"
265
258
  end
266
-
267
- raise "embulk-guess-csv has difference between Java/Ruby: #{diffs.inspect}"
268
259
  end
269
260
 
270
261
  def config_to_java(config_ruby)
@@ -289,12 +280,11 @@ module Embulk
289
280
  def split_lines(parser_config, skip_empty_lines, sample_lines, delim, extra_config)
290
281
  null_string = parser_config["null_string"]
291
282
  config = parser_config.merge(extra_config).merge({"charset" => "UTF-8", "columns" => []})
292
- parser_task = CONFIG_MAPPER_FACTORY.createConfigMapper.map(config_to_java(parser_config), PLUGIN_TASK_CLASS)
283
+ parser_task = config.load_config(LEGACY_PLUGIN_TASK_CLASS)
293
284
  data = sample_lines.map {|line| line.force_encoding('UTF-8') }.join(parser_task.getNewline.getString.encode('UTF-8'))
294
285
  sample = Buffer.from_ruby_string(data)
295
- decoder = LINE_DECODER_CLASS.of(
296
- LIST_FILE_INPUT_CLASS.new([[sample.to_java]]), parser_task.getCharset, parser_task.getLineDelimiterRecognized.orElse(nil))
297
- tokenizer = CSV_TOKENIZER_CLASS.new(decoder, parser_task)
286
+ decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
287
+ tokenizer = LEGACY_CSV_TOKENIZER_CLASS.new(decoder, parser_task)
298
288
  rows = []
299
289
  while tokenizer.nextFile
300
290
  while tokenizer.nextRecord(skip_empty_lines)
@@ -308,12 +298,12 @@ module Embulk
308
298
  column = nil
309
299
  end
310
300
  columns << column
311
- rescue TOO_FEW_COLUMNS_EXCEPTION_CLASS
301
+ rescue LEGACY_TOO_FEW_COLUMNS_EXCEPTION_CLASS
312
302
  rows << columns
313
303
  break
314
304
  end
315
305
  end
316
- rescue INVALID_VALUE_EXCEPTION_CLASS
306
+ rescue LEGACY_INVALID_VALUE_EXCEPTION_CLASS
317
307
  # TODO warning
318
308
  tokenizer.skipCurrentLine
319
309
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-guess-csv_verify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.29
4
+ version: 0.10.30
5
5
  platform: java
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -10,19 +10,29 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-04-07 00:00:00.000000000 Z
13
+ date: 2021-04-15 00:00:00.000000000 Z
14
14
  dependencies: []
15
15
  description: Verification-purpose Embulk CSV guess plugin to compare the old Ruby-based
16
- one and the new Java-based one (not for your production use)
16
+ one and the new Java-based one (not for your production use; note that 'decoders'
17
+ and 'parser' sections in your configuration can be logged even if they contain confidential
18
+ information)
17
19
  email:
18
20
  - dmikurube@treasure-data.com
19
21
  executables: []
20
22
  extensions: []
21
23
  extra_rdoc_files: []
22
24
  files:
23
- - classpath/embulk-guess-csv-0.10.29.jar
24
- - classpath/embulk-guess-csv_verify-0.10.29.jar
25
- - classpath/embulk-parser-csv-0.10.29.jar
25
+ - classpath/aopalliance-1.0.jar
26
+ - classpath/bval-core-0.5.jar
27
+ - classpath/bval-jsr303-0.5.jar
28
+ - classpath/commons-beanutils-core-1.8.3.jar
29
+ - classpath/commons-lang3-3.4.jar
30
+ - classpath/embulk-api-0.10.30.jar
31
+ - classpath/embulk-core-0.10.30.jar
32
+ - classpath/embulk-guess-csv-0.10.30.jar
33
+ - classpath/embulk-guess-csv_verify-0.10.30.jar
34
+ - classpath/embulk-parser-csv-0.10.30.jar
35
+ - classpath/embulk-spi-0.10.30.jar
26
36
  - classpath/embulk-util-config-0.2.1.jar
27
37
  - classpath/embulk-util-file-0.1.3.jar
28
38
  - classpath/embulk-util-guess-0.1.1.jar
@@ -30,11 +40,19 @@ files:
30
40
  - classpath/embulk-util-rubytime-0.3.2.jar
31
41
  - classpath/embulk-util-text-0.1.0.jar
32
42
  - classpath/embulk-util-timestamp-0.2.1.jar
43
+ - classpath/guava-18.0.jar
44
+ - classpath/guice-4.0.jar
45
+ - classpath/guice-multibindings-4.0.jar
33
46
  - classpath/icu4j-54.1.1.jar
34
47
  - classpath/jackson-annotations-2.6.7.jar
35
48
  - classpath/jackson-core-2.6.7.jar
36
49
  - classpath/jackson-databind-2.6.7.jar
50
+ - classpath/jackson-datatype-guava-2.6.7.jar
37
51
  - classpath/jackson-datatype-jdk8-2.6.7.jar
52
+ - classpath/jackson-module-guice-2.6.7.jar
53
+ - classpath/javax.inject-1.jar
54
+ - classpath/msgpack-core-0.8.11.jar
55
+ - classpath/slf4j-api-1.7.30.jar
38
56
  - classpath/validation-api-1.1.0.Final.jar
39
57
  - lib/embulk/guess/csv_verify.rb
40
58
  homepage: https://github.com/embulk/embulk
Binary file