embulk-guess-csv_verify 0.10.29-java → 0.10.30-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 560fef03cacdda19b9365be2a7352d5a00c47b415fb89cd1f7ae6664d0603091
4
- data.tar.gz: 79ab9de8f15c812cd3093207329674370d37563bdc05360f2652072a15afe12c
3
+ metadata.gz: c980881f2e59cd43b69eebe56691108f9a9ff2436bd9d2446b2b22330bf8f88c
4
+ data.tar.gz: da1cf6e385c0b77d8b3d1aca9076e5ab5f2a44e81ceb357207837bcfd255de0f
5
5
  SHA512:
6
- metadata.gz: ced3fb4071fa98eb20d4411272184217e9fc28e2c19e29192830d9fbcc158b67cdb87cfa7191c3fdf78794017009feef25982f60ae34bbae7cee08dc9257eafb
7
- data.tar.gz: 357413c30817f29b0d2bdd1e22f093387374d4b1affff93e4ff88463f3468dfcfba4d1115da4f0f8daf48eb7f3a511ac012b0173af16a867fb1f2c63d9272bb6
6
+ metadata.gz: a3c223339d066fd2a4241232cdd6362e0836d051eb7a209353bbb4879c800fcc561ede18005b4c257e062119271e000f36cf3972afbce164f37fb7b6e51fad86
7
+ data.tar.gz: 61a444758472772ff192f932dabe6ed9fea417c413ec03203782931a0efe0d44f270686cb546870ec7bdc6a732edd89df27d97605e23a2d81838c08eb4e95511
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -25,13 +25,13 @@ module Embulk
25
25
  CONFIG_MAPPER_FACTORY_CLASS = CLASSLOADER.loadClass("org.embulk.util.config.ConfigMapperFactory").ruby_class
26
26
  TYPE_MODULE_CLASS = CLASSLOADER.loadClass("org.embulk.util.config.modules.TypeModule").ruby_class
27
27
  CONFIG_MAPPER_FACTORY = CONFIG_MAPPER_FACTORY_CLASS.builder.addDefaultModules.addModule(TYPE_MODULE_CLASS.new).build
28
- PLUGIN_TASK_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvParserPlugin$PluginTask")
28
+ LEGACY_PLUGIN_TASK_CLASS = CLASSLOADER.loadClass("org.embulk.standards.CsvParserPlugin$PluginTask")
29
29
  LIST_FILE_INPUT_CLASS = CLASSLOADER.loadClass("org.embulk.util.file.ListFileInput").ruby_class
30
30
  LINE_DECODER_CLASS = CLASSLOADER.loadClass("org.embulk.util.text.LineDecoder").ruby_class
31
31
  CSV_GUESS_PLUGIN_CLASS = CLASSLOADER.loadClass("org.embulk.guess.csv.CsvGuessPlugin").ruby_class
32
- CSV_TOKENIZER_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer").ruby_class
33
- TOO_FEW_COLUMNS_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$TooFewColumnsException").ruby_class
34
- INVALID_VALUE_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$InvalidValueException").ruby_class
32
+ LEGACY_CSV_TOKENIZER_CLASS = CLASSLOADER.loadClass("org.embulk.standards.CsvTokenizer").ruby_class
33
+ LEGACY_TOO_FEW_COLUMNS_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$TooFewColumnsException").ruby_class
34
+ LEGACY_INVALID_VALUE_EXCEPTION_CLASS = CLASSLOADER.loadClass("org.embulk.parser.csv.CsvTokenizer$InvalidValueException").ruby_class
35
35
 
36
36
  DELIMITER_CANDIDATES = [
37
37
  ",", "\t", "|", ";"
@@ -71,7 +71,9 @@ module Embulk
71
71
  end
72
72
  guessed_ruby_converted = config_to_java(guessed_ruby)
73
73
  if !guessed_java.equals(guessed_ruby_converted)
74
- raise_and_log_guess_diff(guessed_ruby, guessed_java)
74
+ log_guess_diff(guessed_ruby, guessed_java, "decoders")
75
+ log_guess_diff(guessed_ruby, guessed_java, "parser")
76
+ raise "embulk-guess-csv has difference between Java/Ruby."
75
77
  end
76
78
  rescue Exception => e
77
79
  # Any error from the Java-based guess plugin should pass-through just with logging.
@@ -237,34 +239,23 @@ module Embulk
237
239
 
238
240
  private
239
241
 
240
- def raise_and_log_guess_diff(guessed_ruby_entire, guessed_java_entire)
241
- guessed_ruby = guessed_ruby_entire["parser"] || {}
242
- guessed_java = guessed_java_entire.getNestedOrGetEmpty("parser")
243
-
244
- require 'set'
245
- keys = Set.new(guessed_ruby.keys) + Set.new(guessed_java.getAttributeNames)
242
+ def log_guess_diff(guessed_ruby_entire, guessed_java_entire, key)
243
+ guessed_ruby = guessed_ruby_entire[key] || {}
244
+ guessed_java = guessed_java_entire.getNestedOrGetEmpty(key)
246
245
 
247
246
  begin
248
247
  require 'json'
249
248
  rescue LoadError
250
- Embulk.logger.warn "The 'json' gem is not installed. No details compared."
251
- guessed_java_hash = nil
249
+ raise "The 'json' gem is not installed. No details compared."
252
250
  else
253
251
  guessed_java_hash = JSON.parse(guessed_java.toJson)
254
252
  end
255
253
 
256
- diffs = []
257
- keys.each do |key|
258
- if !guessed_ruby.has_key?(key)
259
- diffs << "Only embulk-guess-csv (Java) has: \"#{key}\""
260
- elsif !guessed_java.has(key.to_java)
261
- diffs << "Only embulk-guess-csv (Ruby) has: \"#{key}\""
262
- elsif guessed_java_hash && guessed_ruby[key] != guessed_java_hash[key]
263
- diffs << "embulk-guess-csv has difference between Java/Ruby: \"#{key}\""
264
- end
254
+ if guessed_java_hash && guessed_ruby != guessed_java_hash
255
+ Embulk.logger.error "[Embulk CSV guess verify] '#{key}' has difference."
256
+ Embulk.logger.error "[Embulk CSV guess verify] Java => #{guessed_java_hash.to_json}"
257
+ Embulk.logger.error "[Embulk CSV guess verify] Ruby => #{guessed_ruby.to_json}"
265
258
  end
266
-
267
- raise "embulk-guess-csv has difference between Java/Ruby: #{diffs.inspect}"
268
259
  end
269
260
 
270
261
  def config_to_java(config_ruby)
@@ -289,12 +280,11 @@ module Embulk
289
280
  def split_lines(parser_config, skip_empty_lines, sample_lines, delim, extra_config)
290
281
  null_string = parser_config["null_string"]
291
282
  config = parser_config.merge(extra_config).merge({"charset" => "UTF-8", "columns" => []})
292
- parser_task = CONFIG_MAPPER_FACTORY.createConfigMapper.map(config_to_java(parser_config), PLUGIN_TASK_CLASS)
283
+ parser_task = config.load_config(LEGACY_PLUGIN_TASK_CLASS)
293
284
  data = sample_lines.map {|line| line.force_encoding('UTF-8') }.join(parser_task.getNewline.getString.encode('UTF-8'))
294
285
  sample = Buffer.from_ruby_string(data)
295
- decoder = LINE_DECODER_CLASS.of(
296
- LIST_FILE_INPUT_CLASS.new([[sample.to_java]]), parser_task.getCharset, parser_task.getLineDelimiterRecognized.orElse(nil))
297
- tokenizer = CSV_TOKENIZER_CLASS.new(decoder, parser_task)
286
+ decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
287
+ tokenizer = LEGACY_CSV_TOKENIZER_CLASS.new(decoder, parser_task)
298
288
  rows = []
299
289
  while tokenizer.nextFile
300
290
  while tokenizer.nextRecord(skip_empty_lines)
@@ -308,12 +298,12 @@ module Embulk
308
298
  column = nil
309
299
  end
310
300
  columns << column
311
- rescue TOO_FEW_COLUMNS_EXCEPTION_CLASS
301
+ rescue LEGACY_TOO_FEW_COLUMNS_EXCEPTION_CLASS
312
302
  rows << columns
313
303
  break
314
304
  end
315
305
  end
316
- rescue INVALID_VALUE_EXCEPTION_CLASS
306
+ rescue LEGACY_INVALID_VALUE_EXCEPTION_CLASS
317
307
  # TODO warning
318
308
  tokenizer.skipCurrentLine
319
309
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-guess-csv_verify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.29
4
+ version: 0.10.30
5
5
  platform: java
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -10,19 +10,29 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-04-07 00:00:00.000000000 Z
13
+ date: 2021-04-15 00:00:00.000000000 Z
14
14
  dependencies: []
15
15
  description: Verification-purpose Embulk CSV guess plugin to compare the old Ruby-based
16
- one and the new Java-based one (not for your production use)
16
+ one and the new Java-based one (not for your production use; note that 'decoders'
17
+ and 'parser' sections in your configuration can be logged even if they contain confidential
18
+ information)
17
19
  email:
18
20
  - dmikurube@treasure-data.com
19
21
  executables: []
20
22
  extensions: []
21
23
  extra_rdoc_files: []
22
24
  files:
23
- - classpath/embulk-guess-csv-0.10.29.jar
24
- - classpath/embulk-guess-csv_verify-0.10.29.jar
25
- - classpath/embulk-parser-csv-0.10.29.jar
25
+ - classpath/aopalliance-1.0.jar
26
+ - classpath/bval-core-0.5.jar
27
+ - classpath/bval-jsr303-0.5.jar
28
+ - classpath/commons-beanutils-core-1.8.3.jar
29
+ - classpath/commons-lang3-3.4.jar
30
+ - classpath/embulk-api-0.10.30.jar
31
+ - classpath/embulk-core-0.10.30.jar
32
+ - classpath/embulk-guess-csv-0.10.30.jar
33
+ - classpath/embulk-guess-csv_verify-0.10.30.jar
34
+ - classpath/embulk-parser-csv-0.10.30.jar
35
+ - classpath/embulk-spi-0.10.30.jar
26
36
  - classpath/embulk-util-config-0.2.1.jar
27
37
  - classpath/embulk-util-file-0.1.3.jar
28
38
  - classpath/embulk-util-guess-0.1.1.jar
@@ -30,11 +40,19 @@ files:
30
40
  - classpath/embulk-util-rubytime-0.3.2.jar
31
41
  - classpath/embulk-util-text-0.1.0.jar
32
42
  - classpath/embulk-util-timestamp-0.2.1.jar
43
+ - classpath/guava-18.0.jar
44
+ - classpath/guice-4.0.jar
45
+ - classpath/guice-multibindings-4.0.jar
33
46
  - classpath/icu4j-54.1.1.jar
34
47
  - classpath/jackson-annotations-2.6.7.jar
35
48
  - classpath/jackson-core-2.6.7.jar
36
49
  - classpath/jackson-databind-2.6.7.jar
50
+ - classpath/jackson-datatype-guava-2.6.7.jar
37
51
  - classpath/jackson-datatype-jdk8-2.6.7.jar
52
+ - classpath/jackson-module-guice-2.6.7.jar
53
+ - classpath/javax.inject-1.jar
54
+ - classpath/msgpack-core-0.8.11.jar
55
+ - classpath/slf4j-api-1.7.30.jar
38
56
  - classpath/validation-api-1.1.0.Final.jar
39
57
  - lib/embulk/guess/csv_verify.rb
40
58
  homepage: https://github.com/embulk/embulk
Binary file