embulk 0.7.11-java → 0.8.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/README.md +1 -1
  4. data/build.gradle +2 -2
  5. data/embulk-core/build.gradle +2 -0
  6. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
  7. data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
  9. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
  10. data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
  13. data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
  15. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
  16. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
  25. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
  27. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
  28. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
  29. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
  30. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
  32. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
  33. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
  35. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
  36. data/embulk-docs/src/built-in.rst +40 -3
  37. data/embulk-docs/src/conf.py +2 -2
  38. data/embulk-docs/src/release.rst +1 -1
  39. data/embulk-docs/src/release/release-0.8.0.rst +68 -0
  40. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
  41. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
  42. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
  43. data/embulk.gemspec +1 -1
  44. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  45. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  46. data/gradlew +3 -7
  47. data/lib/embulk/column.rb +2 -0
  48. data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
  49. data/lib/embulk/command/embulk_new_plugin.rb +2 -0
  50. data/lib/embulk/command/embulk_run.rb +17 -10
  51. data/lib/embulk/data/bundle/.ruby-version +1 -1
  52. data/lib/embulk/data/new/java/build.gradle.erb +21 -0
  53. data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
  54. data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
  55. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  56. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  57. data/lib/embulk/data/new/java/gradlew +3 -7
  58. data/lib/embulk/data/new/ruby/.ruby-version +1 -1
  59. data/lib/embulk/guess/csv.rb +1 -1
  60. data/lib/embulk/guess/schema_guess.rb +6 -0
  61. data/lib/embulk/guess_plugin.rb +1 -1
  62. data/lib/embulk/java/imports.rb +4 -0
  63. data/lib/embulk/plugin_registry.rb +8 -12
  64. data/lib/embulk/schema.rb +6 -0
  65. data/lib/embulk/version.rb +1 -1
  66. data/test/guess/test_csv_guess.rb +170 -0
  67. data/test/helper.rb +2 -0
  68. metadata +17 -15
  69. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
  70. data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
  71. data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
  72. data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
  73. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
  74. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
  75. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
  76. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
  77. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
  78. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
  79. data/embulk-docs/src/release/release-0.7.11.rst +0 -13
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Wed Jan 13 12:41:02 JST 2016
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
@@ -42,11 +42,6 @@ case "`uname`" in
42
42
  ;;
43
43
  esac
44
44
 
45
- # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
- if $cygwin ; then
47
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
- fi
49
-
50
45
  # Attempt to set APP_HOME
51
46
  # Resolve links: $0 may be a link
52
47
  PRG="$0"
@@ -61,9 +56,9 @@ while [ -h "$PRG" ] ; do
61
56
  fi
62
57
  done
63
58
  SAVED="`pwd`"
64
- cd "`dirname \"$PRG\"`/" >&-
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
65
60
  APP_HOME="`pwd -P`"
66
- cd "$SAVED" >&-
61
+ cd "$SAVED" >/dev/null
67
62
 
68
63
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
64
 
@@ -114,6 +109,7 @@ fi
114
109
  if $cygwin ; then
115
110
  APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
111
  CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
113
 
118
114
  # We build the pattern for arguments to be converted via cygpath
119
115
  ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
@@ -1 +1 @@
1
- jruby-9.0.0.0
1
+ jruby-9.0.4.0
@@ -72,8 +72,8 @@ module Embulk
72
72
  else
73
73
  # disable escaping (set null)
74
74
  end
75
- parser_guessed["escape"] = escape
76
75
  end
76
+ parser_guessed["escape"] = escape
77
77
  else
78
78
  # escape does nothing if quote is disabled
79
79
  end
@@ -68,6 +68,12 @@ module Embulk::Guess
68
68
  return nil
69
69
  end
70
70
 
71
+ begin
72
+ JSON.parse(str)
73
+ return "json"
74
+ rescue
75
+ end
76
+
71
77
  return "string"
72
78
  end
73
79
 
@@ -27,7 +27,7 @@ module Embulk
27
27
  end
28
28
 
29
29
  def self.from_java(java_class)
30
- JavaPlugin.ruby_adapter(java_class, GuessPlugin, RubyAdapter)
30
+ JavaPlugin.ruby_adapter_class(java_class, GuessPlugin, RubyAdapter)
31
31
  end
32
32
 
33
33
  module RubyAdapter
@@ -22,6 +22,10 @@ module Embulk::Java
22
22
  include_package 'org.embulk.time'
23
23
  end
24
24
 
25
+ module Json
26
+ include_package 'org.embulk.json'
27
+ end
28
+
25
29
  module Type
26
30
  include_package 'org.embulk.type'
27
31
  end
@@ -27,9 +27,9 @@ module Embulk
27
27
  if value = @map[type]
28
28
  return value
29
29
  end
30
- raise PluginLoadError, "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is installed but it does not correctly register plugin."
30
+ raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is installed but it does not correctly register plugin."
31
31
  else
32
- raise PluginLoadError, "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is not installed. Run 'embulk gem search -rd embulk-#{@category}' command to find plugins."
32
+ raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is not installed. Run 'embulk gem search -rd embulk-#{@category}' command to find plugins."
33
33
  end
34
34
  end
35
35
 
@@ -58,10 +58,8 @@ module Embulk
58
58
 
59
59
  # search gems
60
60
  if defined?(::Gem::Specification) && ::Gem::Specification.respond_to?(:find_all)
61
- specs = Kernel::RUBYGEMS_ACTIVATION_MONITOR.synchronize do # this lock is added as a workaround of https://github.com/jruby/jruby/issues/3652
62
- Gem::Specification.find_all do |spec|
63
- spec.contains_requirable_file? name
64
- end
61
+ specs = Gem::Specification.find_all do |spec|
62
+ spec.contains_requirable_file? name
65
63
  end
66
64
 
67
65
  # prefer newer version
@@ -80,12 +78,10 @@ module Embulk
80
78
  def require_and_show(path, spec=nil)
81
79
  require path
82
80
  unless spec
83
- name, spec = Kernel::RUBYGEMS_ACTIVATION_MONITOR.synchronize do # this lock is added as a workaround of https://github.com/jruby/jruby/issues/3652
84
- Gem.loaded_specs.find {|name,spec|
85
- #spec.files.include?(path)
86
- spec.contains_requirable_file?(path)
87
- }
88
- end
81
+ name, spec = Gem.loaded_specs.find {|name,spec|
82
+ #spec.files.include?(path)
83
+ spec.contains_requirable_file?(path)
84
+ }
89
85
  end
90
86
  if spec
91
87
  unless @loaded_gems[spec.name]
@@ -19,6 +19,7 @@ module Embulk
19
19
  each do |column|
20
20
  idx = column.index
21
21
  column_script =
22
+ "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
22
23
  "if reader.isNull(#{idx})\n" <<
23
24
  "record << nil\n" <<
24
25
  "else\n" <<
@@ -33,6 +34,8 @@ module Embulk
33
34
  "record << reader.getString(#{idx})"
34
35
  when :timestamp
35
36
  "record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)"
37
+ when :json
38
+ "record << MessagePack.unpack(value_api.toMessagePack(JRuby.runtime, reader.getJson(#{idx})))"
36
39
  else
37
40
  raise "Unknown type #{column.type.inspect}"
38
41
  end <<
@@ -45,6 +48,7 @@ module Embulk
45
48
 
46
49
  record_writer_script = "lambda do |builder,record|\n"
47
50
  record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
51
+ record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
48
52
  each do |column|
49
53
  idx = column.index
50
54
  column_script =
@@ -62,6 +66,8 @@ module Embulk
62
66
  "builder.setString(#{idx}, record[#{idx}])"
63
67
  when :timestamp
64
68
  "builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))"
69
+ when :json
70
+ "builder.setJson(#{idx}, value_api.fromMessagePack(MessagePack.pack(record[#{idx}])))"
65
71
  else
66
72
  raise "Unknown type #{column.type.inspect}"
67
73
  end <<
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.7.11'
2
+ VERSION = '0.8.0'
3
3
  end
@@ -0,0 +1,170 @@
1
+ require 'helper'
2
+ require 'time'
3
+ require 'embulk/guess/csv'
4
+
5
+ class CsvGuessTest < ::Test::Unit::TestCase
6
+ class TestDelimiter < self
7
+ data(
8
+ "\t" => "\t",
9
+ "," => ",",
10
+ "|" => "|",
11
+ )
12
+ def test_delimiter_detection(delim)
13
+ actual = guess([
14
+ ["1", "foo"].join(delim),
15
+ ["2", "bar"].join(delim),
16
+ ])
17
+ assert_equal delim, actual["parser"]["delimiter"]
18
+ end
19
+ end
20
+
21
+ class TestQuote < self
22
+ data(
23
+ "'" => "'",
24
+ '"' => '"',
25
+ nil => nil,
26
+ )
27
+ def test_quote(quotation)
28
+ actual = guess([
29
+ %w(1 foo).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
30
+ %w(2 bar).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
31
+ ])
32
+ assert_equal quotation, actual["parser"]["quote"]
33
+ end
34
+ end
35
+
36
+ class TestEscape < self
37
+ data(
38
+ "\\" => "\\",
39
+ '"' => '"',
40
+ )
41
+ def test_escape(char)
42
+ actual = guess([
43
+ %Q('1'\t'F#{char}'OO'),
44
+ %Q('2'\t'FOOOOOOOO#{char}'OO'),
45
+ ])
46
+ assert_equal char, actual["parser"]["escape"]
47
+ end
48
+ end
49
+
50
+ class TestSkipHeaderLines < self
51
+ def test_skip_header_lines_one
52
+ actual = guess([
53
+ "col1\tcol2",
54
+ "1\tfoo",
55
+ "2\tbar",
56
+ ])
57
+ assert_equal 1, actual["parser"]["skip_header_lines"]
58
+ end
59
+
60
+ def test_skip_header_lines_three
61
+ actual = guess([
62
+ "this is a CSV",
63
+ "created for a test",
64
+ "col1\tcol2",
65
+ "1\tfoo",
66
+ "2\tbar",
67
+ ])
68
+ assert_equal 3, actual["parser"]["skip_header_lines"]
69
+ end
70
+ end
71
+
72
+ class TestNullString < self
73
+ data(
74
+ "\\N" => "\\N",
75
+ "null" => "null",
76
+ "NULL" => "NULL",
77
+ "#N/A" => "#N/A",
78
+ nil => nil,
79
+ )
80
+ def test_null_string(null)
81
+ actual = guess([
82
+ "1\tfoo\t#{null}",
83
+ "2\tbar\t#{null}",
84
+ ])
85
+ assert_equal null, actual["parser"]["null_string"]
86
+ end
87
+ end
88
+
89
+ class TestTrim < self
90
+ def test_trim_flag_when_will_be_long_if_strip_arround_space
91
+ actual = guess([
92
+ " 1 \tfoo",
93
+ " 2 \tfoo",
94
+ " 3 \tfoo",
95
+ ])
96
+ assert_equal true, actual["parser"]["trim_if_not_quoted"]
97
+ end
98
+ end
99
+
100
+ class TestCommentLineMarker < self
101
+ data(
102
+ "#" => "#",
103
+ "//" => "//",
104
+ )
105
+ def test_comment_line_marker(marker)
106
+ actual = guess([
107
+ "foo\t 1\tother",
108
+ "#{marker} foo\t 2\tother",
109
+ "foo\t 3\tother",
110
+ ])
111
+ assert_equal marker, actual["parser"]["comment_line_marker"]
112
+ end
113
+ end
114
+
115
+ class TestColumns < self
116
+ def test_columns_without_header
117
+ actual = guess([
118
+ "1\tfoo\t2000-01-01T00:00:00+0900",
119
+ "2\tbar\t2000-01-01T00:00:00+0900",
120
+ ])
121
+ expected = [
122
+ {"name" => "c0", "type" => "long"},
123
+ {"name" => "c1", "type" => "string"},
124
+ {"name" => "c2", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
125
+ ]
126
+ assert_equal expected, actual["parser"]["columns"]
127
+ end
128
+
129
+ def test_columns_with_header
130
+ actual = guess([
131
+ "num\tstr\ttime",
132
+ "1\tfoo\t2000-01-01T00:00:00+0900",
133
+ "2\tbar\t2000-01-01T00:00:00+0900",
134
+ ])
135
+ expected = [
136
+ {"name" => "num", "type" => "long"},
137
+ {"name" => "str", "type" => "string"},
138
+ {"name" => "time", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
139
+ ]
140
+ assert_equal expected, actual["parser"]["columns"]
141
+ end
142
+
143
+ def test_complex_line
144
+ actual = guess([
145
+ %Q(this is useless header),
146
+ %Q(and more),
147
+ %Q(num,str,quoted_num,time),
148
+ %Q(1, "value with space "" and quote in it", "123",21150312000000Z),
149
+ %Q(2),
150
+ %Q(# 3, "this is commented out" ,"1",21150312000000Z),
151
+ ])
152
+ expected = [
153
+ {"name" => "num", "type" => "long"},
154
+ {"name" => "str", "type" => "string"},
155
+ {"name" => "quoted_num", "type" => "long"},
156
+ {"name" => "time", "type" => "timestamp", "format"=>"%Y%m%d%H%M%S%z"},
157
+ ]
158
+ assert_equal expected, actual["parser"]["columns"]
159
+ end
160
+ end
161
+
162
+ def guess(texts)
163
+ conf = Embulk::DataSource.new({
164
+ parser: {
165
+ type: "csv"
166
+ }
167
+ })
168
+ Embulk::Guess::CsvGuessPlugin.new.guess_lines(conf, Array(texts))
169
+ end
170
+ end