embulk 0.7.11 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/README.md +1 -1
  4. data/build.gradle +2 -2
  5. data/embulk-core/build.gradle +2 -0
  6. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
  7. data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
  9. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
  10. data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
  13. data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
  15. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
  16. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
  25. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
  27. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
  28. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
  29. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
  30. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
  32. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
  33. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
  35. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
  36. data/embulk-docs/src/built-in.rst +40 -3
  37. data/embulk-docs/src/conf.py +2 -2
  38. data/embulk-docs/src/release.rst +1 -1
  39. data/embulk-docs/src/release/release-0.8.0.rst +68 -0
  40. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
  41. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
  42. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
  43. data/embulk.gemspec +1 -1
  44. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  45. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  46. data/gradlew +3 -7
  47. data/lib/embulk/column.rb +2 -0
  48. data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
  49. data/lib/embulk/command/embulk_new_plugin.rb +2 -0
  50. data/lib/embulk/command/embulk_run.rb +17 -10
  51. data/lib/embulk/data/bundle/.ruby-version +1 -1
  52. data/lib/embulk/data/new/java/build.gradle.erb +21 -0
  53. data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
  54. data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
  55. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  56. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  57. data/lib/embulk/data/new/java/gradlew +3 -7
  58. data/lib/embulk/data/new/ruby/.ruby-version +1 -1
  59. data/lib/embulk/guess/csv.rb +1 -1
  60. data/lib/embulk/guess/schema_guess.rb +6 -0
  61. data/lib/embulk/guess_plugin.rb +1 -1
  62. data/lib/embulk/java/imports.rb +4 -0
  63. data/lib/embulk/plugin_registry.rb +8 -12
  64. data/lib/embulk/schema.rb +6 -0
  65. data/lib/embulk/version.rb +1 -1
  66. data/test/guess/test_csv_guess.rb +170 -0
  67. data/test/helper.rb +2 -0
  68. metadata +19 -17
  69. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
  70. data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
  71. data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
  72. data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
  73. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
  74. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
  75. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
  76. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
  77. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
  78. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
  79. data/embulk-docs/src/release/release-0.7.11.rst +0 -13
@@ -0,0 +1,108 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE module PUBLIC
3
+ "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4
+ "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5
+ <!--
6
+ This is a subset of ./checkstyle.xml which allows some loose styles
7
+ -->
8
+ <module name="Checker">
9
+ <module name="FileTabCharacter"/>
10
+ <module name="NewlineAtEndOfFile">
11
+ <property name="lineSeparator" value="lf"/>
12
+ </module>
13
+ <module name="RegexpMultiline">
14
+ <property name="format" value="\r"/>
15
+ <property name="message" value="Line contains carriage return"/>
16
+ </module>
17
+ <module name="RegexpMultiline">
18
+ <property name="format" value=" \n"/>
19
+ <property name="message" value="Line has trailing whitespace"/>
20
+ </module>
21
+ <module name="RegexpMultiline">
22
+ <property name="format" value="\n\n\n"/>
23
+ <property name="message" value="Multiple consecutive blank lines"/>
24
+ </module>
25
+ <module name="RegexpMultiline">
26
+ <property name="format" value="\n\n\Z"/>
27
+ <property name="message" value="Blank line before end of file"/>
28
+ </module>
29
+
30
+ <module name="TreeWalker">
31
+ <module name="EmptyBlock">
32
+ <property name="option" value="text"/>
33
+ <property name="tokens" value="
34
+ LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35
+ LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36
+ </module>
37
+ <module name="EmptyStatement"/>
38
+ <module name="EmptyForInitializerPad"/>
39
+ <module name="EmptyForIteratorPad">
40
+ <property name="option" value="space"/>
41
+ </module>
42
+ <module name="MethodParamPad">
43
+ <property name="allowLineBreaks" value="true"/>
44
+ <property name="option" value="nospace"/>
45
+ </module>
46
+ <module name="ParenPad"/>
47
+ <module name="TypecastParenPad"/>
48
+ <module name="NeedBraces"/>
49
+ <module name="LeftCurly">
50
+ <property name="option" value="nl"/>
51
+ <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52
+ </module>
53
+ <module name="LeftCurly">
54
+ <property name="option" value="eol"/>
55
+ <property name="tokens" value="
56
+ LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57
+ LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58
+ </module>
59
+ <module name="RightCurly">
60
+ <property name="option" value="alone"/>
61
+ </module>
62
+ <module name="GenericWhitespace"/>
63
+ <module name="WhitespaceAfter"/>
64
+ <module name="NoWhitespaceBefore"/>
65
+
66
+ <module name="UpperEll"/>
67
+ <module name="DefaultComesLast"/>
68
+ <module name="ArrayTypeStyle"/>
69
+ <module name="MultipleVariableDeclarations"/>
70
+ <module name="ModifierOrder"/>
71
+ <module name="OneStatementPerLine"/>
72
+ <module name="StringLiteralEquality"/>
73
+ <module name="MutableException"/>
74
+ <module name="EqualsHashCode"/>
75
+ <module name="InnerAssignment"/>
76
+ <module name="InterfaceIsType"/>
77
+ <module name="HideUtilityClassConstructor"/>
78
+
79
+ <module name="MemberName"/>
80
+ <module name="LocalVariableName"/>
81
+ <module name="LocalFinalVariableName"/>
82
+ <module name="TypeName"/>
83
+ <module name="PackageName"/>
84
+ <module name="ParameterName"/>
85
+ <module name="StaticVariableName"/>
86
+ <module name="ClassTypeParameterName">
87
+ <property name="format" value="^[A-Z][0-9]?$"/>
88
+ </module>
89
+ <module name="MethodTypeParameterName">
90
+ <property name="format" value="^[A-Z][0-9]?$"/>
91
+ </module>
92
+
93
+ <module name="WhitespaceAround">
94
+ <property name="allowEmptyConstructors" value="true"/>
95
+ <property name="allowEmptyMethods" value="true"/>
96
+ <property name="ignoreEnhancedForColon" value="false"/>
97
+ <property name="tokens" value="
98
+ ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99
+ BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100
+ LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101
+ LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102
+ LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103
+ LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104
+ PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105
+ STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106
+ </module>
107
+ </module>
108
+ </module>
@@ -1,6 +1,6 @@
1
- #Tue Aug 11 00:26:20 PDT 2015
1
+ #Wed Jan 13 12:41:02 JST 2016
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
@@ -42,11 +42,6 @@ case "`uname`" in
42
42
  ;;
43
43
  esac
44
44
 
45
- # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
- if $cygwin ; then
47
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
- fi
49
-
50
45
  # Attempt to set APP_HOME
51
46
  # Resolve links: $0 may be a link
52
47
  PRG="$0"
@@ -61,9 +56,9 @@ while [ -h "$PRG" ] ; do
61
56
  fi
62
57
  done
63
58
  SAVED="`pwd`"
64
- cd "`dirname \"$PRG\"`/" >&-
59
+ cd "`dirname \"$PRG\"`/" >/dev/null
65
60
  APP_HOME="`pwd -P`"
66
- cd "$SAVED" >&-
61
+ cd "$SAVED" >/dev/null
67
62
 
68
63
  CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
64
 
@@ -114,6 +109,7 @@ fi
114
109
  if $cygwin ; then
115
110
  APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
111
  CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
112
+ JAVACMD=`cygpath --unix "$JAVACMD"`
117
113
 
118
114
  # We build the pattern for arguments to be converted via cygpath
119
115
  ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
@@ -1 +1 @@
1
- jruby-9.0.0.0
1
+ jruby-9.0.4.0
@@ -72,8 +72,8 @@ module Embulk
72
72
  else
73
73
  # disable escaping (set null)
74
74
  end
75
- parser_guessed["escape"] = escape
76
75
  end
76
+ parser_guessed["escape"] = escape
77
77
  else
78
78
  # escape does nothing if quote is disabled
79
79
  end
@@ -68,6 +68,12 @@ module Embulk::Guess
68
68
  return nil
69
69
  end
70
70
 
71
+ begin
72
+ JSON.parse(str)
73
+ return "json"
74
+ rescue
75
+ end
76
+
71
77
  return "string"
72
78
  end
73
79
 
@@ -27,7 +27,7 @@ module Embulk
27
27
  end
28
28
 
29
29
  def self.from_java(java_class)
30
- JavaPlugin.ruby_adapter(java_class, GuessPlugin, RubyAdapter)
30
+ JavaPlugin.ruby_adapter_class(java_class, GuessPlugin, RubyAdapter)
31
31
  end
32
32
 
33
33
  module RubyAdapter
@@ -22,6 +22,10 @@ module Embulk::Java
22
22
  include_package 'org.embulk.time'
23
23
  end
24
24
 
25
+ module Json
26
+ include_package 'org.embulk.json'
27
+ end
28
+
25
29
  module Type
26
30
  include_package 'org.embulk.type'
27
31
  end
@@ -27,9 +27,9 @@ module Embulk
27
27
  if value = @map[type]
28
28
  return value
29
29
  end
30
- raise PluginLoadError, "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is installed but it does not correctly register plugin."
30
+ raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is installed but it does not correctly register plugin."
31
31
  else
32
- raise PluginLoadError, "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is not installed. Run 'embulk gem search -rd embulk-#{@category}' command to find plugins."
32
+ raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is not installed. Run 'embulk gem search -rd embulk-#{@category}' command to find plugins."
33
33
  end
34
34
  end
35
35
 
@@ -58,10 +58,8 @@ module Embulk
58
58
 
59
59
  # search gems
60
60
  if defined?(::Gem::Specification) && ::Gem::Specification.respond_to?(:find_all)
61
- specs = Kernel::RUBYGEMS_ACTIVATION_MONITOR.synchronize do # this lock is added as a workaround of https://github.com/jruby/jruby/issues/3652
62
- Gem::Specification.find_all do |spec|
63
- spec.contains_requirable_file? name
64
- end
61
+ specs = Gem::Specification.find_all do |spec|
62
+ spec.contains_requirable_file? name
65
63
  end
66
64
 
67
65
  # prefer newer version
@@ -80,12 +78,10 @@ module Embulk
80
78
  def require_and_show(path, spec=nil)
81
79
  require path
82
80
  unless spec
83
- name, spec = Kernel::RUBYGEMS_ACTIVATION_MONITOR.synchronize do # this lock is added as a workaround of https://github.com/jruby/jruby/issues/3652
84
- Gem.loaded_specs.find {|name,spec|
85
- #spec.files.include?(path)
86
- spec.contains_requirable_file?(path)
87
- }
88
- end
81
+ name, spec = Gem.loaded_specs.find {|name,spec|
82
+ #spec.files.include?(path)
83
+ spec.contains_requirable_file?(path)
84
+ }
89
85
  end
90
86
  if spec
91
87
  unless @loaded_gems[spec.name]
data/lib/embulk/schema.rb CHANGED
@@ -19,6 +19,7 @@ module Embulk
19
19
  each do |column|
20
20
  idx = column.index
21
21
  column_script =
22
+ "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
22
23
  "if reader.isNull(#{idx})\n" <<
23
24
  "record << nil\n" <<
24
25
  "else\n" <<
@@ -33,6 +34,8 @@ module Embulk
33
34
  "record << reader.getString(#{idx})"
34
35
  when :timestamp
35
36
  "record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)"
37
+ when :json
38
+ "record << MessagePack.unpack(value_api.toMessagePack(JRuby.runtime, reader.getJson(#{idx})))"
36
39
  else
37
40
  raise "Unknown type #{column.type.inspect}"
38
41
  end <<
@@ -45,6 +48,7 @@ module Embulk
45
48
 
46
49
  record_writer_script = "lambda do |builder,record|\n"
47
50
  record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
51
+ record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
48
52
  each do |column|
49
53
  idx = column.index
50
54
  column_script =
@@ -62,6 +66,8 @@ module Embulk
62
66
  "builder.setString(#{idx}, record[#{idx}])"
63
67
  when :timestamp
64
68
  "builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))"
69
+ when :json
70
+ "builder.setJson(#{idx}, value_api.fromMessagePack(MessagePack.pack(record[#{idx}])))"
65
71
  else
66
72
  raise "Unknown type #{column.type.inspect}"
67
73
  end <<
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.7.11'
2
+ VERSION = '0.8.0'
3
3
  end
@@ -0,0 +1,170 @@
1
+ require 'helper'
2
+ require 'time'
3
+ require 'embulk/guess/csv'
4
+
5
+ class CsvGuessTest < ::Test::Unit::TestCase
6
+ class TestDelimiter < self
7
+ data(
8
+ "\t" => "\t",
9
+ "," => ",",
10
+ "|" => "|",
11
+ )
12
+ def test_delimiter_detection(delim)
13
+ actual = guess([
14
+ ["1", "foo"].join(delim),
15
+ ["2", "bar"].join(delim),
16
+ ])
17
+ assert_equal delim, actual["parser"]["delimiter"]
18
+ end
19
+ end
20
+
21
+ class TestQuote < self
22
+ data(
23
+ "'" => "'",
24
+ '"' => '"',
25
+ nil => nil,
26
+ )
27
+ def test_quote(quotation)
28
+ actual = guess([
29
+ %w(1 foo).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
30
+ %w(2 bar).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
31
+ ])
32
+ assert_equal quotation, actual["parser"]["quote"]
33
+ end
34
+ end
35
+
36
+ class TestEscape < self
37
+ data(
38
+ "\\" => "\\",
39
+ '"' => '"',
40
+ )
41
+ def test_escape(char)
42
+ actual = guess([
43
+ %Q('1'\t'F#{char}'OO'),
44
+ %Q('2'\t'FOOOOOOOO#{char}'OO'),
45
+ ])
46
+ assert_equal char, actual["parser"]["escape"]
47
+ end
48
+ end
49
+
50
+ class TestSkipHeaderLines < self
51
+ def test_skip_header_lines_one
52
+ actual = guess([
53
+ "col1\tcol2",
54
+ "1\tfoo",
55
+ "2\tbar",
56
+ ])
57
+ assert_equal 1, actual["parser"]["skip_header_lines"]
58
+ end
59
+
60
+ def test_skip_header_lines_three
61
+ actual = guess([
62
+ "this is a CSV",
63
+ "created for a test",
64
+ "col1\tcol2",
65
+ "1\tfoo",
66
+ "2\tbar",
67
+ ])
68
+ assert_equal 3, actual["parser"]["skip_header_lines"]
69
+ end
70
+ end
71
+
72
+ class TestNullString < self
73
+ data(
74
+ "\\N" => "\\N",
75
+ "null" => "null",
76
+ "NULL" => "NULL",
77
+ "#N/A" => "#N/A",
78
+ nil => nil,
79
+ )
80
+ def test_null_string(null)
81
+ actual = guess([
82
+ "1\tfoo\t#{null}",
83
+ "2\tbar\t#{null}",
84
+ ])
85
+ assert_equal null, actual["parser"]["null_string"]
86
+ end
87
+ end
88
+
89
+ class TestTrim < self
90
+ def test_trim_flag_when_will_be_long_if_strip_arround_space
91
+ actual = guess([
92
+ " 1 \tfoo",
93
+ " 2 \tfoo",
94
+ " 3 \tfoo",
95
+ ])
96
+ assert_equal true, actual["parser"]["trim_if_not_quoted"]
97
+ end
98
+ end
99
+
100
+ class TestCommentLineMarker < self
101
+ data(
102
+ "#" => "#",
103
+ "//" => "//",
104
+ )
105
+ def test_comment_line_marker(marker)
106
+ actual = guess([
107
+ "foo\t 1\tother",
108
+ "#{marker} foo\t 2\tother",
109
+ "foo\t 3\tother",
110
+ ])
111
+ assert_equal marker, actual["parser"]["comment_line_marker"]
112
+ end
113
+ end
114
+
115
+ class TestColumns < self
116
+ def test_columns_without_header
117
+ actual = guess([
118
+ "1\tfoo\t2000-01-01T00:00:00+0900",
119
+ "2\tbar\t2000-01-01T00:00:00+0900",
120
+ ])
121
+ expected = [
122
+ {"name" => "c0", "type" => "long"},
123
+ {"name" => "c1", "type" => "string"},
124
+ {"name" => "c2", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
125
+ ]
126
+ assert_equal expected, actual["parser"]["columns"]
127
+ end
128
+
129
+ def test_columns_with_header
130
+ actual = guess([
131
+ "num\tstr\ttime",
132
+ "1\tfoo\t2000-01-01T00:00:00+0900",
133
+ "2\tbar\t2000-01-01T00:00:00+0900",
134
+ ])
135
+ expected = [
136
+ {"name" => "num", "type" => "long"},
137
+ {"name" => "str", "type" => "string"},
138
+ {"name" => "time", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
139
+ ]
140
+ assert_equal expected, actual["parser"]["columns"]
141
+ end
142
+
143
+ def test_complex_line
144
+ actual = guess([
145
+ %Q(this is useless header),
146
+ %Q(and more),
147
+ %Q(num,str,quoted_num,time),
148
+ %Q(1, "value with space "" and quote in it", "123",21150312000000Z),
149
+ %Q(2),
150
+ %Q(# 3, "this is commented out" ,"1",21150312000000Z),
151
+ ])
152
+ expected = [
153
+ {"name" => "num", "type" => "long"},
154
+ {"name" => "str", "type" => "string"},
155
+ {"name" => "quoted_num", "type" => "long"},
156
+ {"name" => "time", "type" => "timestamp", "format"=>"%Y%m%d%H%M%S%z"},
157
+ ]
158
+ assert_equal expected, actual["parser"]["columns"]
159
+ end
160
+ end
161
+
162
+ def guess(texts)
163
+ conf = Embulk::DataSource.new({
164
+ parser: {
165
+ type: "csv"
166
+ }
167
+ })
168
+ Embulk::Guess::CsvGuessPlugin.new.guess_lines(conf, Array(texts))
169
+ end
170
+ end