embulk 0.7.11 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +1 -1
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +2 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
- data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
- data/embulk-docs/src/built-in.rst +40 -3
- data/embulk-docs/src/conf.py +2 -2
- data/embulk-docs/src/release.rst +1 -1
- data/embulk-docs/src/release/release-0.8.0.rst +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
- data/embulk.gemspec +1 -1
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +3 -7
- data/lib/embulk/column.rb +2 -0
- data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
- data/lib/embulk/command/embulk_new_plugin.rb +2 -0
- data/lib/embulk/command/embulk_run.rb +17 -10
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/java/build.gradle.erb +21 -0
- data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
- data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +3 -7
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/guess/csv.rb +1 -1
- data/lib/embulk/guess/schema_guess.rb +6 -0
- data/lib/embulk/guess_plugin.rb +1 -1
- data/lib/embulk/java/imports.rb +4 -0
- data/lib/embulk/plugin_registry.rb +8 -12
- data/lib/embulk/schema.rb +6 -0
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_csv_guess.rb +170 -0
- data/test/helper.rb +2 -0
- metadata +19 -17
- data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
- data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
- data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
- data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
- data/embulk-docs/src/release/release-0.7.11.rst +0 -13
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE module PUBLIC
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
5
|
+
<!--
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
7
|
+
-->
|
8
|
+
<module name="Checker">
|
9
|
+
<module name="FileTabCharacter"/>
|
10
|
+
<module name="NewlineAtEndOfFile">
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
12
|
+
</module>
|
13
|
+
<module name="RegexpMultiline">
|
14
|
+
<property name="format" value="\r"/>
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
16
|
+
</module>
|
17
|
+
<module name="RegexpMultiline">
|
18
|
+
<property name="format" value=" \n"/>
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
20
|
+
</module>
|
21
|
+
<module name="RegexpMultiline">
|
22
|
+
<property name="format" value="\n\n\n"/>
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
24
|
+
</module>
|
25
|
+
<module name="RegexpMultiline">
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
28
|
+
</module>
|
29
|
+
|
30
|
+
<module name="TreeWalker">
|
31
|
+
<module name="EmptyBlock">
|
32
|
+
<property name="option" value="text"/>
|
33
|
+
<property name="tokens" value="
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
36
|
+
</module>
|
37
|
+
<module name="EmptyStatement"/>
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
39
|
+
<module name="EmptyForIteratorPad">
|
40
|
+
<property name="option" value="space"/>
|
41
|
+
</module>
|
42
|
+
<module name="MethodParamPad">
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
44
|
+
<property name="option" value="nospace"/>
|
45
|
+
</module>
|
46
|
+
<module name="ParenPad"/>
|
47
|
+
<module name="TypecastParenPad"/>
|
48
|
+
<module name="NeedBraces"/>
|
49
|
+
<module name="LeftCurly">
|
50
|
+
<property name="option" value="nl"/>
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
52
|
+
</module>
|
53
|
+
<module name="LeftCurly">
|
54
|
+
<property name="option" value="eol"/>
|
55
|
+
<property name="tokens" value="
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
58
|
+
</module>
|
59
|
+
<module name="RightCurly">
|
60
|
+
<property name="option" value="alone"/>
|
61
|
+
</module>
|
62
|
+
<module name="GenericWhitespace"/>
|
63
|
+
<module name="WhitespaceAfter"/>
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
65
|
+
|
66
|
+
<module name="UpperEll"/>
|
67
|
+
<module name="DefaultComesLast"/>
|
68
|
+
<module name="ArrayTypeStyle"/>
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
70
|
+
<module name="ModifierOrder"/>
|
71
|
+
<module name="OneStatementPerLine"/>
|
72
|
+
<module name="StringLiteralEquality"/>
|
73
|
+
<module name="MutableException"/>
|
74
|
+
<module name="EqualsHashCode"/>
|
75
|
+
<module name="InnerAssignment"/>
|
76
|
+
<module name="InterfaceIsType"/>
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
78
|
+
|
79
|
+
<module name="MemberName"/>
|
80
|
+
<module name="LocalVariableName"/>
|
81
|
+
<module name="LocalFinalVariableName"/>
|
82
|
+
<module name="TypeName"/>
|
83
|
+
<module name="PackageName"/>
|
84
|
+
<module name="ParameterName"/>
|
85
|
+
<module name="StaticVariableName"/>
|
86
|
+
<module name="ClassTypeParameterName">
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
88
|
+
</module>
|
89
|
+
<module name="MethodTypeParameterName">
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
91
|
+
</module>
|
92
|
+
|
93
|
+
<module name="WhitespaceAround">
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
97
|
+
<property name="tokens" value="
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
106
|
+
</module>
|
107
|
+
</module>
|
108
|
+
</module>
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Wed Jan 13 12:41:02 JST 2016
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
|
@@ -42,11 +42,6 @@ case "`uname`" in
|
|
42
42
|
;;
|
43
43
|
esac
|
44
44
|
|
45
|
-
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
46
|
-
if $cygwin ; then
|
47
|
-
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
48
|
-
fi
|
49
|
-
|
50
45
|
# Attempt to set APP_HOME
|
51
46
|
# Resolve links: $0 may be a link
|
52
47
|
PRG="$0"
|
@@ -61,9 +56,9 @@ while [ -h "$PRG" ] ; do
|
|
61
56
|
fi
|
62
57
|
done
|
63
58
|
SAVED="`pwd`"
|
64
|
-
cd "`dirname \"$PRG\"`/"
|
59
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
65
60
|
APP_HOME="`pwd -P`"
|
66
|
-
cd "$SAVED"
|
61
|
+
cd "$SAVED" >/dev/null
|
67
62
|
|
68
63
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
69
64
|
|
@@ -114,6 +109,7 @@ fi
|
|
114
109
|
if $cygwin ; then
|
115
110
|
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
116
111
|
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
112
|
+
JAVACMD=`cygpath --unix "$JAVACMD"`
|
117
113
|
|
118
114
|
# We build the pattern for arguments to be converted via cygpath
|
119
115
|
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
@@ -1 +1 @@
|
|
1
|
-
jruby-9.0.
|
1
|
+
jruby-9.0.4.0
|
data/lib/embulk/guess/csv.rb
CHANGED
data/lib/embulk/guess_plugin.rb
CHANGED
data/lib/embulk/java/imports.rb
CHANGED
@@ -27,9 +27,9 @@ module Embulk
|
|
27
27
|
if value = @map[type]
|
28
28
|
return value
|
29
29
|
end
|
30
|
-
raise PluginLoadError
|
30
|
+
raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is installed but it does not correctly register plugin."
|
31
31
|
else
|
32
|
-
raise PluginLoadError
|
32
|
+
raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is not installed. Run 'embulk gem search -rd embulk-#{@category}' command to find plugins."
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
@@ -58,10 +58,8 @@ module Embulk
|
|
58
58
|
|
59
59
|
# search gems
|
60
60
|
if defined?(::Gem::Specification) && ::Gem::Specification.respond_to?(:find_all)
|
61
|
-
specs =
|
62
|
-
|
63
|
-
spec.contains_requirable_file? name
|
64
|
-
end
|
61
|
+
specs = Gem::Specification.find_all do |spec|
|
62
|
+
spec.contains_requirable_file? name
|
65
63
|
end
|
66
64
|
|
67
65
|
# prefer newer version
|
@@ -80,12 +78,10 @@ module Embulk
|
|
80
78
|
def require_and_show(path, spec=nil)
|
81
79
|
require path
|
82
80
|
unless spec
|
83
|
-
name, spec =
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
}
|
88
|
-
end
|
81
|
+
name, spec = Gem.loaded_specs.find {|name,spec|
|
82
|
+
#spec.files.include?(path)
|
83
|
+
spec.contains_requirable_file?(path)
|
84
|
+
}
|
89
85
|
end
|
90
86
|
if spec
|
91
87
|
unless @loaded_gems[spec.name]
|
data/lib/embulk/schema.rb
CHANGED
@@ -19,6 +19,7 @@ module Embulk
|
|
19
19
|
each do |column|
|
20
20
|
idx = column.index
|
21
21
|
column_script =
|
22
|
+
"value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
|
22
23
|
"if reader.isNull(#{idx})\n" <<
|
23
24
|
"record << nil\n" <<
|
24
25
|
"else\n" <<
|
@@ -33,6 +34,8 @@ module Embulk
|
|
33
34
|
"record << reader.getString(#{idx})"
|
34
35
|
when :timestamp
|
35
36
|
"record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)"
|
37
|
+
when :json
|
38
|
+
"record << MessagePack.unpack(value_api.toMessagePack(JRuby.runtime, reader.getJson(#{idx})))"
|
36
39
|
else
|
37
40
|
raise "Unknown type #{column.type.inspect}"
|
38
41
|
end <<
|
@@ -45,6 +48,7 @@ module Embulk
|
|
45
48
|
|
46
49
|
record_writer_script = "lambda do |builder,record|\n"
|
47
50
|
record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
|
51
|
+
record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
|
48
52
|
each do |column|
|
49
53
|
idx = column.index
|
50
54
|
column_script =
|
@@ -62,6 +66,8 @@ module Embulk
|
|
62
66
|
"builder.setString(#{idx}, record[#{idx}])"
|
63
67
|
when :timestamp
|
64
68
|
"builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))"
|
69
|
+
when :json
|
70
|
+
"builder.setJson(#{idx}, value_api.fromMessagePack(MessagePack.pack(record[#{idx}])))"
|
65
71
|
else
|
66
72
|
raise "Unknown type #{column.type.inspect}"
|
67
73
|
end <<
|
data/lib/embulk/version.rb
CHANGED
@@ -0,0 +1,170 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'time'
|
3
|
+
require 'embulk/guess/csv'
|
4
|
+
|
5
|
+
class CsvGuessTest < ::Test::Unit::TestCase
|
6
|
+
class TestDelimiter < self
|
7
|
+
data(
|
8
|
+
"\t" => "\t",
|
9
|
+
"," => ",",
|
10
|
+
"|" => "|",
|
11
|
+
)
|
12
|
+
def test_delimiter_detection(delim)
|
13
|
+
actual = guess([
|
14
|
+
["1", "foo"].join(delim),
|
15
|
+
["2", "bar"].join(delim),
|
16
|
+
])
|
17
|
+
assert_equal delim, actual["parser"]["delimiter"]
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
class TestQuote < self
|
22
|
+
data(
|
23
|
+
"'" => "'",
|
24
|
+
'"' => '"',
|
25
|
+
nil => nil,
|
26
|
+
)
|
27
|
+
def test_quote(quotation)
|
28
|
+
actual = guess([
|
29
|
+
%w(1 foo).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
|
30
|
+
%w(2 bar).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
|
31
|
+
])
|
32
|
+
assert_equal quotation, actual["parser"]["quote"]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
class TestEscape < self
|
37
|
+
data(
|
38
|
+
"\\" => "\\",
|
39
|
+
'"' => '"',
|
40
|
+
)
|
41
|
+
def test_escape(char)
|
42
|
+
actual = guess([
|
43
|
+
%Q('1'\t'F#{char}'OO'),
|
44
|
+
%Q('2'\t'FOOOOOOOO#{char}'OO'),
|
45
|
+
])
|
46
|
+
assert_equal char, actual["parser"]["escape"]
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
class TestSkipHeaderLines < self
|
51
|
+
def test_skip_header_lines_one
|
52
|
+
actual = guess([
|
53
|
+
"col1\tcol2",
|
54
|
+
"1\tfoo",
|
55
|
+
"2\tbar",
|
56
|
+
])
|
57
|
+
assert_equal 1, actual["parser"]["skip_header_lines"]
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_skip_header_lines_three
|
61
|
+
actual = guess([
|
62
|
+
"this is a CSV",
|
63
|
+
"created for a test",
|
64
|
+
"col1\tcol2",
|
65
|
+
"1\tfoo",
|
66
|
+
"2\tbar",
|
67
|
+
])
|
68
|
+
assert_equal 3, actual["parser"]["skip_header_lines"]
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
class TestNullString < self
|
73
|
+
data(
|
74
|
+
"\\N" => "\\N",
|
75
|
+
"null" => "null",
|
76
|
+
"NULL" => "NULL",
|
77
|
+
"#N/A" => "#N/A",
|
78
|
+
nil => nil,
|
79
|
+
)
|
80
|
+
def test_null_string(null)
|
81
|
+
actual = guess([
|
82
|
+
"1\tfoo\t#{null}",
|
83
|
+
"2\tbar\t#{null}",
|
84
|
+
])
|
85
|
+
assert_equal null, actual["parser"]["null_string"]
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
class TestTrim < self
|
90
|
+
def test_trim_flag_when_will_be_long_if_strip_arround_space
|
91
|
+
actual = guess([
|
92
|
+
" 1 \tfoo",
|
93
|
+
" 2 \tfoo",
|
94
|
+
" 3 \tfoo",
|
95
|
+
])
|
96
|
+
assert_equal true, actual["parser"]["trim_if_not_quoted"]
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
class TestCommentLineMarker < self
|
101
|
+
data(
|
102
|
+
"#" => "#",
|
103
|
+
"//" => "//",
|
104
|
+
)
|
105
|
+
def test_comment_line_marker(marker)
|
106
|
+
actual = guess([
|
107
|
+
"foo\t 1\tother",
|
108
|
+
"#{marker} foo\t 2\tother",
|
109
|
+
"foo\t 3\tother",
|
110
|
+
])
|
111
|
+
assert_equal marker, actual["parser"]["comment_line_marker"]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
class TestColumns < self
|
116
|
+
def test_columns_without_header
|
117
|
+
actual = guess([
|
118
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
119
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
120
|
+
])
|
121
|
+
expected = [
|
122
|
+
{"name" => "c0", "type" => "long"},
|
123
|
+
{"name" => "c1", "type" => "string"},
|
124
|
+
{"name" => "c2", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
|
125
|
+
]
|
126
|
+
assert_equal expected, actual["parser"]["columns"]
|
127
|
+
end
|
128
|
+
|
129
|
+
def test_columns_with_header
|
130
|
+
actual = guess([
|
131
|
+
"num\tstr\ttime",
|
132
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
133
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
134
|
+
])
|
135
|
+
expected = [
|
136
|
+
{"name" => "num", "type" => "long"},
|
137
|
+
{"name" => "str", "type" => "string"},
|
138
|
+
{"name" => "time", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
|
139
|
+
]
|
140
|
+
assert_equal expected, actual["parser"]["columns"]
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_complex_line
|
144
|
+
actual = guess([
|
145
|
+
%Q(this is useless header),
|
146
|
+
%Q(and more),
|
147
|
+
%Q(num,str,quoted_num,time),
|
148
|
+
%Q(1, "value with space "" and quote in it", "123",21150312000000Z),
|
149
|
+
%Q(2),
|
150
|
+
%Q(# 3, "this is commented out" ,"1",21150312000000Z),
|
151
|
+
])
|
152
|
+
expected = [
|
153
|
+
{"name" => "num", "type" => "long"},
|
154
|
+
{"name" => "str", "type" => "string"},
|
155
|
+
{"name" => "quoted_num", "type" => "long"},
|
156
|
+
{"name" => "time", "type" => "timestamp", "format"=>"%Y%m%d%H%M%S%z"},
|
157
|
+
]
|
158
|
+
assert_equal expected, actual["parser"]["columns"]
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
def guess(texts)
|
163
|
+
conf = Embulk::DataSource.new({
|
164
|
+
parser: {
|
165
|
+
type: "csv"
|
166
|
+
}
|
167
|
+
})
|
168
|
+
Embulk::Guess::CsvGuessPlugin.new.guess_lines(conf, Array(texts))
|
169
|
+
end
|
170
|
+
end
|