embulk 0.7.11-java → 0.8.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +1 -1
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +2 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
- data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
- data/embulk-docs/src/built-in.rst +40 -3
- data/embulk-docs/src/conf.py +2 -2
- data/embulk-docs/src/release.rst +1 -1
- data/embulk-docs/src/release/release-0.8.0.rst +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
- data/embulk.gemspec +1 -1
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +3 -7
- data/lib/embulk/column.rb +2 -0
- data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
- data/lib/embulk/command/embulk_new_plugin.rb +2 -0
- data/lib/embulk/command/embulk_run.rb +17 -10
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/java/build.gradle.erb +21 -0
- data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
- data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +3 -7
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/guess/csv.rb +1 -1
- data/lib/embulk/guess/schema_guess.rb +6 -0
- data/lib/embulk/guess_plugin.rb +1 -1
- data/lib/embulk/java/imports.rb +4 -0
- data/lib/embulk/plugin_registry.rb +8 -12
- data/lib/embulk/schema.rb +6 -0
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_csv_guess.rb +170 -0
- data/test/helper.rb +2 -0
- metadata +17 -15
- data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
- data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
- data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
- data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
- data/embulk-docs/src/release/release-0.7.11.rst +0 -13
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE module PUBLIC
|
|
3
|
+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
|
4
|
+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
|
5
|
+
<!--
|
|
6
|
+
This is a subset of ./checkstyle.xml which allows some loose styles
|
|
7
|
+
-->
|
|
8
|
+
<module name="Checker">
|
|
9
|
+
<module name="FileTabCharacter"/>
|
|
10
|
+
<module name="NewlineAtEndOfFile">
|
|
11
|
+
<property name="lineSeparator" value="lf"/>
|
|
12
|
+
</module>
|
|
13
|
+
<module name="RegexpMultiline">
|
|
14
|
+
<property name="format" value="\r"/>
|
|
15
|
+
<property name="message" value="Line contains carriage return"/>
|
|
16
|
+
</module>
|
|
17
|
+
<module name="RegexpMultiline">
|
|
18
|
+
<property name="format" value=" \n"/>
|
|
19
|
+
<property name="message" value="Line has trailing whitespace"/>
|
|
20
|
+
</module>
|
|
21
|
+
<module name="RegexpMultiline">
|
|
22
|
+
<property name="format" value="\n\n\n"/>
|
|
23
|
+
<property name="message" value="Multiple consecutive blank lines"/>
|
|
24
|
+
</module>
|
|
25
|
+
<module name="RegexpMultiline">
|
|
26
|
+
<property name="format" value="\n\n\Z"/>
|
|
27
|
+
<property name="message" value="Blank line before end of file"/>
|
|
28
|
+
</module>
|
|
29
|
+
|
|
30
|
+
<module name="TreeWalker">
|
|
31
|
+
<module name="EmptyBlock">
|
|
32
|
+
<property name="option" value="text"/>
|
|
33
|
+
<property name="tokens" value="
|
|
34
|
+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
|
|
35
|
+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
|
|
36
|
+
</module>
|
|
37
|
+
<module name="EmptyStatement"/>
|
|
38
|
+
<module name="EmptyForInitializerPad"/>
|
|
39
|
+
<module name="EmptyForIteratorPad">
|
|
40
|
+
<property name="option" value="space"/>
|
|
41
|
+
</module>
|
|
42
|
+
<module name="MethodParamPad">
|
|
43
|
+
<property name="allowLineBreaks" value="true"/>
|
|
44
|
+
<property name="option" value="nospace"/>
|
|
45
|
+
</module>
|
|
46
|
+
<module name="ParenPad"/>
|
|
47
|
+
<module name="TypecastParenPad"/>
|
|
48
|
+
<module name="NeedBraces"/>
|
|
49
|
+
<module name="LeftCurly">
|
|
50
|
+
<property name="option" value="nl"/>
|
|
51
|
+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
|
|
52
|
+
</module>
|
|
53
|
+
<module name="LeftCurly">
|
|
54
|
+
<property name="option" value="eol"/>
|
|
55
|
+
<property name="tokens" value="
|
|
56
|
+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
|
|
57
|
+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
|
|
58
|
+
</module>
|
|
59
|
+
<module name="RightCurly">
|
|
60
|
+
<property name="option" value="alone"/>
|
|
61
|
+
</module>
|
|
62
|
+
<module name="GenericWhitespace"/>
|
|
63
|
+
<module name="WhitespaceAfter"/>
|
|
64
|
+
<module name="NoWhitespaceBefore"/>
|
|
65
|
+
|
|
66
|
+
<module name="UpperEll"/>
|
|
67
|
+
<module name="DefaultComesLast"/>
|
|
68
|
+
<module name="ArrayTypeStyle"/>
|
|
69
|
+
<module name="MultipleVariableDeclarations"/>
|
|
70
|
+
<module name="ModifierOrder"/>
|
|
71
|
+
<module name="OneStatementPerLine"/>
|
|
72
|
+
<module name="StringLiteralEquality"/>
|
|
73
|
+
<module name="MutableException"/>
|
|
74
|
+
<module name="EqualsHashCode"/>
|
|
75
|
+
<module name="InnerAssignment"/>
|
|
76
|
+
<module name="InterfaceIsType"/>
|
|
77
|
+
<module name="HideUtilityClassConstructor"/>
|
|
78
|
+
|
|
79
|
+
<module name="MemberName"/>
|
|
80
|
+
<module name="LocalVariableName"/>
|
|
81
|
+
<module name="LocalFinalVariableName"/>
|
|
82
|
+
<module name="TypeName"/>
|
|
83
|
+
<module name="PackageName"/>
|
|
84
|
+
<module name="ParameterName"/>
|
|
85
|
+
<module name="StaticVariableName"/>
|
|
86
|
+
<module name="ClassTypeParameterName">
|
|
87
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
|
88
|
+
</module>
|
|
89
|
+
<module name="MethodTypeParameterName">
|
|
90
|
+
<property name="format" value="^[A-Z][0-9]?$"/>
|
|
91
|
+
</module>
|
|
92
|
+
|
|
93
|
+
<module name="WhitespaceAround">
|
|
94
|
+
<property name="allowEmptyConstructors" value="true"/>
|
|
95
|
+
<property name="allowEmptyMethods" value="true"/>
|
|
96
|
+
<property name="ignoreEnhancedForColon" value="false"/>
|
|
97
|
+
<property name="tokens" value="
|
|
98
|
+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
|
|
99
|
+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
|
|
100
|
+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
|
|
101
|
+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
|
|
102
|
+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
|
|
103
|
+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
|
|
104
|
+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
|
|
105
|
+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
|
|
106
|
+
</module>
|
|
107
|
+
</module>
|
|
108
|
+
</module>
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
#
|
|
1
|
+
#Wed Jan 13 12:41:02 JST 2016
|
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
|
3
3
|
distributionPath=wrapper/dists
|
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
|
5
5
|
zipStorePath=wrapper/dists
|
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
|
|
@@ -42,11 +42,6 @@ case "`uname`" in
|
|
|
42
42
|
;;
|
|
43
43
|
esac
|
|
44
44
|
|
|
45
|
-
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
|
46
|
-
if $cygwin ; then
|
|
47
|
-
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
|
48
|
-
fi
|
|
49
|
-
|
|
50
45
|
# Attempt to set APP_HOME
|
|
51
46
|
# Resolve links: $0 may be a link
|
|
52
47
|
PRG="$0"
|
|
@@ -61,9 +56,9 @@ while [ -h "$PRG" ] ; do
|
|
|
61
56
|
fi
|
|
62
57
|
done
|
|
63
58
|
SAVED="`pwd`"
|
|
64
|
-
cd "`dirname \"$PRG\"`/"
|
|
59
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
|
65
60
|
APP_HOME="`pwd -P`"
|
|
66
|
-
cd "$SAVED"
|
|
61
|
+
cd "$SAVED" >/dev/null
|
|
67
62
|
|
|
68
63
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
|
69
64
|
|
|
@@ -114,6 +109,7 @@ fi
|
|
|
114
109
|
if $cygwin ; then
|
|
115
110
|
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
|
116
111
|
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
|
112
|
+
JAVACMD=`cygpath --unix "$JAVACMD"`
|
|
117
113
|
|
|
118
114
|
# We build the pattern for arguments to be converted via cygpath
|
|
119
115
|
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
|
@@ -1 +1 @@
|
|
|
1
|
-
jruby-9.0.
|
|
1
|
+
jruby-9.0.4.0
|
data/lib/embulk/guess/csv.rb
CHANGED
data/lib/embulk/guess_plugin.rb
CHANGED
data/lib/embulk/java/imports.rb
CHANGED
|
@@ -27,9 +27,9 @@ module Embulk
|
|
|
27
27
|
if value = @map[type]
|
|
28
28
|
return value
|
|
29
29
|
end
|
|
30
|
-
raise PluginLoadError
|
|
30
|
+
raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is installed but it does not correctly register plugin."
|
|
31
31
|
else
|
|
32
|
-
raise PluginLoadError
|
|
32
|
+
raise PluginLoadError.new "Unknown #{@category} plugin '#{type}'. #{@search_prefix}#{type}.rb is not installed. Run 'embulk gem search -rd embulk-#{@category}' command to find plugins."
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
|
|
@@ -58,10 +58,8 @@ module Embulk
|
|
|
58
58
|
|
|
59
59
|
# search gems
|
|
60
60
|
if defined?(::Gem::Specification) && ::Gem::Specification.respond_to?(:find_all)
|
|
61
|
-
specs =
|
|
62
|
-
|
|
63
|
-
spec.contains_requirable_file? name
|
|
64
|
-
end
|
|
61
|
+
specs = Gem::Specification.find_all do |spec|
|
|
62
|
+
spec.contains_requirable_file? name
|
|
65
63
|
end
|
|
66
64
|
|
|
67
65
|
# prefer newer version
|
|
@@ -80,12 +78,10 @@ module Embulk
|
|
|
80
78
|
def require_and_show(path, spec=nil)
|
|
81
79
|
require path
|
|
82
80
|
unless spec
|
|
83
|
-
name, spec =
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
}
|
|
88
|
-
end
|
|
81
|
+
name, spec = Gem.loaded_specs.find {|name,spec|
|
|
82
|
+
#spec.files.include?(path)
|
|
83
|
+
spec.contains_requirable_file?(path)
|
|
84
|
+
}
|
|
89
85
|
end
|
|
90
86
|
if spec
|
|
91
87
|
unless @loaded_gems[spec.name]
|
data/lib/embulk/schema.rb
CHANGED
|
@@ -19,6 +19,7 @@ module Embulk
|
|
|
19
19
|
each do |column|
|
|
20
20
|
idx = column.index
|
|
21
21
|
column_script =
|
|
22
|
+
"value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
|
|
22
23
|
"if reader.isNull(#{idx})\n" <<
|
|
23
24
|
"record << nil\n" <<
|
|
24
25
|
"else\n" <<
|
|
@@ -33,6 +34,8 @@ module Embulk
|
|
|
33
34
|
"record << reader.getString(#{idx})"
|
|
34
35
|
when :timestamp
|
|
35
36
|
"record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)"
|
|
37
|
+
when :json
|
|
38
|
+
"record << MessagePack.unpack(value_api.toMessagePack(JRuby.runtime, reader.getJson(#{idx})))"
|
|
36
39
|
else
|
|
37
40
|
raise "Unknown type #{column.type.inspect}"
|
|
38
41
|
end <<
|
|
@@ -45,6 +48,7 @@ module Embulk
|
|
|
45
48
|
|
|
46
49
|
record_writer_script = "lambda do |builder,record|\n"
|
|
47
50
|
record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
|
|
51
|
+
record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
|
|
48
52
|
each do |column|
|
|
49
53
|
idx = column.index
|
|
50
54
|
column_script =
|
|
@@ -62,6 +66,8 @@ module Embulk
|
|
|
62
66
|
"builder.setString(#{idx}, record[#{idx}])"
|
|
63
67
|
when :timestamp
|
|
64
68
|
"builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))"
|
|
69
|
+
when :json
|
|
70
|
+
"builder.setJson(#{idx}, value_api.fromMessagePack(MessagePack.pack(record[#{idx}])))"
|
|
65
71
|
else
|
|
66
72
|
raise "Unknown type #{column.type.inspect}"
|
|
67
73
|
end <<
|
data/lib/embulk/version.rb
CHANGED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
require 'helper'
|
|
2
|
+
require 'time'
|
|
3
|
+
require 'embulk/guess/csv'
|
|
4
|
+
|
|
5
|
+
class CsvGuessTest < ::Test::Unit::TestCase
|
|
6
|
+
class TestDelimiter < self
|
|
7
|
+
data(
|
|
8
|
+
"\t" => "\t",
|
|
9
|
+
"," => ",",
|
|
10
|
+
"|" => "|",
|
|
11
|
+
)
|
|
12
|
+
def test_delimiter_detection(delim)
|
|
13
|
+
actual = guess([
|
|
14
|
+
["1", "foo"].join(delim),
|
|
15
|
+
["2", "bar"].join(delim),
|
|
16
|
+
])
|
|
17
|
+
assert_equal delim, actual["parser"]["delimiter"]
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
class TestQuote < self
|
|
22
|
+
data(
|
|
23
|
+
"'" => "'",
|
|
24
|
+
'"' => '"',
|
|
25
|
+
nil => nil,
|
|
26
|
+
)
|
|
27
|
+
def test_quote(quotation)
|
|
28
|
+
actual = guess([
|
|
29
|
+
%w(1 foo).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
|
|
30
|
+
%w(2 bar).map{|str| %Q(#{quotation}#{str}#{quotation})}.join("\t"),
|
|
31
|
+
])
|
|
32
|
+
assert_equal quotation, actual["parser"]["quote"]
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
class TestEscape < self
|
|
37
|
+
data(
|
|
38
|
+
"\\" => "\\",
|
|
39
|
+
'"' => '"',
|
|
40
|
+
)
|
|
41
|
+
def test_escape(char)
|
|
42
|
+
actual = guess([
|
|
43
|
+
%Q('1'\t'F#{char}'OO'),
|
|
44
|
+
%Q('2'\t'FOOOOOOOO#{char}'OO'),
|
|
45
|
+
])
|
|
46
|
+
assert_equal char, actual["parser"]["escape"]
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
class TestSkipHeaderLines < self
|
|
51
|
+
def test_skip_header_lines_one
|
|
52
|
+
actual = guess([
|
|
53
|
+
"col1\tcol2",
|
|
54
|
+
"1\tfoo",
|
|
55
|
+
"2\tbar",
|
|
56
|
+
])
|
|
57
|
+
assert_equal 1, actual["parser"]["skip_header_lines"]
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def test_skip_header_lines_three
|
|
61
|
+
actual = guess([
|
|
62
|
+
"this is a CSV",
|
|
63
|
+
"created for a test",
|
|
64
|
+
"col1\tcol2",
|
|
65
|
+
"1\tfoo",
|
|
66
|
+
"2\tbar",
|
|
67
|
+
])
|
|
68
|
+
assert_equal 3, actual["parser"]["skip_header_lines"]
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
class TestNullString < self
|
|
73
|
+
data(
|
|
74
|
+
"\\N" => "\\N",
|
|
75
|
+
"null" => "null",
|
|
76
|
+
"NULL" => "NULL",
|
|
77
|
+
"#N/A" => "#N/A",
|
|
78
|
+
nil => nil,
|
|
79
|
+
)
|
|
80
|
+
def test_null_string(null)
|
|
81
|
+
actual = guess([
|
|
82
|
+
"1\tfoo\t#{null}",
|
|
83
|
+
"2\tbar\t#{null}",
|
|
84
|
+
])
|
|
85
|
+
assert_equal null, actual["parser"]["null_string"]
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
class TestTrim < self
|
|
90
|
+
def test_trim_flag_when_will_be_long_if_strip_arround_space
|
|
91
|
+
actual = guess([
|
|
92
|
+
" 1 \tfoo",
|
|
93
|
+
" 2 \tfoo",
|
|
94
|
+
" 3 \tfoo",
|
|
95
|
+
])
|
|
96
|
+
assert_equal true, actual["parser"]["trim_if_not_quoted"]
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
class TestCommentLineMarker < self
|
|
101
|
+
data(
|
|
102
|
+
"#" => "#",
|
|
103
|
+
"//" => "//",
|
|
104
|
+
)
|
|
105
|
+
def test_comment_line_marker(marker)
|
|
106
|
+
actual = guess([
|
|
107
|
+
"foo\t 1\tother",
|
|
108
|
+
"#{marker} foo\t 2\tother",
|
|
109
|
+
"foo\t 3\tother",
|
|
110
|
+
])
|
|
111
|
+
assert_equal marker, actual["parser"]["comment_line_marker"]
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
class TestColumns < self
|
|
116
|
+
def test_columns_without_header
|
|
117
|
+
actual = guess([
|
|
118
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
|
119
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
|
120
|
+
])
|
|
121
|
+
expected = [
|
|
122
|
+
{"name" => "c0", "type" => "long"},
|
|
123
|
+
{"name" => "c1", "type" => "string"},
|
|
124
|
+
{"name" => "c2", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
|
|
125
|
+
]
|
|
126
|
+
assert_equal expected, actual["parser"]["columns"]
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def test_columns_with_header
|
|
130
|
+
actual = guess([
|
|
131
|
+
"num\tstr\ttime",
|
|
132
|
+
"1\tfoo\t2000-01-01T00:00:00+0900",
|
|
133
|
+
"2\tbar\t2000-01-01T00:00:00+0900",
|
|
134
|
+
])
|
|
135
|
+
expected = [
|
|
136
|
+
{"name" => "num", "type" => "long"},
|
|
137
|
+
{"name" => "str", "type" => "string"},
|
|
138
|
+
{"name" => "time", "type" => "timestamp", "format"=>"%Y-%m-%dT%H:%M:%S%z"},
|
|
139
|
+
]
|
|
140
|
+
assert_equal expected, actual["parser"]["columns"]
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def test_complex_line
|
|
144
|
+
actual = guess([
|
|
145
|
+
%Q(this is useless header),
|
|
146
|
+
%Q(and more),
|
|
147
|
+
%Q(num,str,quoted_num,time),
|
|
148
|
+
%Q(1, "value with space "" and quote in it", "123",21150312000000Z),
|
|
149
|
+
%Q(2),
|
|
150
|
+
%Q(# 3, "this is commented out" ,"1",21150312000000Z),
|
|
151
|
+
])
|
|
152
|
+
expected = [
|
|
153
|
+
{"name" => "num", "type" => "long"},
|
|
154
|
+
{"name" => "str", "type" => "string"},
|
|
155
|
+
{"name" => "quoted_num", "type" => "long"},
|
|
156
|
+
{"name" => "time", "type" => "timestamp", "format"=>"%Y%m%d%H%M%S%z"},
|
|
157
|
+
]
|
|
158
|
+
assert_equal expected, actual["parser"]["columns"]
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def guess(texts)
|
|
163
|
+
conf = Embulk::DataSource.new({
|
|
164
|
+
parser: {
|
|
165
|
+
type: "csv"
|
|
166
|
+
}
|
|
167
|
+
})
|
|
168
|
+
Embulk::Guess::CsvGuessPlugin.new.guess_lines(conf, Array(texts))
|
|
169
|
+
end
|
|
170
|
+
end
|