embulk-parser-mysqldump_tab 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/embulk-parser-mysqldump_tab.gemspec +1 -1
- data/lib/embulk/parser/mysqldump_tab.rb +58 -21
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 910ce8115b6c62894610139f06e36fa86fb9e283
|
4
|
+
data.tar.gz: d4d008238a8b8753710b87617a39474493e9a785
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34792cf6ea893eaff9d82d1e8c276e8769e53db621df45df597d0d096e07f7a533954f50004c95ee88261510f7c23f4c64c97cbe943870c46e12d6a553e52f5a
|
7
|
+
data.tar.gz: 2fba37e57d21e33a1bcfbf67bf53c73ea01b43864e034e99441f264e261b307a5b5bfe1792a9d2f97d1d5b93b3f0abe6cb20e240f64372f231a1f9962964cce4
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-mysqldump_tab"
|
4
|
-
spec.version = "0.1.0"
|
4
|
+
spec.version = "0.1.1"
|
5
5
|
spec.authors = ["inouet"]
|
6
6
|
spec.summary = "Mysqldump Tab parser plugin for Embulk"
|
7
7
|
spec.description = "Embulk parser plugin for mysqldump file that dumped with the --tab option."
|
@@ -13,17 +13,13 @@ module Embulk
|
|
13
13
|
|
14
14
|
def self.transaction(config, &control)
|
15
15
|
# configuration code:
|
16
|
-
|
17
16
|
parser_task = config.load_config(Java::LineDecoder::DecoderTask)
|
18
17
|
|
19
18
|
task = {
|
20
19
|
"decoder_task" => DataSource.from_java(parser_task.dump)
|
21
|
-
# "option1" => config.param("option1", :integer), # integer, required
|
22
|
-
# "option2" => config.param("option2", :string, default: "myvalue"), # string, optional
|
23
|
-
# "option3" => config.param("option3", :string, default: nil), # string, optional
|
24
20
|
}
|
25
21
|
|
26
|
-
# https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
|
22
|
+
# see https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
|
27
23
|
attributes = {}
|
28
24
|
columns = config.param(:columns, :array).map do |column|
|
29
25
|
name = column["name"]
|
@@ -33,20 +29,13 @@ module Embulk
|
|
33
29
|
end
|
34
30
|
|
35
31
|
task[:attributes] = attributes
|
36
|
-
|
37
|
-
# parser option
|
38
|
-
# task[:option1] = config['option1']
|
39
|
-
# task[:option1] = config.param(:option1, :integer, default: 5)
|
32
|
+
task[:columns] = columns
|
40
33
|
|
41
34
|
yield(task, columns)
|
42
35
|
end
|
43
36
|
|
44
37
|
def init
|
45
38
|
# initialization code:
|
46
|
-
# @option1 = task["option1"]
|
47
|
-
# @option2 = task["option2"]
|
48
|
-
# @option3 = task["option3"]
|
49
|
-
|
50
39
|
@decoder_task = task.param("decoder_task", :hash).load_task(Java::LineDecoder::DecoderTask)
|
51
40
|
end
|
52
41
|
|
@@ -76,6 +65,7 @@ module Embulk
|
|
76
65
|
page_builder.finish
|
77
66
|
end
|
78
67
|
|
68
|
+
private
|
79
69
|
def parse_line(line)
|
80
70
|
# Escape "escaped TAB" temporarily
|
81
71
|
line = line.gsub(/\\#{FIELDS_TERMINATED_BY}/, DUMMY_STRING)
|
@@ -84,22 +74,69 @@ module Embulk
|
|
84
74
|
cols = line.split(FIELDS_TERMINATED_BY)
|
85
75
|
cols.map! { |item| item.gsub(/#{DUMMY_STRING}/, FIELDS_TERMINATED_BY) }
|
86
76
|
|
87
|
-
|
88
|
-
cols = adjust_column(cols, len)
|
89
|
-
return cols
|
77
|
+
cols = make_record(cols)
|
90
78
|
end
|
91
79
|
|
92
80
|
def in_column?(line)
|
93
81
|
/#{Regexp.escape(FIELDS_ESCAPED_BY)}$/.match(line) ? true : false # escaped new line
|
94
82
|
end
|
95
83
|
|
96
|
-
#
|
97
|
-
def
|
98
|
-
|
99
|
-
|
84
|
+
# see https://github.com/takumakanari/embulk-parser-json/blob/master/lib/embulk/parser/jsonpath.rb#L43
|
85
|
+
def make_record(arr)
|
86
|
+
columns = @task[:columns]
|
87
|
+
record = columns.map.with_index do |col, i|
|
88
|
+
val = cast_value(arr[i], col)
|
89
|
+
end
|
100
90
|
end
|
101
91
|
|
102
|
-
|
92
|
+
def cast_value(val, col)
|
93
|
+
type = col["type"]
|
94
|
+
fmt = col["format"]
|
95
|
+
|
96
|
+
case type
|
97
|
+
when "string"
|
98
|
+
val
|
99
|
+
when "long"
|
100
|
+
val.to_i
|
101
|
+
when "double"
|
102
|
+
val.to_f
|
103
|
+
when "json"
|
104
|
+
val
|
105
|
+
when "boolean"
|
106
|
+
if kind_of_boolean?(val)
|
107
|
+
val
|
108
|
+
elsif val.nil? || val.empty?
|
109
|
+
nil
|
110
|
+
elsif val.kind_of?(String)
|
111
|
+
["yes", "true", "1"].include?(val.downcase)
|
112
|
+
elsif val.kind_of?(Numeric)
|
113
|
+
!val.zero?
|
114
|
+
else
|
115
|
+
!!val
|
116
|
+
end
|
117
|
+
when "timestamp"
|
118
|
+
if val.nil? || val.empty?
|
119
|
+
nil
|
120
|
+
else
|
121
|
+
begin
|
122
|
+
Time.strptime(val, fmt)
|
123
|
+
rescue ArgumentError => e
|
124
|
+
#raise DataParseError.new e
|
125
|
+
nil
|
126
|
+
end
|
127
|
+
end
|
128
|
+
else
|
129
|
+
raise "Unsupported type #{type}"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def kind_of_boolean?(val)
|
134
|
+
val.kind_of?(TrueClass) || val.kind_of?(FalseClass)
|
135
|
+
end
|
103
136
|
|
137
|
+
class DataParseError < StandardError
|
138
|
+
end
|
139
|
+
|
140
|
+
end
|
104
141
|
end
|
105
142
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-mysqldump_tab
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- inouet
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-10-
|
11
|
+
date: 2017-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|