embulk-parser-mysqldump_tab 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/embulk-parser-mysqldump_tab.gemspec +1 -1
- data/lib/embulk/parser/mysqldump_tab.rb +58 -21
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 910ce8115b6c62894610139f06e36fa86fb9e283
|
4
|
+
data.tar.gz: d4d008238a8b8753710b87617a39474493e9a785
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34792cf6ea893eaff9d82d1e8c276e8769e53db621df45df597d0d096e07f7a533954f50004c95ee88261510f7c23f4c64c97cbe943870c46e12d6a553e52f5a
|
7
|
+
data.tar.gz: 2fba37e57d21e33a1bcfbf67bf53c73ea01b43864e034e99441f264e261b307a5b5bfe1792a9d2f97d1d5b93b3f0abe6cb20e240f64372f231a1f9962964cce4
|
data/README.md
CHANGED
data/embulk-parser-mysqldump_tab.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-mysqldump_tab"
|
4
|
-
spec.version = "0.1.0"
|
4
|
+
spec.version = "0.1.1"
|
5
5
|
spec.authors = ["inouet"]
|
6
6
|
spec.summary = "Mysqldump Tab parser plugin for Embulk"
|
7
7
|
spec.description = "Embulk parser plugin for mysqldump file that dumped with the --tab option."
|
data/lib/embulk/parser/mysqldump_tab.rb
CHANGED
@@ -13,17 +13,13 @@ module Embulk
|
|
13
13
|
|
14
14
|
def self.transaction(config, &control)
|
15
15
|
# configuration code:
|
16
|
-
|
17
16
|
parser_task = config.load_config(Java::LineDecoder::DecoderTask)
|
18
17
|
|
19
18
|
task = {
|
20
19
|
"decoder_task" => DataSource.from_java(parser_task.dump)
|
21
|
-
# "option1" => config.param("option1", :integer), # integer, required
|
22
|
-
# "option2" => config.param("option2", :string, default: "myvalue"), # string, optional
|
23
|
-
# "option3" => config.param("option3", :string, default: nil), # string, optional
|
24
20
|
}
|
25
21
|
|
26
|
-
# https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
|
22
|
+
# see https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
|
27
23
|
attributes = {}
|
28
24
|
columns = config.param(:columns, :array).map do |column|
|
29
25
|
name = column["name"]
|
@@ -33,20 +29,13 @@ module Embulk
|
|
33
29
|
end
|
34
30
|
|
35
31
|
task[:attributes] = attributes
|
36
|
-
|
37
|
-
# parser option
|
38
|
-
# task[:option1] = config['option1']
|
39
|
-
# task[:option1] = config.param(:option1, :integer, default: 5)
|
32
|
+
task[:columns] = columns
|
40
33
|
|
41
34
|
yield(task, columns)
|
42
35
|
end
|
43
36
|
|
44
37
|
def init
|
45
38
|
# initialization code:
|
46
|
-
# @option1 = task["option1"]
|
47
|
-
# @option2 = task["option2"]
|
48
|
-
# @option3 = task["option3"]
|
49
|
-
|
50
39
|
@decoder_task = task.param("decoder_task", :hash).load_task(Java::LineDecoder::DecoderTask)
|
51
40
|
end
|
52
41
|
|
@@ -76,6 +65,7 @@ module Embulk
|
|
76
65
|
page_builder.finish
|
77
66
|
end
|
78
67
|
|
68
|
+
private
|
79
69
|
def parse_line(line)
|
80
70
|
# Escape "escaped TAB" temporarily
|
81
71
|
line = line.gsub(/\\#{FIELDS_TERMINATED_BY}/, DUMMY_STRING)
|
@@ -84,22 +74,69 @@ module Embulk
|
|
84
74
|
cols = line.split(FIELDS_TERMINATED_BY)
|
85
75
|
cols.map! { |item| item.gsub(/#{DUMMY_STRING}/, FIELDS_TERMINATED_BY) }
|
86
76
|
|
87
|
-
|
88
|
-
cols = adjust_column(cols, len)
|
89
|
-
return cols
|
77
|
+
cols = make_record(cols)
|
90
78
|
end
|
91
79
|
|
92
80
|
def in_column?(line)
|
93
81
|
/#{Regexp.escape(FIELDS_ESCAPED_BY)}$/.match(line) ? true : false # escaped new line
|
94
82
|
end
|
95
83
|
|
96
|
-
#
|
97
|
-
def
|
98
|
-
|
99
|
-
|
84
|
+
# see https://github.com/takumakanari/embulk-parser-json/blob/master/lib/embulk/parser/jsonpath.rb#L43
|
85
|
+
def make_record(arr)
|
86
|
+
columns = @task[:columns]
|
87
|
+
record = columns.map.with_index do |col, i|
|
88
|
+
val = cast_value(arr[i], col)
|
89
|
+
end
|
100
90
|
end
|
101
91
|
|
102
|
-
|
92
|
+
def cast_value(val, col)
|
93
|
+
type = col["type"]
|
94
|
+
fmt = col["format"]
|
95
|
+
|
96
|
+
case type
|
97
|
+
when "string"
|
98
|
+
val
|
99
|
+
when "long"
|
100
|
+
val.to_i
|
101
|
+
when "double"
|
102
|
+
val.to_f
|
103
|
+
when "json"
|
104
|
+
val
|
105
|
+
when "boolean"
|
106
|
+
if kind_of_boolean?(val)
|
107
|
+
val
|
108
|
+
elsif val.nil? || val.empty?
|
109
|
+
nil
|
110
|
+
elsif val.kind_of?(String)
|
111
|
+
["yes", "true", "1"].include?(val.downcase)
|
112
|
+
elsif val.kind_of?(Numeric)
|
113
|
+
!val.zero?
|
114
|
+
else
|
115
|
+
!!val
|
116
|
+
end
|
117
|
+
when "timestamp"
|
118
|
+
if val.nil? || val.empty?
|
119
|
+
nil
|
120
|
+
else
|
121
|
+
begin
|
122
|
+
Time.strptime(val, fmt)
|
123
|
+
rescue ArgumentError => e
|
124
|
+
#raise DataParseError.new e
|
125
|
+
nil
|
126
|
+
end
|
127
|
+
end
|
128
|
+
else
|
129
|
+
raise "Unsupported type #{type}"
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def kind_of_boolean?(val)
|
134
|
+
val.kind_of?(TrueClass) || val.kind_of?(FalseClass)
|
135
|
+
end
|
103
136
|
|
137
|
+
class DataParseError < StandardError
|
138
|
+
end
|
139
|
+
|
140
|
+
end
|
104
141
|
end
|
105
142
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-mysqldump_tab
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- inouet
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-10-
|
11
|
+
date: 2017-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|