embulk-parser-mysqldump_tab 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 540119b437ab3189dafba2b577bd0ef444c72583
4
- data.tar.gz: ac15037cbdaab0a87a50a392837f28b14e742e65
3
+ metadata.gz: 910ce8115b6c62894610139f06e36fa86fb9e283
4
+ data.tar.gz: d4d008238a8b8753710b87617a39474493e9a785
5
5
  SHA512:
6
- metadata.gz: a5f756f601edcb1bd9f2eec7845cb9775b165c1c65ce99426160542d24f0b19ee604487364bdbe78614c7c6bfb129ba045ce211780697ac7eda13df0ead88653
7
- data.tar.gz: 53ea4300b9ed686a312d4ce8a8da9d82a843c14eeaaa59274a669d12c209310fe0399088ec43126c87c264e5ba289f1ef4b78518d02dc3c5a73e578415c497ca
6
+ metadata.gz: 34792cf6ea893eaff9d82d1e8c276e8769e53db621df45df597d0d096e07f7a533954f50004c95ee88261510f7c23f4c64c97cbe943870c46e12d6a553e52f5a
7
+ data.tar.gz: 2fba37e57d21e33a1bcfbf67bf53c73ea01b43864e034e99441f264e261b307a5b5bfe1792a9d2f97d1d5b93b3f0abe6cb20e240f64372f231a1f9962964cce4
data/README.md CHANGED
@@ -22,6 +22,7 @@ in:
22
22
  - {name: id, type: long}
23
23
  - {name: name, type: string}
24
24
  - {name: email, type: string}
25
+ - {name: created, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
25
26
  out:
26
27
  type: stdout
27
28
  ```
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-parser-mysqldump_tab"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.1.1"
5
5
  spec.authors = ["inouet"]
6
6
  spec.summary = "Mysqldump Tab parser plugin for Embulk"
7
7
  spec.description = "Embulk parser plugin for mysqldump file that dumped with the --tab option."
@@ -13,17 +13,13 @@ module Embulk
13
13
 
14
14
  def self.transaction(config, &control)
15
15
  # configuration code:
16
-
17
16
  parser_task = config.load_config(Java::LineDecoder::DecoderTask)
18
17
 
19
18
  task = {
20
19
  "decoder_task" => DataSource.from_java(parser_task.dump)
21
- # "option1" => config.param("option1", :integer), # integer, required
22
- # "option2" => config.param("option2", :string, default: "myvalue"), # string, optional
23
- # "option3" => config.param("option3", :string, default: nil), # string, optional
24
20
  }
25
21
 
26
- # https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
22
+ # see https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
27
23
  attributes = {}
28
24
  columns = config.param(:columns, :array).map do |column|
29
25
  name = column["name"]
@@ -33,20 +29,13 @@ module Embulk
33
29
  end
34
30
 
35
31
  task[:attributes] = attributes
36
-
37
- # parser option
38
- # task[:option1] = config['option1']
39
- # task[:option1] = config.param(:option1, :integer, default: 5)
32
+ task[:columns] = columns
40
33
 
41
34
  yield(task, columns)
42
35
  end
43
36
 
44
37
  def init
45
38
  # initialization code:
46
- # @option1 = task["option1"]
47
- # @option2 = task["option2"]
48
- # @option3 = task["option3"]
49
-
50
39
  @decoder_task = task.param("decoder_task", :hash).load_task(Java::LineDecoder::DecoderTask)
51
40
  end
52
41
 
@@ -76,6 +65,7 @@ module Embulk
76
65
  page_builder.finish
77
66
  end
78
67
 
68
+ private
79
69
  def parse_line(line)
80
70
  # Escape "escaped TAB" temporarily
81
71
  line = line.gsub(/\\#{FIELDS_TERMINATED_BY}/, DUMMY_STRING)
@@ -84,22 +74,69 @@ module Embulk
84
74
  cols = line.split(FIELDS_TERMINATED_BY)
85
75
  cols.map! { |item| item.gsub(/#{DUMMY_STRING}/, FIELDS_TERMINATED_BY) }
86
76
 
87
- len = task[:attributes].length
88
- cols = adjust_column(cols, len)
89
- return cols
77
+ cols = make_record(cols)
90
78
  end
91
79
 
92
80
  def in_column?(line)
93
81
  /#{Regexp.escape(FIELDS_ESCAPED_BY)}$/.match(line) ? true : false # escaped new line
94
82
  end
95
83
 
96
- # Adjust array length
97
- def adjust_column(arr, len)
98
- arr = arr.slice(0, len) # Truncate if more than len
99
- arr.fill(0, len) { |i| arr[i] } # If it is less than len, fill it with nil
84
+ # see https://github.com/takumakanari/embulk-parser-json/blob/master/lib/embulk/parser/jsonpath.rb#L43
85
+ def make_record(arr)
86
+ columns = @task[:columns]
87
+ record = columns.map.with_index do |col, i|
88
+ val = cast_value(arr[i], col)
89
+ end
100
90
  end
101
91
 
102
- end
92
+ def cast_value(val, col)
93
+ type = col["type"]
94
+ fmt = col["format"]
95
+
96
+ case type
97
+ when "string"
98
+ val
99
+ when "long"
100
+ val.to_i
101
+ when "double"
102
+ val.to_f
103
+ when "json"
104
+ val
105
+ when "boolean"
106
+ if kind_of_boolean?(val)
107
+ val
108
+ elsif val.nil? || val.empty?
109
+ nil
110
+ elsif val.kind_of?(String)
111
+ ["yes", "true", "1"].include?(val.downcase)
112
+ elsif val.kind_of?(Numeric)
113
+ !val.zero?
114
+ else
115
+ !!val
116
+ end
117
+ when "timestamp"
118
+ if val.nil? || val.empty?
119
+ nil
120
+ else
121
+ begin
122
+ Time.strptime(val, fmt)
123
+ rescue ArgumentError => e
124
+ #raise DataParseError.new e
125
+ nil
126
+ end
127
+ end
128
+ else
129
+ raise "Unsupported type #{type}"
130
+ end
131
+ end
132
+
133
+ def kind_of_boolean?(val)
134
+ val.kind_of?(TrueClass) || val.kind_of?(FalseClass)
135
+ end
103
136
 
137
+ class DataParseError < StandardError
138
+ end
139
+
140
+ end
104
141
  end
105
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-mysqldump_tab
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - inouet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-06 00:00:00.000000000 Z
11
+ date: 2017-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk