embulk-parser-mysqldump_tab 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 540119b437ab3189dafba2b577bd0ef444c72583
4
- data.tar.gz: ac15037cbdaab0a87a50a392837f28b14e742e65
3
+ metadata.gz: 910ce8115b6c62894610139f06e36fa86fb9e283
4
+ data.tar.gz: d4d008238a8b8753710b87617a39474493e9a785
5
5
  SHA512:
6
- metadata.gz: a5f756f601edcb1bd9f2eec7845cb9775b165c1c65ce99426160542d24f0b19ee604487364bdbe78614c7c6bfb129ba045ce211780697ac7eda13df0ead88653
7
- data.tar.gz: 53ea4300b9ed686a312d4ce8a8da9d82a843c14eeaaa59274a669d12c209310fe0399088ec43126c87c264e5ba289f1ef4b78518d02dc3c5a73e578415c497ca
6
+ metadata.gz: 34792cf6ea893eaff9d82d1e8c276e8769e53db621df45df597d0d096e07f7a533954f50004c95ee88261510f7c23f4c64c97cbe943870c46e12d6a553e52f5a
7
+ data.tar.gz: 2fba37e57d21e33a1bcfbf67bf53c73ea01b43864e034e99441f264e261b307a5b5bfe1792a9d2f97d1d5b93b3f0abe6cb20e240f64372f231a1f9962964cce4
data/README.md CHANGED
@@ -22,6 +22,7 @@ in:
22
22
  - {name: id, type: long}
23
23
  - {name: name, type: string}
24
24
  - {name: email, type: string}
25
+ - {name: created, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
25
26
  out:
26
27
  type: stdout
27
28
  ```
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-parser-mysqldump_tab"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.1.1"
5
5
  spec.authors = ["inouet"]
6
6
  spec.summary = "Mysqldump Tab parser plugin for Embulk"
7
7
  spec.description = "Embulk parser plugin for mysqldump file that dumped with the --tab option."
@@ -13,17 +13,13 @@ module Embulk
13
13
 
14
14
  def self.transaction(config, &control)
15
15
  # configuration code:
16
-
17
16
  parser_task = config.load_config(Java::LineDecoder::DecoderTask)
18
17
 
19
18
  task = {
20
19
  "decoder_task" => DataSource.from_java(parser_task.dump)
21
- # "option1" => config.param("option1", :integer), # integer, required
22
- # "option2" => config.param("option2", :string, default: "myvalue"), # string, optional
23
- # "option3" => config.param("option3", :string, default: nil), # string, optional
24
20
  }
25
21
 
26
- # https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
22
+ # see https://github.com/treasure-data/embulk-input-jira/blob/master/lib/embulk/input/jira.rb#L22
27
23
  attributes = {}
28
24
  columns = config.param(:columns, :array).map do |column|
29
25
  name = column["name"]
@@ -33,20 +29,13 @@ module Embulk
33
29
  end
34
30
 
35
31
  task[:attributes] = attributes
36
-
37
- # parser option
38
- # task[:option1] = config['option1']
39
- # task[:option1] = config.param(:option1, :integer, default: 5)
32
+ task[:columns] = columns
40
33
 
41
34
  yield(task, columns)
42
35
  end
43
36
 
44
37
  def init
45
38
  # initialization code:
46
- # @option1 = task["option1"]
47
- # @option2 = task["option2"]
48
- # @option3 = task["option3"]
49
-
50
39
  @decoder_task = task.param("decoder_task", :hash).load_task(Java::LineDecoder::DecoderTask)
51
40
  end
52
41
 
@@ -76,6 +65,7 @@ module Embulk
76
65
  page_builder.finish
77
66
  end
78
67
 
68
+ private
79
69
  def parse_line(line)
80
70
  # Escape "escaped TAB" temporarily
81
71
  line = line.gsub(/\\#{FIELDS_TERMINATED_BY}/, DUMMY_STRING)
@@ -84,22 +74,69 @@ module Embulk
84
74
  cols = line.split(FIELDS_TERMINATED_BY)
85
75
  cols.map! { |item| item.gsub(/#{DUMMY_STRING}/, FIELDS_TERMINATED_BY) }
86
76
 
87
- len = task[:attributes].length
88
- cols = adjust_column(cols, len)
89
- return cols
77
+ cols = make_record(cols)
90
78
  end
91
79
 
92
80
  def in_column?(line)
93
81
  /#{Regexp.escape(FIELDS_ESCAPED_BY)}$/.match(line) ? true : false # escaped new line
94
82
  end
95
83
 
96
- # Adjust array length
97
- def adjust_column(arr, len)
98
- arr = arr.slice(0, len) # Truncate if more than len
99
- arr.fill(0, len) { |i| arr[i] } # If it is less than len, fill it with nil
84
+ # see https://github.com/takumakanari/embulk-parser-json/blob/master/lib/embulk/parser/jsonpath.rb#L43
85
+ def make_record(arr)
86
+ columns = @task[:columns]
87
+ record = columns.map.with_index do |col, i|
88
+ val = cast_value(arr[i], col)
89
+ end
100
90
  end
101
91
 
102
- end
92
+ def cast_value(val, col)
93
+ type = col["type"]
94
+ fmt = col["format"]
95
+
96
+ case type
97
+ when "string"
98
+ val
99
+ when "long"
100
+ val.to_i
101
+ when "double"
102
+ val.to_f
103
+ when "json"
104
+ val
105
+ when "boolean"
106
+ if kind_of_boolean?(val)
107
+ val
108
+ elsif val.nil? || val.empty?
109
+ nil
110
+ elsif val.kind_of?(String)
111
+ ["yes", "true", "1"].include?(val.downcase)
112
+ elsif val.kind_of?(Numeric)
113
+ !val.zero?
114
+ else
115
+ !!val
116
+ end
117
+ when "timestamp"
118
+ if val.nil? || val.empty?
119
+ nil
120
+ else
121
+ begin
122
+ Time.strptime(val, fmt)
123
+ rescue ArgumentError => e
124
+ #raise DataParseError.new e
125
+ nil
126
+ end
127
+ end
128
+ else
129
+ raise "Unsupported type #{type}"
130
+ end
131
+ end
132
+
133
+ def kind_of_boolean?(val)
134
+ val.kind_of?(TrueClass) || val.kind_of?(FalseClass)
135
+ end
103
136
 
137
+ class DataParseError < StandardError
138
+ end
139
+
140
+ end
104
141
  end
105
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-mysqldump_tab
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - inouet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-06 00:00:00.000000000 Z
11
+ date: 2017-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk