embulk-input-presto 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a57f6dc5a7064378aba3325b8765ffc3e3f1cdd
4
- data.tar.gz: f272abf3aad3768b0603366e404685cf33d4e61d
3
+ metadata.gz: 042574a4df2a41bc869a625661001f50d0b371e6
4
+ data.tar.gz: 5e9536ea331fa538d6a09d390fa7cf07ea057320
5
5
  SHA512:
6
- metadata.gz: 13ce34fd728876372f389ac13cf422d23b8facba8c3069b60f946f4762cc37e003e2493ec5c1129ca10fdab20a8ed0ed860028a9109e02a5afd6bb62e9e30ccc
7
- data.tar.gz: 1898c4f776b9570ecf9813372cfd80ff2c62c339f0872e633d85a722e8a844d911b1bb0716489cf243cde6b25aa6fc2452a8c628d3877cc6b13399a8924b279a
6
+ metadata.gz: 17baab4b601e67d21c8b9fe91c23924594f1ccffa5ed22cd65a937ad3909edbf7cf3d070d0c7b1eb2bad965d757d3f2ea1ea6870748972990b9da4014578007f
7
+ data.tar.gz: 51b93b3c578d298f35399b1127a236cc64ea393aee7bef3088c62032c6b7e64640a61205ca5f4b27c5f4862a153d0212566080aec05255aca423f1f1e2bdcf84
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ cache: bundler
3
+ rvm:
4
+ - jruby-9.0.5.0
5
+ - jruby-head
6
+ jdk:
7
+ - openjdk7
8
+ before_install:
9
+ - gem install bundler
10
+ matrix:
11
+ allow_failures:
12
+ - rvm: jruby-head
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Facebook Presto input plugin for Embulk
1
+ # Facebook Presto input plugin for Embulk [![Build Status](https://secure.travis-ci.org/toyama0919/embulk-input-presto.png?branch=master)](http://travis-ci.org/toyama0919/embulk-input-presto)
2
2
 
3
3
  Facebook Presto input plugin for Embulk.
4
4
  [see](https://prestodb.io/).
@@ -18,7 +18,11 @@ Facebook Presto input plugin for Embulk.
18
18
  - **catalog**: catalog (string, default: `"native"`)
19
19
  - **query**: query (string, required)
20
20
  - **user**: user (string, default: `"embulk"`)
21
- - **columns**: columns (array, required)
21
+ - **columns** (**deprecated**): columns (array, optional)
22
+ - **name**: name (string, required)
23
+ - **type**: type (string, required)
24
+
25
+ **Warning**: **columns** is deprecated as of v0.2.0. The output schema is now fetched automatically from the query.
22
26
 
23
27
  ## Example
24
28
 
@@ -39,23 +43,17 @@ in:
39
43
  group by keyword
40
44
  having count(*) >= 10
41
45
  order by count(*) desc
42
- columns:
43
- - {name: keyword, type: string}
44
- - {name: count, type: long}
45
46
  out:
46
47
  type: stdout
47
48
  ```
48
49
 
49
- ## Limited
50
- * Only the data type that Embulk supports is possible.
51
- * TIMESTAMP
52
- * LONG
53
- * DOUBLE
54
- * BOOLEAN
55
- * STRING
56
-
57
- * Presto is not support Prepared statement.
58
- * Can't fetch schema by sql
50
+ ## Supported types
51
+ * TIMESTAMP
52
+ * LONG
53
+ * DOUBLE
54
+ * BOOLEAN
55
+ * STRING
56
+ * JSON
59
57
 
60
58
  ## Build
61
59
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-presto"
4
- spec.version = "0.1.2"
4
+ spec.version = "0.2.0"
5
5
  spec.authors = ["toyama0919"]
6
6
  spec.summary = "Facebook Presto input plugin for Embulk"
7
7
  spec.description = "Facebook Presto input plugin for Embulk."
@@ -1,3 +1,7 @@
1
+ require_relative 'presto/type_converter'
2
+ require_relative 'presto/explain_parser'
3
+ require_relative 'presto/connection'
4
+
1
5
  module Embulk
2
6
  module Input
3
7
  class Presto < InputPlugin
@@ -13,11 +17,15 @@ module Embulk
13
17
  "catalog" => config.param("catalog", :string, default: "native"),
14
18
  "query" => config.param("query", :string),
15
19
  "user" => config.param("user", :string, default: "embulk"),
16
- "columns" => config.param("columns", :array)
20
+ "columns" => config.param("columns", :array, default: nil)
17
21
  }
18
22
 
19
- columns = task['columns'].each_with_index.map do |c, i|
20
- Column.new(i, c["name"], c["type"].to_sym)
23
+ columns = if task['columns']
24
+ task['columns'].each_with_index.map do |c, i|
25
+ Column.new(i, c["name"], c["type"].to_sym)
26
+ end
27
+ else
28
+ build_output_columns(task)
21
29
  end
22
30
 
23
31
  resume(task, columns, 1, &control)
@@ -30,14 +38,22 @@ module Embulk
30
38
  return next_config_diff
31
39
  end
32
40
 
41
+ def self.build_output_columns(task)
42
+ explain_query = "explain (FORMAT TEXT) " + task["query"]
43
+ Embulk.logger.debug("SQL: #{explain_query}")
44
+ explain_result = Connection.get_client(task).run("explain (FORMAT TEXT) " + task["query"])
45
+
46
+ columns = []
47
+ ExplainParser.parse(explain_result).each_with_index do |(name, type), i|
48
+ columns << Column.new(i, name, TypeConverter.get_type(type))
49
+ end
50
+ columns
51
+ end
52
+
33
53
  def init
34
- @client = ::Presto::Client.new(
35
- server: "#{task['host']}:#{task['port']}",
36
- catalog: task['catalog'],
37
- user: task['user'],
38
- schema: task['schema']
39
- )
54
+ @client = Connection.get_client(task)
40
55
  @query = task["query"]
56
+ @type_converter = TypeConverter.new
41
57
 
42
58
  Embulk.logger.info "SQL: #{@query}"
43
59
  end
@@ -46,7 +62,7 @@ module Embulk
46
62
  size = 0
47
63
  @client.query(@query) do |q|
48
64
  q.each_row {|row|
49
- converted_values = row.map.with_index { |value,i| convert_value(value, schema[i]) }
65
+ converted_values = row.map.with_index { |value,i| @type_converter.convert_value(value, schema[i]) }
50
66
  page_builder.add(converted_values)
51
67
  }
52
68
  size = q.rows.size
@@ -57,37 +73,6 @@ module Embulk
57
73
  task_report = { size: size }
58
74
  return task_report
59
75
  end
60
-
61
- def convert_value(value, field)
62
- return nil if value.nil?
63
- case field["type"]
64
- when :string
65
- value
66
- when :long
67
- value.to_i
68
- when :double
69
- value.to_f
70
- when :boolean
71
- if value.is_a?(TrueClass) || value.is_a?(FalseClass)
72
- value
73
- else
74
- downcased_val = value.downcase
75
- case downcased_val
76
- when 'true' then true
77
- when 'false' then false
78
- when '1' then true
79
- when '0' then false
80
- else nil
81
- end
82
- end
83
- when :timestamp
84
- Time.parse(value)
85
- when :json
86
- value
87
- else
88
- raise "Unsupported type #{field['type']}"
89
- end
90
- end
91
76
  end
92
77
  end
93
78
  end
@@ -0,0 +1,16 @@
1
+ module Embulk
2
+ module Input
3
+ class Presto < InputPlugin
4
+ class Connection
5
+ def self.get_client(task)
6
+ ::Presto::Client.new(
7
+ server: "#{task['host']}:#{task['port']}",
8
+ catalog: task['catalog'],
9
+ user: task['user'],
10
+ schema: task['schema']
11
+ )
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,15 @@
1
+ module Embulk
2
+ module Input
3
+ class Presto < InputPlugin
4
+ class ExplainParser
5
+ def self.parse(explain_result)
6
+ explain_text = explain_result.flatten.last.lines.first
7
+ column_name_raw, column_type_raw = explain_text.split(' => ')
8
+ names = column_name_raw.split('[').last.split(']').first.split(',').map{ |name| name.strip }
9
+ types = column_type_raw.split('[').last.split(']').first.split(',').map{ |info| info.split(':').last }
10
+ Hash[*names.zip(types).flatten]
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,80 @@
1
+ module Embulk
2
+ module Input
3
+ class Presto < InputPlugin
4
+ class TypeConverter
5
+
6
+ def initialize
7
+ end
8
+
9
+ def convert_value(value, field)
10
+ return nil if value.nil?
11
+ case field["type"]
12
+ when :string
13
+ value
14
+ when :long
15
+ value.to_i
16
+ when :double
17
+ value.to_f
18
+ when :boolean
19
+ if value.is_a?(TrueClass) || value.is_a?(FalseClass)
20
+ value
21
+ else
22
+ downcased_val = value.downcase
23
+ case downcased_val
24
+ when 'true' then true
25
+ when 'false' then false
26
+ when '1' then true
27
+ when '0' then false
28
+ else nil
29
+ end
30
+ end
31
+ when :timestamp
32
+ Time.parse(value)
33
+ when :json
34
+ value
35
+ else
36
+ raise "Unsupported type #{field['type']}"
37
+ end
38
+ end
39
+
40
+ def self.get_type(type)
41
+ if type.start_with?("boolean")
42
+ :boolean
43
+ elsif type.start_with?("bigint")
44
+ :long
45
+ elsif type.start_with?("double")
46
+ :double
47
+ elsif type.start_with?("decimal")
48
+ :double
49
+ elsif type.start_with?("varchar")
50
+ :string
51
+ elsif type.start_with?("varbinary")
52
+ :string
53
+ elsif type.start_with?("json")
54
+ :json
55
+ elsif type.start_with?("date")
56
+ :timestamp
57
+ elsif type.start_with?("time")
58
+ :timestamp
59
+ elsif type.start_with?("time with time zone")
60
+ :timestamp
61
+ elsif type.start_with?("timestamp")
62
+ :timestamp
63
+ elsif type.start_with?("timestamp with time zone")
64
+ :timestamp
65
+ elsif type.start_with?("interval year to month")
66
+ :timestamp
67
+ elsif type.start_with?("interval day to second")
68
+ :timestamp
69
+ elsif type.start_with?("array")
70
+ :json
71
+ elsif type.start_with?("map")
72
+ :json
73
+ elsif type.start_with?("row")
74
+ :json
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
80
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-presto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-22 00:00:00.000000000 Z
11
+ date: 2016-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -89,12 +89,16 @@ extra_rdoc_files: []
89
89
  files:
90
90
  - ".gitignore"
91
91
  - ".ruby-version"
92
+ - ".travis.yml"
92
93
  - Gemfile
93
94
  - LICENSE.txt
94
95
  - README.md
95
96
  - Rakefile
96
97
  - embulk-input-presto.gemspec
97
98
  - lib/embulk/input/presto.rb
99
+ - lib/embulk/input/presto/connection.rb
100
+ - lib/embulk/input/presto/explain_parser.rb
101
+ - lib/embulk/input/presto/type_converter.rb
98
102
  - test/helper.rb
99
103
  - test/test_transaction.rb
100
104
  homepage: https://github.com/toyama0919/embulk-input-presto