embulk-input-presto 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a57f6dc5a7064378aba3325b8765ffc3e3f1cdd
4
- data.tar.gz: f272abf3aad3768b0603366e404685cf33d4e61d
3
+ metadata.gz: 042574a4df2a41bc869a625661001f50d0b371e6
4
+ data.tar.gz: 5e9536ea331fa538d6a09d390fa7cf07ea057320
5
5
  SHA512:
6
- metadata.gz: 13ce34fd728876372f389ac13cf422d23b8facba8c3069b60f946f4762cc37e003e2493ec5c1129ca10fdab20a8ed0ed860028a9109e02a5afd6bb62e9e30ccc
7
- data.tar.gz: 1898c4f776b9570ecf9813372cfd80ff2c62c339f0872e633d85a722e8a844d911b1bb0716489cf243cde6b25aa6fc2452a8c628d3877cc6b13399a8924b279a
6
+ metadata.gz: 17baab4b601e67d21c8b9fe91c23924594f1ccffa5ed22cd65a937ad3909edbf7cf3d070d0c7b1eb2bad965d757d3f2ea1ea6870748972990b9da4014578007f
7
+ data.tar.gz: 51b93b3c578d298f35399b1127a236cc64ea393aee7bef3088c62032c6b7e64640a61205ca5f4b27c5f4862a153d0212566080aec05255aca423f1f1e2bdcf84
data/.travis.yml ADDED
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ cache: bundler
3
+ rvm:
4
+ - jruby-9.0.5.0
5
+ - jruby-head
6
+ jdk:
7
+ - openjdk7
8
+ before_install:
9
+ - gem install bundler
10
+ matrix:
11
+ allow_failures:
12
+ - rvm: jruby-head
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Facebook Presto input plugin for Embulk
1
+ # Facebook Presto input plugin for Embulk [![Build Status](https://secure.travis-ci.org/toyama0919/embulk-input-presto.png?branch=master)](http://travis-ci.org/toyama0919/embulk-input-presto)
2
2
 
3
3
  Facebook Presto input plugin for Embulk.
4
4
  [see](https://prestodb.io/).
@@ -18,7 +18,11 @@ Facebook Presto input plugin for Embulk.
18
18
  - **catalog**: catalog (string, default: `"native"`)
19
19
  - **query**: query (string, required)
20
20
  - **user**: user (string, default: `"embulk"`)
21
- - **columns**: columns (array, required)
21
+ - **columns** (**deprecated**): columns (array, optional; when omitted, the schema is fetched automatically)
22
+ - **name**: name (string, required)
23
+ - **type**: type (string, required)
24
+
25
+ **Warning**: **columns** is deprecated as of v0.2.0. The plugin now fetches the schema automatically.
22
26
 
23
27
  ## Example
24
28
 
@@ -39,23 +43,17 @@ in:
39
43
  group by keyword
40
44
  having count(*) >= 10
41
45
  order by count(*) desc
42
- columns:
43
- - {name: keyword, type: string}
44
- - {name: count, type: long}
45
46
  out:
46
47
  type: stdout
47
48
  ```
48
49
 
49
- ## Limited
50
- * Only the data type that Embulk supports is possible.
51
- * TIMESTAMP
52
- * LONG
53
- * DOUBLE
54
- * BOOLEAN
55
- * STRING
56
-
57
- * Presto is not support Prepared statement.
58
- * Can't fetch schema by sql
50
+ ## Support type
51
+ * TIMESTAMP
52
+ * LONG
53
+ * DOUBLE
54
+ * BOOLEAN
55
+ * STRING
56
+ * JSON
59
57
 
60
58
  ## Build
61
59
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-presto"
4
- spec.version = "0.1.2"
4
+ spec.version = "0.2.0"
5
5
  spec.authors = ["toyama0919"]
6
6
  spec.summary = "Facebook Presto input plugin for Embulk"
7
7
  spec.description = "Facebook Presto input plugin for Embulk."
@@ -1,3 +1,7 @@
1
+ require_relative 'presto/type_converter'
2
+ require_relative 'presto/explain_parser'
3
+ require_relative 'presto/connection'
4
+
1
5
  module Embulk
2
6
  module Input
3
7
  class Presto < InputPlugin
@@ -13,11 +17,15 @@ module Embulk
13
17
  "catalog" => config.param("catalog", :string, default: "native"),
14
18
  "query" => config.param("query", :string),
15
19
  "user" => config.param("user", :string, default: "embulk"),
16
- "columns" => config.param("columns", :array)
20
+ "columns" => config.param("columns", :array, default: nil)
17
21
  }
18
22
 
19
- columns = task['columns'].each_with_index.map do |c, i|
20
- Column.new(i, c["name"], c["type"].to_sym)
23
+ columns = if task['columns']
24
+ task['columns'].each_with_index.map do |c, i|
25
+ Column.new(i, c["name"], c["type"].to_sym)
26
+ end
27
+ else
28
+ build_output_columns(task)
21
29
  end
22
30
 
23
31
  resume(task, columns, 1, &control)
@@ -30,14 +38,22 @@ module Embulk
30
38
  return next_config_diff
31
39
  end
32
40
 
41
# Derives the output schema by asking Presto to EXPLAIN the configured query.
#
# Used when the task carries no explicit "columns" setting.
#
# @param task [Hash] plugin task; "query" is read here, and the whole task is
#   handed to Connection.get_client
# @return [Array<Column>] one Column per entry returned by ExplainParser.parse,
#   indexed in iteration order, typed via TypeConverter.get_type
def self.build_output_columns(task)
  explain_query = "explain (FORMAT TEXT) " + task["query"]
  Embulk.logger.debug("SQL: #{explain_query}")
  # Run exactly the statement that was logged instead of rebuilding the string.
  explain_result = Connection.get_client(task).run(explain_query)

  ExplainParser.parse(explain_result).each_with_index.map do |(name, type), i|
    Column.new(i, name, TypeConverter.get_type(type))
  end
end
52
+
33
53
  def init
34
- @client = ::Presto::Client.new(
35
- server: "#{task['host']}:#{task['port']}",
36
- catalog: task['catalog'],
37
- user: task['user'],
38
- schema: task['schema']
39
- )
54
+ @client = Connection.get_client(task)
40
55
  @query = task["query"]
56
+ @type_converter = TypeConverter.new
41
57
 
42
58
  Embulk.logger.info "SQL: #{@query}"
43
59
  end
@@ -46,7 +62,7 @@ module Embulk
46
62
  size = 0
47
63
  @client.query(@query) do |q|
48
64
  q.each_row {|row|
49
- converted_values = row.map.with_index { |value,i| convert_value(value, schema[i]) }
65
+ converted_values = row.map.with_index { |value,i| @type_converter.convert_value(value, schema[i]) }
50
66
  page_builder.add(converted_values)
51
67
  }
52
68
  size = q.rows.size
@@ -57,37 +73,6 @@ module Embulk
57
73
  task_report = { size: size }
58
74
  return task_report
59
75
  end
60
-
61
- def convert_value(value, field)
62
- return nil if value.nil?
63
- case field["type"]
64
- when :string
65
- value
66
- when :long
67
- value.to_i
68
- when :double
69
- value.to_f
70
- when :boolean
71
- if value.is_a?(TrueClass) || value.is_a?(FalseClass)
72
- value
73
- else
74
- downcased_val = value.downcase
75
- case downcased_val
76
- when 'true' then true
77
- when 'false' then false
78
- when '1' then true
79
- when '0' then false
80
- else nil
81
- end
82
- end
83
- when :timestamp
84
- Time.parse(value)
85
- when :json
86
- value
87
- else
88
- raise "Unsupported type #{field['type']}"
89
- end
90
- end
91
76
  end
92
77
  end
93
78
  end
@@ -0,0 +1,16 @@
1
+ module Embulk
2
+ module Input
3
+ class Presto < InputPlugin
4
# Factory for Presto clients built from an Embulk task.
class Connection
  # Builds a ::Presto::Client from the connection settings stored in the task.
  #
  # @param task [Hash] reads the "host", "port", "catalog", "user" and
  #   "schema" entries
  # @return [::Presto::Client] a newly constructed client
  def self.get_client(task)
    server_address = "#{task['host']}:#{task['port']}"
    client_options = {
      server: server_address,
      catalog: task['catalog'],
      user: task['user'],
      schema: task['schema']
    }
    ::Presto::Client.new(**client_options)
  end
end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,15 @@
1
+ module Embulk
2
+ module Input
3
+ class Presto < InputPlugin
4
# Extracts output column names and types from the text of an EXPLAIN result.
class ExplainParser
  # Parses the first line of the plan text, expected to look like
  #   - Output[name1, name2] => [sym1:type1, sym2:type2]
  #
  # Names come from the bracketed list left of " => "; types come from the
  # text after the last ':' of each entry in the bracketed list on the right.
  # Entries are split on top-level commas only, so parameterised types such
  # as decimal(10,2) or map<varchar,bigint> stay in one piece.
  #
  # @param explain_result [Array] nested rows; the last element after
  #   flattening is taken as the plan text
  # @return [Hash{String => String}] column name => type, in output order
  #   (a Hash, so a repeated name keeps only its last type)
  # @raise [ArgumentError] if the first plan line contains no " => "
  def self.parse(explain_result)
    explain_text = explain_result.flatten.last.lines.first
    column_name_raw, column_type_raw = explain_text.split(' => ')
    if column_type_raw.nil?
      raise ArgumentError, "Unexpected explain output: #{explain_text.inspect}"
    end

    names = split_top_level(bracket_contents(column_name_raw)).map { |name| name.strip }
    types = split_top_level(bracket_contents(column_type_raw)).map { |info| info.split(':').last.strip }
    Hash[names.zip(types)]
  end

  # Returns the text between the last '[' and the next ']' of +raw+.
  def self.bracket_contents(raw)
    raw.split('[').last.to_s.split(']').first.to_s
  end

  # Splits +list+ on commas that are not nested inside (...) or <...>.
  def self.split_top_level(list)
    items = []
    current = String.new
    depth = 0
    list.each_char do |ch|
      case ch
      when '(', '<' then depth += 1
      when ')', '>' then depth -= 1
      end

      if ch == ',' && depth.zero?
        items << current
        current = String.new
      else
        current << ch
      end
    end
    items << current unless current.empty?
    items
  end
end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,80 @@
1
+ module Embulk
2
+ module Input
3
+ class Presto < InputPlugin
4
# Maps Presto type names to Embulk column types and converts row values.
class TypeConverter
  # Embulk type => Presto type-name prefixes that map to it. Prefix matching
  # lets parameterised names such as "decimal(10,2)" resolve. The "time"
  # prefix also covers "time with time zone", "timestamp" and
  # "timestamp with time zone". No prefix in one group is a prefix of an
  # entry in another group, so lookup order does not matter.
  PRESTO_TYPE_PREFIXES = {
    boolean: %w[boolean],
    long: %w[bigint integer smallint tinyint],
    double: %w[double real decimal],
    string: %w[varchar varbinary char],
    json: %w[json array map row],
    timestamp: ['date', 'time', 'interval year to month', 'interval day to second']
  }.freeze

  # Accepted textual spellings of boolean values (compared case-insensitively).
  BOOLEAN_LITERALS = {
    'true' => true,
    'false' => false,
    '1' => true,
    '0' => false
  }.freeze

  # Converts a single row value according to the type of its column.
  #
  # @param value [Object, nil] raw value from the query result
  # @param field [#[]] column descriptor; field["type"] selects the conversion
  # @return [Object, nil] nil for nil input; otherwise the value unchanged for
  #   :string/:json, to_i for :long, to_f for :double, true/false/nil for
  #   :boolean, and Time.parse(value) for :timestamp
  # @raise [RuntimeError] when field["type"] is none of the types above
  def convert_value(value, field)
    return nil if value.nil?

    case field["type"]
    when :string, :json
      value
    when :long
      value.to_i
    when :double
      value.to_f
    when :boolean
      convert_boolean(value)
    when :timestamp
      Time.parse(value)
    else
      raise "Unsupported type #{field['type']}"
    end
  end

  # Resolves a Presto type name to an Embulk column type.
  #
  # @param type [String] Presto type name, e.g. "bigint" or "decimal(10,2)"
  # @return [Symbol, nil] the Embulk type, or nil when no prefix matches
  def self.get_type(type)
    match = PRESTO_TYPE_PREFIXES.find { |_, prefixes| type.start_with?(*prefixes) }
    match && match.first
  end

  private

  # Returns true/false unchanged; otherwise looks up the lower-cased string
  # form of the value. to_s keeps non-String input (e.g. Integer 1) from
  # raising NoMethodError on downcase. Unrecognised spellings yield nil.
  def convert_boolean(value)
    return value if value.equal?(true) || value.equal?(false)

    BOOLEAN_LITERALS[value.to_s.downcase]
  end
end
78
+ end
79
+ end
80
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-presto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - toyama0919
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-22 00:00:00.000000000 Z
11
+ date: 2016-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -89,12 +89,16 @@ extra_rdoc_files: []
89
89
  files:
90
90
  - ".gitignore"
91
91
  - ".ruby-version"
92
+ - ".travis.yml"
92
93
  - Gemfile
93
94
  - LICENSE.txt
94
95
  - README.md
95
96
  - Rakefile
96
97
  - embulk-input-presto.gemspec
97
98
  - lib/embulk/input/presto.rb
99
+ - lib/embulk/input/presto/connection.rb
100
+ - lib/embulk/input/presto/explain_parser.rb
101
+ - lib/embulk/input/presto/type_converter.rb
98
102
  - test/helper.rb
99
103
  - test/test_transaction.rb
100
104
  homepage: https://github.com/toyama0919/embulk-input-presto