embulk-input-presto 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +13 -15
- data/embulk-input-presto.gemspec +1 -1
- data/lib/embulk/input/presto.rb +26 -41
- data/lib/embulk/input/presto/connection.rb +16 -0
- data/lib/embulk/input/presto/explain_parser.rb +15 -0
- data/lib/embulk/input/presto/type_converter.rb +80 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 042574a4df2a41bc869a625661001f50d0b371e6
|
4
|
+
data.tar.gz: 5e9536ea331fa538d6a09d390fa7cf07ea057320
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 17baab4b601e67d21c8b9fe91c23924594f1ccffa5ed22cd65a937ad3909edbf7cf3d070d0c7b1eb2bad965d757d3f2ea1ea6870748972990b9da4014578007f
|
7
|
+
data.tar.gz: 51b93b3c578d298f35399b1127a236cc64ea393aee7bef3088c62032c6b7e64640a61205ca5f4b27c5f4862a153d0212566080aec05255aca423f1f1e2bdcf84
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Facebook Presto input plugin for Embulk
|
1
|
+
# Facebook Presto input plugin for Embulk [![Build Status](https://secure.travis-ci.org/toyama0919/embulk-input-presto.png?branch=master)](http://travis-ci.org/toyama0919/embulk-input-presto)
|
2
2
|
|
3
3
|
Facebook Presto input plugin for Embulk.
|
4
4
|
[see](https://prestodb.io/).
|
@@ -18,7 +18,11 @@ Facebook Presto input plugin for Embulk.
|
|
18
18
|
- **catalog**: catalog (string, default: `"native"`)
|
19
19
|
- **query**: query (string, required)
|
20
20
|
- **user**: user (string, default: `"embulk"`)
|
21
|
-
- **columns
|
21
|
+
- **columns** (**deprecated**): columns (array, optional; when omitted the schema is fetched automatically)
|
22
|
+
- **name**: name (string, required)
|
23
|
+
- **type**: type (string, required)
|
24
|
+
|
25
|
+
**Warning**: **columns** is deprecated as of v0.2.0. The output schema is now fetched automatically from the query.
|
22
26
|
|
23
27
|
## Example
|
24
28
|
|
@@ -39,23 +43,17 @@ in:
|
|
39
43
|
group by keyword
|
40
44
|
having count(*) >= 10
|
41
45
|
order by count(*) desc
|
42
|
-
columns:
|
43
|
-
- {name: keyword, type: string}
|
44
|
-
- {name: count, type: long}
|
45
46
|
out:
|
46
47
|
type: stdout
|
47
48
|
```
|
48
49
|
|
49
|
-
##
|
50
|
-
*
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
* Presto is not support Prepared statement.
|
58
|
-
* Can't fetch schema by sql
|
50
|
+
## Support type
|
51
|
+
* TIMESTAMP
|
52
|
+
* LONG
|
53
|
+
* DOUBLE
|
54
|
+
* BOOLEAN
|
55
|
+
* STRING
|
56
|
+
* JSON
|
59
57
|
|
60
58
|
## Build
|
61
59
|
|
data/embulk-input-presto.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-presto"
|
4
|
-
spec.version = "0.
|
4
|
+
spec.version = "0.2.0"
|
5
5
|
spec.authors = ["toyama0919"]
|
6
6
|
spec.summary = "Facebook Presto input plugin for Embulk"
|
7
7
|
spec.description = "Facebook Presto input plugin for Embulk."
|
data/lib/embulk/input/presto.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
require_relative 'presto/type_converter'
|
2
|
+
require_relative 'presto/explain_parser'
|
3
|
+
require_relative 'presto/connection'
|
4
|
+
|
1
5
|
module Embulk
|
2
6
|
module Input
|
3
7
|
class Presto < InputPlugin
|
@@ -13,11 +17,15 @@ module Embulk
|
|
13
17
|
"catalog" => config.param("catalog", :string, default: "native"),
|
14
18
|
"query" => config.param("query", :string),
|
15
19
|
"user" => config.param("user", :string, default: "embulk"),
|
16
|
-
"columns" => config.param("columns", :array)
|
20
|
+
"columns" => config.param("columns", :array, default: nil)
|
17
21
|
}
|
18
22
|
|
19
|
-
columns = task['columns']
|
20
|
-
|
23
|
+
columns = if task['columns']
|
24
|
+
task['columns'].each_with_index.map do |c, i|
|
25
|
+
Column.new(i, c["name"], c["type"].to_sym)
|
26
|
+
end
|
27
|
+
else
|
28
|
+
build_output_columns(task)
|
21
29
|
end
|
22
30
|
|
23
31
|
resume(task, columns, 1, &control)
|
@@ -30,14 +38,22 @@ module Embulk
|
|
30
38
|
return next_config_diff
|
31
39
|
end
|
32
40
|
|
41
|
+
# Builds the output schema by running EXPLAIN on the task's query instead of
# relying on a user-supplied "columns" list.
#
# task - task settings; reads task["query"] here and is handed unchanged to
#        Connection.get_client.
#
# Returns an Array of Column objects, one per entry yielded by
# ExplainParser.parse, indexed in iteration order.
def self.build_output_columns(task)
  explain_query = "explain (FORMAT TEXT) " + task["query"]
  Embulk.logger.debug("SQL: #{explain_query}")
  # Execute exactly the statement that was logged, so the two cannot drift apart.
  explain_result = Connection.get_client(task).run(explain_query)

  ExplainParser.parse(explain_result).each_with_index.map do |(name, type), i|
    Column.new(i, name, TypeConverter.get_type(type))
  end
end
|
52
|
+
|
33
53
|
def init
|
34
|
-
@client =
|
35
|
-
server: "#{task['host']}:#{task['port']}",
|
36
|
-
catalog: task['catalog'],
|
37
|
-
user: task['user'],
|
38
|
-
schema: task['schema']
|
39
|
-
)
|
54
|
+
@client = Connection.get_client(task)
|
40
55
|
@query = task["query"]
|
56
|
+
@type_converter = TypeConverter.new
|
41
57
|
|
42
58
|
Embulk.logger.info "SQL: #{@query}"
|
43
59
|
end
|
@@ -46,7 +62,7 @@ module Embulk
|
|
46
62
|
size = 0
|
47
63
|
@client.query(@query) do |q|
|
48
64
|
q.each_row {|row|
|
49
|
-
converted_values = row.map.with_index { |value,i| convert_value(value, schema[i]) }
|
65
|
+
converted_values = row.map.with_index { |value,i| @type_converter.convert_value(value, schema[i]) }
|
50
66
|
page_builder.add(converted_values)
|
51
67
|
}
|
52
68
|
size = q.rows.size
|
@@ -57,37 +73,6 @@ module Embulk
|
|
57
73
|
task_report = { size: size }
|
58
74
|
return task_report
|
59
75
|
end
|
60
|
-
|
61
|
-
def convert_value(value, field)
|
62
|
-
return nil if value.nil?
|
63
|
-
case field["type"]
|
64
|
-
when :string
|
65
|
-
value
|
66
|
-
when :long
|
67
|
-
value.to_i
|
68
|
-
when :double
|
69
|
-
value.to_f
|
70
|
-
when :boolean
|
71
|
-
if value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
72
|
-
value
|
73
|
-
else
|
74
|
-
downcased_val = value.downcase
|
75
|
-
case downcased_val
|
76
|
-
when 'true' then true
|
77
|
-
when 'false' then false
|
78
|
-
when '1' then true
|
79
|
-
when '0' then false
|
80
|
-
else nil
|
81
|
-
end
|
82
|
-
end
|
83
|
-
when :timestamp
|
84
|
-
Time.parse(value)
|
85
|
-
when :json
|
86
|
-
value
|
87
|
-
else
|
88
|
-
raise "Unsupported type #{field['type']}"
|
89
|
-
end
|
90
|
-
end
|
91
76
|
end
|
92
77
|
end
|
93
78
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Input
|
3
|
+
class Presto < InputPlugin
|
4
|
+
# Factory for Presto clients configured from task settings.
class Connection
  # Creates a ::Presto::Client for the given task.
  #
  # task - object indexed with the string keys 'host', 'port', 'catalog',
  #        'user' and 'schema'.
  #
  # Returns the newly constructed ::Presto::Client.
  def self.get_client(task)
    server_address = "#{task['host']}:#{task['port']}"

    ::Presto::Client.new(server: server_address, catalog: task['catalog'], user: task['user'], schema: task['schema'])
  end
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Input
|
3
|
+
class Presto < InputPlugin
|
4
|
+
# Extracts output column names and types from the text of an EXPLAIN result.
class ExplainParser
  # Parses the first line of the explain text, which is expected to look like
  #   "- Output[name1, name2] => [sym1:type1, sym2:type2]".
  #
  # explain_result - nested Array; the last element after flattening must be
  #                  the explain text (a String).
  #
  # Returns a Hash mapping each name from the left-hand bracket list to the
  # type string found at the same position in the right-hand list.
  def self.parse(explain_result)
    explain_text = explain_result.flatten.last.lines.first
    column_name_raw, column_type_raw = explain_text.split(' => ')
    names = bracket_contents(column_name_raw).split(',').map { |name| name.strip }
    # Types such as decimal(10,2) contain commas of their own, so only commas
    # outside parentheses separate one column from the next.
    types = split_top_level(bracket_contents(column_type_raw)).map { |info| info.split(':').last }
    Hash[names.zip(types)]
  end

  # Returns the text between the last '[' of +text+ and the ']' that follows it.
  def self.bracket_contents(text)
    text.split('[').last.split(']').first
  end

  # Splits +text+ on commas that are not nested inside parentheses.
  def self.split_top_level(text)
    parts = [String.new]
    depth = 0
    text.each_char do |char|
      case char
      when '(' then depth += 1
      when ')' then depth -= 1
      end

      if char == ',' && depth.zero?
        parts << String.new
      else
        parts.last << char
      end
    end
    parts
  end

  private_class_method :bracket_contents, :split_top_level
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Input
|
3
|
+
class Presto < InputPlugin
|
4
|
+
# Converts type names and row values into the column types used by this
# plugin (:boolean, :long, :double, :string, :json, :timestamp).
class TypeConverter
  # Type-name prefixes grouped by the column type they map to. Matching is by
  # prefix so that parameterised names such as "varchar(255)" or
  # "timestamp with time zone" resolve through their base name.
  TYPE_PREFIXES = {
    boolean:   %w[boolean],
    long:      %w[bigint integer smallint tinyint],
    double:    %w[double decimal real],
    string:    %w[varchar varbinary char],
    json:      %w[json array map row],
    timestamp: ['date', 'time', 'interval year to month', 'interval day to second']
  }.freeze

  def initialize
  end

  # Converts a single value according to the type of its column.
  #
  # value - raw value from a result row; nil is passed through as nil.
  # field - column description; only field["type"] is read.
  #
  # Returns the converted value. Boolean values that are not recognised
  # convert to nil.
  # Raises RuntimeError ("Unsupported type ...") for an unknown column type.
  def convert_value(value, field)
    return nil if value.nil?

    case field["type"]
    when :string, :json
      value
    when :long
      value.to_i
    when :double
      value.to_f
    when :boolean
      convert_boolean(value)
    when :timestamp
      # NOTE(review): Time.parse is provided by the stdlib 'time' library,
      # which this file does not require itself - confirm the host loads it.
      Time.parse(value)
    else
      raise "Unsupported type #{field['type']}"
    end
  end

  # Maps a type name to a column type symbol.
  #
  # type - type name String, e.g. "bigint" or "decimal(10,2)".
  #
  # Returns the matching Symbol, or nil when no known prefix matches.
  def self.get_type(type)
    TYPE_PREFIXES.each do |column_type, prefixes|
      return column_type if type.start_with?(*prefixes)
    end
    nil
  end

  private

  # Interprets true/false, "true"/"false" and 1/0 (in any letter case, as a
  # String or otherwise) as booleans; anything else yields nil.
  def convert_boolean(value)
    return value if value.is_a?(TrueClass) || value.is_a?(FalseClass)

    # to_s lets non-String inputs such as Integer 1/0 take the same path.
    case value.to_s.downcase
    when 'true', '1' then true
    when 'false', '0' then false
    end
  end
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-presto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,12 +89,16 @@ extra_rdoc_files: []
|
|
89
89
|
files:
|
90
90
|
- ".gitignore"
|
91
91
|
- ".ruby-version"
|
92
|
+
- ".travis.yml"
|
92
93
|
- Gemfile
|
93
94
|
- LICENSE.txt
|
94
95
|
- README.md
|
95
96
|
- Rakefile
|
96
97
|
- embulk-input-presto.gemspec
|
97
98
|
- lib/embulk/input/presto.rb
|
99
|
+
- lib/embulk/input/presto/connection.rb
|
100
|
+
- lib/embulk/input/presto/explain_parser.rb
|
101
|
+
- lib/embulk/input/presto/type_converter.rb
|
98
102
|
- test/helper.rb
|
99
103
|
- test/test_transaction.rb
|
100
104
|
homepage: https://github.com/toyama0919/embulk-input-presto
|