embulk-input-presto 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +13 -15
- data/embulk-input-presto.gemspec +1 -1
- data/lib/embulk/input/presto.rb +26 -41
- data/lib/embulk/input/presto/connection.rb +16 -0
- data/lib/embulk/input/presto/explain_parser.rb +15 -0
- data/lib/embulk/input/presto/type_converter.rb +80 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 042574a4df2a41bc869a625661001f50d0b371e6
|
4
|
+
data.tar.gz: 5e9536ea331fa538d6a09d390fa7cf07ea057320
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 17baab4b601e67d21c8b9fe91c23924594f1ccffa5ed22cd65a937ad3909edbf7cf3d070d0c7b1eb2bad965d757d3f2ea1ea6870748972990b9da4014578007f
|
7
|
+
data.tar.gz: 51b93b3c578d298f35399b1127a236cc64ea393aee7bef3088c62032c6b7e64640a61205ca5f4b27c5f4862a153d0212566080aec05255aca423f1f1e2bdcf84
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Facebook Presto input plugin for Embulk
|
1
|
+
# Facebook Presto input plugin for Embulk [Build Status](http://travis-ci.org/toyama0919/embulk-input-presto)
|
2
2
|
|
3
3
|
Facebook Presto input plugin for Embulk.
|
4
4
|
[see](https://prestodb.io/).
|
@@ -18,7 +18,11 @@ Facebook Presto input plugin for Embulk.
|
|
18
18
|
- **catalog**: catalog (string, default: `"native"`)
|
19
19
|
- **query**: query (string, required)
|
20
20
|
- **user**: user (string, default: `"embulk"`)
|
21
|
-
- **columns
|
21
|
+
- **columns**(**deprecated**): columns (array, required)
|
22
|
+
- **name**: name (string, required)
|
23
|
+
- **type**: type (string, required)
|
24
|
+
|
25
|
+
**Warning**: **columns** is deprecated as of v0.2.0. The schema is now fetched automatically from the query.
|
22
26
|
|
23
27
|
## Example
|
24
28
|
|
@@ -39,23 +43,17 @@ in:
|
|
39
43
|
group by keyword
|
40
44
|
having count(*) >= 10
|
41
45
|
order by count(*) desc
|
42
|
-
columns:
|
43
|
-
- {name: keyword, type: string}
|
44
|
-
- {name: count, type: long}
|
45
46
|
out:
|
46
47
|
type: stdout
|
47
48
|
```
|
48
49
|
|
49
|
-
##
|
50
|
-
*
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
* Presto is not support Prepared statement.
|
58
|
-
* Can't fetch schema by sql
|
50
|
+
## Support type
|
51
|
+
* TIMESTAMP
|
52
|
+
* LONG
|
53
|
+
* DOUBLE
|
54
|
+
* BOOLEAN
|
55
|
+
* STRING
|
56
|
+
* JSON
|
59
57
|
|
60
58
|
## Build
|
61
59
|
|
data/embulk-input-presto.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-presto"
|
4
|
-
spec.version = "0.
|
4
|
+
spec.version = "0.2.0"
|
5
5
|
spec.authors = ["toyama0919"]
|
6
6
|
spec.summary = "Facebook Presto input plugin for Embulk"
|
7
7
|
spec.description = "Facebook Presto input plugin for Embulk."
|
data/lib/embulk/input/presto.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
require_relative 'presto/type_converter'
|
2
|
+
require_relative 'presto/explain_parser'
|
3
|
+
require_relative 'presto/connection'
|
4
|
+
|
1
5
|
module Embulk
|
2
6
|
module Input
|
3
7
|
class Presto < InputPlugin
|
@@ -13,11 +17,15 @@ module Embulk
|
|
13
17
|
"catalog" => config.param("catalog", :string, default: "native"),
|
14
18
|
"query" => config.param("query", :string),
|
15
19
|
"user" => config.param("user", :string, default: "embulk"),
|
16
|
-
"columns" => config.param("columns", :array)
|
20
|
+
"columns" => config.param("columns", :array, default: nil)
|
17
21
|
}
|
18
22
|
|
19
|
-
columns = task['columns']
|
20
|
-
|
23
|
+
columns = if task['columns']
|
24
|
+
task['columns'].each_with_index.map do |c, i|
|
25
|
+
Column.new(i, c["name"], c["type"].to_sym)
|
26
|
+
end
|
27
|
+
else
|
28
|
+
build_output_columns(task)
|
21
29
|
end
|
22
30
|
|
23
31
|
resume(task, columns, 1, &control)
|
@@ -30,14 +38,22 @@ module Embulk
|
|
30
38
|
return next_config_diff
|
31
39
|
end
|
32
40
|
|
41
|
+
# Builds the output schema by running the configured query through
# "explain (FORMAT TEXT)" and reading column names and types from the plan.
#
# task - plugin task hash; task["query"] is the SQL to explain, and the whole
#        hash is handed to Connection.get_client to open the client.
#
# Returns an Array of Column objects, indexed in the order the parser yields
# the columns.
def self.build_output_columns(task)
  explain_query = "explain (FORMAT TEXT) " + task["query"]
  Embulk.logger.debug("SQL: #{explain_query}")
  # Execute exactly the statement that was logged, so the two cannot drift apart.
  explain_result = Connection.get_client(task).run(explain_query)

  ExplainParser.parse(explain_result).each_with_index.map do |(name, type), i|
    Column.new(i, name, TypeConverter.get_type(type))
  end
end
|
52
|
+
|
33
53
|
def init
|
34
|
-
@client =
|
35
|
-
server: "#{task['host']}:#{task['port']}",
|
36
|
-
catalog: task['catalog'],
|
37
|
-
user: task['user'],
|
38
|
-
schema: task['schema']
|
39
|
-
)
|
54
|
+
@client = Connection.get_client(task)
|
40
55
|
@query = task["query"]
|
56
|
+
@type_converter = TypeConverter.new
|
41
57
|
|
42
58
|
Embulk.logger.info "SQL: #{@query}"
|
43
59
|
end
|
@@ -46,7 +62,7 @@ module Embulk
|
|
46
62
|
size = 0
|
47
63
|
@client.query(@query) do |q|
|
48
64
|
q.each_row {|row|
|
49
|
-
converted_values = row.map.with_index { |value,i| convert_value(value, schema[i]) }
|
65
|
+
converted_values = row.map.with_index { |value,i| @type_converter.convert_value(value, schema[i]) }
|
50
66
|
page_builder.add(converted_values)
|
51
67
|
}
|
52
68
|
size = q.rows.size
|
@@ -57,37 +73,6 @@ module Embulk
|
|
57
73
|
task_report = { size: size }
|
58
74
|
return task_report
|
59
75
|
end
|
60
|
-
|
61
|
-
def convert_value(value, field)
|
62
|
-
return nil if value.nil?
|
63
|
-
case field["type"]
|
64
|
-
when :string
|
65
|
-
value
|
66
|
-
when :long
|
67
|
-
value.to_i
|
68
|
-
when :double
|
69
|
-
value.to_f
|
70
|
-
when :boolean
|
71
|
-
if value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
72
|
-
value
|
73
|
-
else
|
74
|
-
downcased_val = value.downcase
|
75
|
-
case downcased_val
|
76
|
-
when 'true' then true
|
77
|
-
when 'false' then false
|
78
|
-
when '1' then true
|
79
|
-
when '0' then false
|
80
|
-
else nil
|
81
|
-
end
|
82
|
-
end
|
83
|
-
when :timestamp
|
84
|
-
Time.parse(value)
|
85
|
-
when :json
|
86
|
-
value
|
87
|
-
else
|
88
|
-
raise "Unsupported type #{field['type']}"
|
89
|
-
end
|
90
|
-
end
|
91
76
|
end
|
92
77
|
end
|
93
78
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Input
|
3
|
+
class Presto < InputPlugin
|
4
|
+
# Factory for Presto clients configured from a plugin task.
class Connection
  # Creates a ::Presto::Client from the task settings.
  #
  # task - hash-like object read with the keys 'host', 'port', 'catalog',
  #        'user' and 'schema'.
  #
  # Returns the newly constructed ::Presto::Client.
  def self.get_client(task)
    host = task['host']
    port = task['port']
    settings = {
      server: "#{host}:#{port}",
      catalog: task['catalog'],
      user: task['user'],
      schema: task['schema']
    }
    ::Presto::Client.new(**settings)
  end
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Input
|
3
|
+
class Presto < InputPlugin
|
4
|
+
# Extracts output column names and Presto type names from the result of an
# "explain (FORMAT TEXT)" query.
class ExplainParser
  # Parses the first line of the plan text, which is expected to look like:
  #
  #   - Output[keyword, count] => [keyword:varchar, count:bigint]
  #
  # explain_result - nested Array; after flattening, its last element must be
  #                  the plan text (a String).
  #
  # Returns a Hash mapping each column name (taken from the left-hand bracket
  # list) to the type text found at the same position in the right-hand list.
  # Raises ArgumentError when the first plan line has no " => " separator.
  def self.parse(explain_result)
    explain_text = explain_result.flatten.last.lines.first
    column_name_raw, column_type_raw = explain_text.split(' => ', 2)
    if column_type_raw.nil?
      raise ArgumentError, "Unexpected explain output: #{explain_text.inspect}"
    end

    names = bracket_body(column_name_raw).split(',').map { |name| name.strip }
    # Split on top-level commas only, so parameterised types such as
    # decimal(10,2) or map(varchar,bigint) stay in one piece and remain
    # aligned with their column name.
    types = split_top_level(bracket_body(column_type_raw)).map do |info|
      # Each entry is "symbol:type"; cut at the first colon so the type text
      # survives intact even if it contains a colon itself.
      info.split(':', 2).last.to_s.strip
    end
    Hash[names.zip(types)]
  end

  # Returns the text between the last '[' and the following ']' of +raw+.
  def self.bracket_body(raw)
    raw.split('[').last.to_s.split(']').first.to_s
  end
  private_class_method :bracket_body

  # Splits +text+ on commas that are not nested inside parentheses.
  def self.split_top_level(text)
    parts = []
    current = ''.dup
    depth = 0
    text.each_char do |ch|
      depth += 1 if ch == '('
      depth -= 1 if ch == ')'
      if ch == ',' && depth.zero?
        parts << current
        current = ''.dup
      else
        current << ch
      end
    end
    parts << current unless current.empty?
    parts
  end
  private_class_method :split_top_level
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Input
|
3
|
+
class Presto < InputPlugin
|
4
|
+
# Maps Presto type names to Embulk column types and converts row values
# fetched from Presto into values matching those column types.
class TypeConverter
  # Presto type-name prefixes and the Embulk type each maps to. Matching is by
  # prefix so parameterised names such as "varchar(10)" or "decimal(10,2)"
  # resolve; the first matching entry wins.
  TYPE_PREFIXES = [
    ["boolean",   :boolean],
    ["tinyint",   :long],
    ["smallint",  :long],
    ["integer",   :long],
    ["bigint",    :long],
    ["real",      :double],
    ["double",    :double],
    ["decimal",   :double],
    ["varchar",   :string],
    ["char",      :string],
    ["varbinary", :string],
    ["json",      :json],
    # Intervals are durations, not points in time, so they are kept as text
    # instead of being parsed as timestamps.
    ["interval",  :string],
    ["date",      :timestamp],
    # Also covers "timestamp with time zone".
    ["timestamp", :timestamp],
    # Also covers "time with time zone".
    ["time",      :timestamp],
    ["array",     :json],
    ["map",       :json],
    ["row",       :json]
  ].freeze

  # Textual spellings accepted for boolean columns (compared after downcasing).
  BOOLEAN_STRINGS = {
    'true' => true,
    'false' => false,
    '1' => true,
    '0' => false
  }.freeze

  # Converts a single value to the representation of the given column type.
  #
  # value - raw value from a result row; nil is passed through as nil.
  # field - object answering field["type"] with one of :string, :long,
  #         :double, :boolean, :timestamp or :json.
  #
  # Returns the converted value. For :boolean, a non-boolean value that is
  # not one of the recognised spellings yields nil.
  # Raises RuntimeError for any other field type.
  def convert_value(value, field)
    return nil if value.nil?

    case field["type"]
    when :string, :json
      value
    when :long
      value.to_i
    when :double
      value.to_f
    when :boolean
      if value == true || value == false
        value
      else
        # to_s first so non-String inputs (e.g. Integer 1) do not fail on downcase.
        BOOLEAN_STRINGS[value.to_s.downcase]
      end
    when :timestamp
      Time.parse(value)
    else
      raise "Unsupported type #{field['type']}"
    end
  end

  # Resolves a Presto type name to an Embulk column type symbol.
  #
  # type - Presto type name, e.g. "bigint" or "varchar(10)".
  #
  # Returns one of :boolean, :long, :double, :string, :json, :timestamp.
  # Raises ArgumentError when no known prefix matches, rather than returning
  # nil and producing a column without a type.
  def self.get_type(type)
    name = type.to_s
    _prefix, embulk_type = TYPE_PREFIXES.find { |prefix, _| name.start_with?(prefix) }
    raise ArgumentError, "Unsupported Presto type: #{type.inspect}" if embulk_type.nil?

    embulk_type
  end
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-presto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,12 +89,16 @@ extra_rdoc_files: []
|
|
89
89
|
files:
|
90
90
|
- ".gitignore"
|
91
91
|
- ".ruby-version"
|
92
|
+
- ".travis.yml"
|
92
93
|
- Gemfile
|
93
94
|
- LICENSE.txt
|
94
95
|
- README.md
|
95
96
|
- Rakefile
|
96
97
|
- embulk-input-presto.gemspec
|
97
98
|
- lib/embulk/input/presto.rb
|
99
|
+
- lib/embulk/input/presto/connection.rb
|
100
|
+
- lib/embulk/input/presto/explain_parser.rb
|
101
|
+
- lib/embulk/input/presto/type_converter.rb
|
98
102
|
- test/helper.rb
|
99
103
|
- test/test_transaction.rb
|
100
104
|
homepage: https://github.com/toyama0919/embulk-input-presto
|