prestogres 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/ChangeLog +8 -0
- data/README.md +8 -7
- data/VERSION +1 -1
- data/config/pool_hba.conf +6 -6
- data/pgpool2/pool_proto_modules.c +27 -0
- data/pgpool2/pool_query_context.c +12 -48
- data/pgpool2/pool_rewrite_query.c +74 -0
- data/pgpool2/pool_rewrite_query.h +11 -0
- data/pgsql/prestogres.py +50 -23
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
!binary "U0hBMQ==":
|
|
3
3
|
metadata.gz: !binary |-
|
|
4
|
-
|
|
4
|
+
MDhmMGY1ZTM1NzgwODg0OWY2MGY4MjZlOWRkYTIxZDA1NDg0ZTEyMg==
|
|
5
5
|
data.tar.gz: !binary |-
|
|
6
|
-
|
|
6
|
+
YTYyOTUxYjQwM2MzZWE1MjA4NzAzYmNhMDVjZjNjYmNhZTJlMTRiZg==
|
|
7
7
|
SHA512:
|
|
8
8
|
metadata.gz: !binary |-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
ODYxY2FmYWIzYjllNDJmZGRhNTY0ZDM2MzA2NDNhNzUxOGRhMDc3ODgzNGUy
|
|
10
|
+
MjRhZTU0YTM3Y2VhMDFiMTc0NjdkOTlkMTI4NDRjNTBjNTdlNGYyNmZkMjc2
|
|
11
|
+
MjRjNDRmNzkzYjg2NzRiOTczMzNhYWUxZTNlNGFjZWRiZWRmN2M=
|
|
12
12
|
data.tar.gz: !binary |-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
YzhlYmVlMzM4NmVmY2U1ZWRiMzhhMTNiZTQwYTllMmIxNDYwOTE2NGQ4NmZh
|
|
14
|
+
MWM2ZTFmMzNmZjc5NzFmMWZhZTcyMzA4ZDFkOGYwODc0ZGMyMzFmYmVlYjcz
|
|
15
|
+
YzQxYTM5MmI2ZWMyM2Q5MzFjYTM0MDVmOWRjNjlhY2JhZWQwOGM=
|
data/ChangeLog
CHANGED
|
@@ -1,4 +1,12 @@
|
|
|
1
1
|
|
|
2
|
+
2014-06-25 version 0.4.6:
|
|
3
|
+
|
|
4
|
+
* Supports python-2.6 and CentOS 6 (@wyukawa++)
|
|
5
|
+
* Ignores "invalid session state" error which was causing exception before
|
|
6
|
+
* Ignores BEGIN and COMMIT commands which were causing exception before
|
|
7
|
+
* Fixed error handling with protocol version 2 to send message back to clients
|
|
8
|
+
|
|
9
|
+
|
|
2
10
|
2014-06-11 version 0.4.5:
|
|
3
11
|
|
|
4
12
|
* Added prestogres_trust auth method
|
data/README.md
CHANGED
|
@@ -52,8 +52,9 @@ In fact there're some other tricks. See [pgsql/prestogres.py](pgsql/prestogres.p
|
|
|
52
52
|
|
|
53
53
|
* Extended query is not supported ([PostgreSQL Frontend/Backend Protocol](http://www.postgresql.org/docs/9.3/static/protocol.html))
|
|
54
54
|
* ODBC driver needs to set:
|
|
55
|
-
* **
|
|
56
|
-
* **
|
|
55
|
+
* **Server side prepare = no** property (UseServerSidePrepare=0 at .ini file)
|
|
56
|
+
* **Use Declare/Fetch = no** property (UseDeclareFetch=0 at .ini file)
|
|
57
|
+
* **Level of rollback on errors = Nop** property (Protocol=7.4-0 or Protocol=6.4 at .ini file)
|
|
57
58
|
* **Unicode** mode
|
|
58
59
|
* JDBC driver needs to set:
|
|
59
60
|
* **protocolVersion=2** property
|
|
@@ -86,7 +87,7 @@ sudo apt-get install ruby ruby-dev
|
|
|
86
87
|
# add yum source
|
|
87
88
|
sudo yum install http://yum.postgresql.org/9.3/redhat/rhel-6-x86_64/pgdg-redhat93-9.3-1.noarch.rpm
|
|
88
89
|
# install PostgreSQL
|
|
89
|
-
sudo yum install postgresql93-server postgresql93-contrib postgresql93-devel
|
|
90
|
+
sudo yum install postgresql93-server postgresql93-contrib postgresql93-devel postgresql93-plpython
|
|
90
91
|
# install other dependencies
|
|
91
92
|
sudo yum install gcc make openssl-devel pcre-devel
|
|
92
93
|
sudo yum install ruby ruby-devel
|
|
@@ -175,14 +176,14 @@ Please read [pgpool-II documentation](http://www.pgpool.net/docs/latest/pgpool-e
|
|
|
175
176
|
Following parameters are unique to Prestogres:
|
|
176
177
|
|
|
177
178
|
* **presto_server**: Default address:port of Presto server.
|
|
178
|
-
* **presto_catalog**: Default catalog
|
|
179
|
+
* **presto_catalog**: Default catalog name of Presto such as `hive`, etc.
|
|
179
180
|
* **presto_external_auth_prog**: Default path to an external authentication program used by the `prestogres_external` authentication method. See the following Authentication section for details.
|
|
180
181
|
|
|
181
182
|
You can override these parameters for each connecting user. See also the following *pool_hba.conf* section.
|
|
182
183
|
|
|
183
184
|
### pool_hba.conf file
|
|
184
185
|
|
|
185
|
-
By default configuration, Prestogres accepts all connections from localhost without password and rejects any other connections. You can change this behavior by updating **\<data_dir\>/
|
|
186
|
+
By default configuration, Prestogres accepts all connections from localhost without password and rejects any other connections. You can change this behavior by updating **\<data_dir\>/pgpool/pool_hba.conf** file.
|
|
186
187
|
|
|
187
188
|
See [sample pool_hba.conf file](https://github.com/treasure-data/prestogres/blob/master/config/pool_hba.conf) for details. Basic syntax is:
|
|
188
189
|
|
|
@@ -196,7 +197,7 @@ host all all 0.0.0.0/0 prestogres_external p
|
|
|
196
197
|
|
|
197
198
|
#### prestogres_md5 method
|
|
198
199
|
|
|
199
|
-
This authentication method uses a password file **\<data_dir\>/
|
|
200
|
+
This authentication method uses a password file **\<data_dir\>/pgpool/pool_passwd** to authenticate a user. You can use the `prestogres passwd` command to add a user to this file:
|
|
200
201
|
|
|
201
202
|
```sh
|
|
202
203
|
$ prestogres -D pgdata passwd myuser
|
|
@@ -206,7 +207,7 @@ password: (enter password here)
|
|
|
206
207
|
In pool_hba.conf file, you can set following options to OPTIONS field:
|
|
207
208
|
|
|
208
209
|
* **server**: Address:port of the Presto server, which overrides the `presto_server` parameter in pgpool.conf.
|
|
209
|
-
* **catalog**: Catalog
|
|
210
|
+
* **catalog**: Catalog name of Presto, which overwrites `presto_catalog` parameter in pgpool.conf.
|
|
210
211
|
* **schema**: Default schema name of Presto. By default, Prestogres uses the same name as the database name used to log in to pgpool-II. The following `pg_database` parameter doesn't affect this parameter.
|
|
211
212
|
* **user**: User name to run queries on Presto. By default, Prestogres uses the same user name used to log in to pgpool-II. The following `pg_user` parameter doesn't affect this parameter.
|
|
212
213
|
* **pg_database**: Overwrite database to connect to PostgreSQL. The value should be `postgres` in most of cases.
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.4.
|
|
1
|
+
0.4.6
|
data/config/pool_hba.conf
CHANGED
|
@@ -60,25 +60,25 @@
|
|
|
60
60
|
# TYPE DATABASE USER CIDR-ADDRESS METHOD OPTIONS
|
|
61
61
|
|
|
62
62
|
# "local" is for Unix domain socket connections only
|
|
63
|
-
local all all prestogres_trust pg_database:postgres
|
|
63
|
+
local all all prestogres_trust pg_database:postgres,pg_user:pg
|
|
64
64
|
|
|
65
65
|
# IPv4 local connections:
|
|
66
|
-
host all all 127.0.0.1/32 prestogres_trust pg_database:postgres
|
|
66
|
+
host all all 127.0.0.1/32 prestogres_trust pg_database:postgres,pg_user:pg
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
# Examples
|
|
70
70
|
# ----------------------------------
|
|
71
71
|
#
|
|
72
72
|
## Trust all connection from localhost:
|
|
73
|
-
#host all all 127.0.0.1/32 prestogres_trust pg_database:postgres
|
|
73
|
+
#host all all 127.0.0.1/32 prestogres_trust pg_database:postgres,pg_user:pg
|
|
74
74
|
#
|
|
75
75
|
## MD5 authorization using pool_passwd file:
|
|
76
|
-
#host all all 0.0.0.0/0 prestogres_md5 pg_database:postgres
|
|
76
|
+
#host all all 0.0.0.0/0 prestogres_md5 pg_database:postgres,pg_user:pg
|
|
77
77
|
#
|
|
78
78
|
## MD5 authorization and overwrites presto_server and
|
|
79
79
|
## presto_catalog parameters in pgpool.conf:
|
|
80
|
-
#host all all 0.0.0.0/0 prestogres_md5 server:localhost:8190,catalog:hive
|
|
80
|
+
#host all all 0.0.0.0/0 prestogres_md5 pg_database:postgres,pg_user:pg,server:localhost:8190,catalog:hive
|
|
81
81
|
#
|
|
82
82
|
## Authorization using an external program:
|
|
83
|
-
#host all all 0.0.0.0/0 prestogres_external auth_prog:/path/to/prog
|
|
83
|
+
#host all all 0.0.0.0/0 prestogres_external pg_database:postgres,pg_user:pg,auth_prog:/path/to/prog
|
|
84
84
|
#
|
|
@@ -96,6 +96,30 @@ static int extract_ntuples(char *message);
|
|
|
96
96
|
static POOL_STATUS close_standby_transactions(POOL_CONNECTION *frontend,
|
|
97
97
|
POOL_CONNECTION_POOL *backend);
|
|
98
98
|
|
|
99
|
+
#define SQL_SPACE_PATTERN "(?:(?:--[^\\n]*\\n)|\\s)*"
|
|
100
|
+
#define SQL_REMOVE_BEGIN_AND_COMMIT_PATTERN \
|
|
101
|
+
"(?:\\A" SQL_SPACE_PATTERN "begin" SQL_SPACE_PATTERN "(?:;|" SQL_SPACE_PATTERN "\\z))?" SQL_SPACE_PATTERN \
|
|
102
|
+
"(" \
|
|
103
|
+
"(?:(?!commit" SQL_SPACE_PATTERN ";?" SQL_SPACE_PATTERN "\\z).)*" \
|
|
104
|
+
")"
|
|
105
|
+
|
|
106
|
+
#define DO_NOTHING_SQL "RESET geqo;"
|
|
107
|
+
|
|
108
|
+
static pool_regexp_context REMOVE_BEGIN_AND_COMMIT_REGEXP = {0};
|
|
109
|
+
|
|
110
|
+
static int remove_begin_and_commit(char* contents)
|
|
111
|
+
{
|
|
112
|
+
int len;
|
|
113
|
+
pool_regexp_extract(SQL_REMOVE_BEGIN_AND_COMMIT_PATTERN, &REMOVE_BEGIN_AND_COMMIT_REGEXP, contents, 1);
|
|
114
|
+
len = strlen(contents);
|
|
115
|
+
if (len == 0) {
|
|
116
|
+
strcpy(contents, DO_NOTHING_SQL);
|
|
117
|
+
len = strlen(contents);
|
|
118
|
+
}
|
|
119
|
+
pool_debug("prestogres rewrite statement: '%s'", contents);
|
|
120
|
+
return len + 1;
|
|
121
|
+
}
|
|
122
|
+
|
|
99
123
|
/*
|
|
100
124
|
* Process Query('Q') message
|
|
101
125
|
* Query messages include an SQL string.
|
|
@@ -146,6 +170,9 @@ POOL_STATUS SimpleQuery(POOL_CONNECTION *frontend,
|
|
|
146
170
|
pool_debug("statement2: %s", contents);
|
|
147
171
|
}
|
|
148
172
|
|
|
173
|
+
/* Prestogres removes BEGIN and COMMIT */
|
|
174
|
+
len = remove_begin_and_commit(contents);
|
|
175
|
+
|
|
149
176
|
/*
|
|
150
177
|
* Fetch memory cache if possible
|
|
151
178
|
*/
|
|
@@ -27,12 +27,12 @@
|
|
|
27
27
|
#include "pool_query_context.h"
|
|
28
28
|
#include "pool_select_walker.h"
|
|
29
29
|
#include "parser/nodes.h"
|
|
30
|
+
#include "pool_rewrite_query.h"
|
|
30
31
|
|
|
31
32
|
#include <string.h>
|
|
32
33
|
#include <netinet/in.h>
|
|
33
34
|
#include <stdlib.h>
|
|
34
35
|
|
|
35
|
-
#include <pcre.h>
|
|
36
36
|
|
|
37
37
|
/*
|
|
38
38
|
* Where to send query
|
|
@@ -415,7 +415,7 @@ static void rewrite_error_query(POOL_QUERY_CONTEXT* query_context, char *message
|
|
|
415
415
|
}
|
|
416
416
|
|
|
417
417
|
if (errcode == NULL) {
|
|
418
|
-
|
|
418
|
+
errcode = "XX000"; /* Internal Error */
|
|
419
419
|
}
|
|
420
420
|
|
|
421
421
|
if (sizeof(rewrite_query_string_buffer) < strlen(message) + static_length) {
|
|
@@ -520,54 +520,16 @@ static void run_and_rewrite_system_catalog_query(POOL_SESSION_CONTEXT* session_c
|
|
|
520
520
|
do_replace_query(query_context, rewrite_query_string_buffer);
|
|
521
521
|
}
|
|
522
522
|
|
|
523
|
-
typedef struct {
|
|
524
|
-
const char* errptr;
|
|
525
|
-
int erroffset;
|
|
526
|
-
pcre* pattern;
|
|
527
|
-
} regexp_context;
|
|
528
|
-
|
|
529
|
-
static bool regexp_match(const char* regexp, regexp_context* context, const char* string)
|
|
530
|
-
{
|
|
531
|
-
int ret;
|
|
532
|
-
int ovec[10];
|
|
533
|
-
|
|
534
|
-
if (context->errptr != NULL) {
|
|
535
|
-
return false;
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
if (context->pattern == NULL) {
|
|
539
|
-
pcre* pattern;
|
|
540
|
-
pattern = pcre_compile(regexp, PCRE_CASELESS | PCRE_NO_AUTO_CAPTURE | PCRE_UTF8,
|
|
541
|
-
&context->errptr, &context->erroffset, NULL);
|
|
542
|
-
if (pattern == NULL) {
|
|
543
|
-
pool_error("regexp_match: invalid regexp %s at %d", context->errptr, context->erroffset);
|
|
544
|
-
return false;
|
|
545
|
-
}
|
|
546
|
-
context->pattern = pattern;
|
|
547
|
-
context->errptr = NULL;
|
|
548
|
-
|
|
549
|
-
// TODO pcre_study?
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
ret = pcre_exec(context->pattern, NULL, string, strlen(string), 0, 0, ovec, sizeof(ovec));
|
|
553
|
-
if (ret < 0) {
|
|
554
|
-
// error. pattern didn't match in most of cases
|
|
555
|
-
return false;
|
|
556
|
-
}
|
|
557
|
-
|
|
558
|
-
return true;
|
|
559
|
-
}
|
|
560
|
-
|
|
561
523
|
/*
|
|
562
524
|
* /\A(?!.*select).*\z/i
|
|
563
525
|
*/
|
|
564
526
|
#define LIKELY_PARSE_ERROR "\\A(?!.*select).*\\z"
|
|
565
527
|
|
|
566
|
-
static
|
|
528
|
+
static pool_regexp_context LIKELY_PARSE_ERROR_REGEXP = {0};
|
|
567
529
|
|
|
568
530
|
static bool match_likely_parse_error(const char* query)
|
|
569
531
|
{
|
|
570
|
-
return
|
|
532
|
+
return pool_regexp_match(LIKELY_PARSE_ERROR, &LIKELY_PARSE_ERROR_REGEXP, query);
|
|
571
533
|
}
|
|
572
534
|
|
|
573
535
|
/*
|
|
@@ -575,11 +537,11 @@ static bool match_likely_parse_error(const char* query)
|
|
|
575
537
|
*/
|
|
576
538
|
#define AUTO_LIMIT_QUERY_PATTERN "\\A\\s*select\\s*\\*\\s*from\\s+((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*)(\\.((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*))?\\s*(;|\\z)"
|
|
577
539
|
|
|
578
|
-
static
|
|
540
|
+
static pool_regexp_context AUTO_LIMIT_REGEXP = {0};
|
|
579
541
|
|
|
580
542
|
static bool match_auto_limit_pattern(const char* query)
|
|
581
543
|
{
|
|
582
|
-
return
|
|
544
|
+
return pool_regexp_match(AUTO_LIMIT_QUERY_PATTERN, &AUTO_LIMIT_REGEXP, query);
|
|
583
545
|
}
|
|
584
546
|
|
|
585
547
|
static void run_and_rewrite_presto_query(POOL_SESSION_CONTEXT* session_context, POOL_QUERY_CONTEXT* query_context)
|
|
@@ -798,10 +760,12 @@ void pool_where_to_send(POOL_QUERY_CONTEXT *query_context, char *query, Node *no
|
|
|
798
760
|
* transaction isolation level is not SERIALIZABLE)
|
|
799
761
|
* we might be able to load balance.
|
|
800
762
|
*/
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
763
|
+
/*
|
|
764
|
+
* Prestogres assumes that any transactions can not write data,
|
|
765
|
+
* and runs queries as usual even if the transaction is failed or
|
|
766
|
+
* SERIALIZABLE or writing transaction.
|
|
767
|
+
*/
|
|
768
|
+
if (1)
|
|
805
769
|
{
|
|
806
770
|
BackendInfo *bkinfo = pool_get_node_info(session_context->load_balance_node_id);
|
|
807
771
|
|
|
@@ -713,3 +713,77 @@ POOL_STATUS pool_do_parallel_query(POOL_CONNECTION *frontend,
|
|
|
713
713
|
*parallel = false;
|
|
714
714
|
return POOL_CONTINUE;
|
|
715
715
|
}
|
|
716
|
+
|
|
717
|
+
/*
 * pool_regexp_match - test whether `string` matches `regexp`.
 *
 * The pattern is compiled on first use (case-insensitive, UTF-8, without
 * capturing groups) and cached in `context`, which the caller must
 * zero-initialize and reuse for the same `regexp`.  If compilation fails,
 * the error is logged once and stays recorded in context->errptr, so every
 * later call with that context returns false without recompiling.
 *
 * Returns true when the pattern matches, false when it does not match or
 * when the pattern is invalid.
 */
bool pool_regexp_match(const char* regexp, pool_regexp_context* context, const char* string)
{
	int ret;
	int ovec[10];

	if (context->errptr != NULL) {
		/* an earlier compilation of this pattern failed */
		return false;
	}

	if (context->pattern == NULL) {
		pcre* pattern;
		pattern = pcre_compile(regexp, PCRE_CASELESS | PCRE_NO_AUTO_CAPTURE | PCRE_UTF8,
				&context->errptr, &context->erroffset, NULL);
		if (pattern == NULL) {
			pool_error("regexp_match: invalid regexp %s at %d", context->errptr, context->erroffset);
			return false;
		}
		context->pattern = pattern;
		context->errptr = NULL;

		// TODO pcre_study?
	}

	/*
	 * pcre_exec expects the size of the output vector as a number of int
	 * elements, not bytes; passing sizeof(ovec) would overstate it.
	 */
	ret = pcre_exec(context->pattern, NULL, string, strlen(string), 0, 0,
			ovec, sizeof(ovec) / sizeof(ovec[0]));
	if (ret < 0) {
		// error. pattern didn't match in most of cases
		return false;
	}

	return true;
}
|
|
748
|
+
|
|
749
|
+
/*
 * pool_regexp_extract - replace `string` in place with the text captured by
 * group `number` of `regexp`.
 *
 * The pattern is compiled on first use (case-insensitive, UTF-8, with
 * capturing groups enabled) and cached in `context`, which the caller must
 * zero-initialize and reuse for the same `regexp`.  If compilation fails,
 * the error is logged once and stays recorded in context->errptr, so every
 * later call with that context returns false without recompiling.
 *
 * Returns true when the pattern matched and the group was copied back into
 * `string`; returns false, leaving `string` untouched, when the pattern is
 * invalid, does not match, or the requested group is unavailable.
 */
bool pool_regexp_extract(const char* regexp, pool_regexp_context* context, char* string, int number)
{
	int ret;
	int ovec[10];
	const char* pos;

	if (context->errptr != NULL) {
		/* an earlier compilation of this pattern failed */
		return false;
	}

	if (context->pattern == NULL) {
		pcre* pattern;
		pattern = pcre_compile(regexp, PCRE_CASELESS | PCRE_UTF8,
				&context->errptr, &context->erroffset, NULL);
		if (pattern == NULL) {
			pool_error("regexp_extract: invalid regexp %s at %d", context->errptr, context->erroffset);
			return false;
		}
		context->pattern = pattern;
		context->errptr = NULL;

		// TODO pcre_study?
	}

	/*
	 * pcre_exec expects the size of the output vector as a number of int
	 * elements, not bytes.  Passing sizeof(ovec) would let PCRE treat
	 * memory beyond the array as part of the vector.
	 */
	ret = pcre_exec(context->pattern, NULL, string, strlen(string), 0, 0,
			ovec, sizeof(ovec) / sizeof(ovec[0]));
	if (ret < 0) {
		// error. pattern didn't match in most of cases
		return false;
	}

	/* on success `pos` is a freshly allocated copy and `ret` its length */
	ret = pcre_get_substring(string, ovec, ret, number, &pos);
	if (ret < 0) {
		// number-th group does not match
		return false;
	}

	/* the captured text is never longer than the subject, so it fits */
	strlcpy(string, pos, ret+1);
	pcre_free_substring(pos);
	return true;
}
|
|
789
|
+
|
|
@@ -32,6 +32,8 @@
|
|
|
32
32
|
#include "parser/pool_memory.h"
|
|
33
33
|
#include "parser/pool_string.h"
|
|
34
34
|
|
|
35
|
+
#include <pcre.h>
|
|
36
|
+
|
|
35
37
|
/* return code set */
|
|
36
38
|
#define INSERT_SQL_RESTRICTION 1
|
|
37
39
|
#define SELECT_INIT 2
|
|
@@ -188,5 +190,14 @@ POOL_STATUS pool_do_parallel_query(POOL_CONNECTION *frontend,
|
|
|
188
190
|
POOL_CONNECTION_POOL *backend,
|
|
189
191
|
Node *node, bool *parallel, char **string, int *len);
|
|
190
192
|
|
|
193
|
+
typedef struct {
|
|
194
|
+
const char* errptr;
|
|
195
|
+
int erroffset;
|
|
196
|
+
pcre* pattern;
|
|
197
|
+
} pool_regexp_context;
|
|
198
|
+
|
|
199
|
+
bool pool_regexp_match(const char* regexp, pool_regexp_context* context, const char* string);
|
|
200
|
+
bool pool_regexp_extract(const char* regexp, pool_regexp_context* context, char* string, int number);
|
|
201
|
+
|
|
191
202
|
#endif /* POOL_REWRITE_QUERY_H */
|
|
192
203
|
|
data/pgsql/prestogres.py
CHANGED
|
@@ -31,6 +31,21 @@ def _pg_table_type(presto_type):
|
|
|
31
31
|
# assuming Presto and PostgreSQL use the same SQL standard name
|
|
32
32
|
return presto_type
|
|
33
33
|
|
|
34
|
+
# queries can include same column name twice but tables can't.
|
|
35
|
+
def _rename_duplicated_column_names(column_names):
|
|
36
|
+
renamed = []
|
|
37
|
+
used_names = set()
|
|
38
|
+
for original_name in column_names:
|
|
39
|
+
name = original_name
|
|
40
|
+
while name in used_names:
|
|
41
|
+
name += "_"
|
|
42
|
+
if name != original_name:
|
|
43
|
+
plpy.warning("Result column %s is renamed to %s because the name appears twice in a query result" % \
|
|
44
|
+
(plpy.quote_ident(original_name), plpy.quote_ident(name)))
|
|
45
|
+
used_names.add(name)
|
|
46
|
+
renamed.append(name)
|
|
47
|
+
return renamed
|
|
48
|
+
|
|
34
49
|
# build CREATE TEMPORARY TABLE statement
|
|
35
50
|
def _build_create_temp_table_sql(table_name, column_names, column_types):
|
|
36
51
|
create_sql = ["create temporary table %s (\n " % plpy.quote_ident(table_name)]
|
|
@@ -68,7 +83,7 @@ def _build_alter_table_holder_sql(schema_name, table_name, column_names, column_
|
|
|
68
83
|
return ''.join(alter_sql)
|
|
69
84
|
|
|
70
85
|
# build INSERT INTO statement and string format to build VALUES (..), ...
|
|
71
|
-
def _build_insert_into_sql(table_name, column_names
|
|
86
|
+
def _build_insert_into_sql(table_name, column_names):
|
|
72
87
|
# INSERT INTO table_name (column_name, column_name, ...)
|
|
73
88
|
insert_sql = ["insert into %s (\n " % plpy.quote_ident(table_name)]
|
|
74
89
|
|
|
@@ -83,32 +98,38 @@ def _build_insert_into_sql(table_name, column_names, column_types):
|
|
|
83
98
|
|
|
84
99
|
insert_sql.append("\n) values\n")
|
|
85
100
|
|
|
86
|
-
|
|
87
|
-
|
|
101
|
+
return ''.join(insert_sql)
|
|
102
|
+
|
|
103
|
+
# create a prepared statement for batch INSERT
|
|
104
|
+
def _plan_batch(insert_sql, column_types, batch_size):
|
|
105
|
+
index = 1 # PostgreSQL's place holder begins from 1
|
|
88
106
|
|
|
107
|
+
# append value list (...), (...), ... at the end of insert_sql
|
|
108
|
+
batch_isnert_sql = [insert_sql]
|
|
89
109
|
first = True
|
|
90
|
-
for
|
|
110
|
+
for i in range(batch_size):
|
|
91
111
|
if first:
|
|
92
112
|
first = False
|
|
93
113
|
else:
|
|
94
|
-
|
|
114
|
+
batch_isnert_sql.append(", ")
|
|
95
115
|
|
|
96
|
-
|
|
97
|
-
|
|
116
|
+
# ($1::column_type, $2::column_type, ...)
|
|
117
|
+
batch_isnert_sql.append("(")
|
|
118
|
+
value_first = True
|
|
119
|
+
for column_type in column_types:
|
|
120
|
+
if value_first:
|
|
121
|
+
value_first = False
|
|
122
|
+
else:
|
|
123
|
+
batch_isnert_sql.append(", ")
|
|
98
124
|
|
|
99
|
-
|
|
125
|
+
batch_isnert_sql.append("$%s::%s" % (index, column_type))
|
|
126
|
+
index += 1
|
|
127
|
+
batch_isnert_sql.append(")")
|
|
100
128
|
|
|
101
|
-
return (''.join(
|
|
102
|
-
|
|
103
|
-
# create a prepared statement for batch INSERT
|
|
104
|
-
def _plan_batch(insert_sql, values_sql_format, column_types, batch_size):
|
|
105
|
-
# format string 'values ($1, $2), ($3, $4) ...'
|
|
106
|
-
values_sql = (", ".join([values_sql_format] * batch_size)).format(*range(1, batch_size * len(column_types) + 1))
|
|
107
|
-
batch_insert_sql = insert_sql + values_sql
|
|
108
|
-
return plpy.prepare(batch_insert_sql, column_types * batch_size)
|
|
129
|
+
return plpy.prepare(''.join(batch_isnert_sql), column_types * batch_size)
|
|
109
130
|
|
|
110
131
|
# run batch INSERT
|
|
111
|
-
def _batch_insert(insert_sql,
|
|
132
|
+
def _batch_insert(insert_sql, batch_size, column_types, rows):
|
|
112
133
|
full_batch_plan = None
|
|
113
134
|
|
|
114
135
|
batch = []
|
|
@@ -117,12 +138,12 @@ def _batch_insert(insert_sql, values_sql_format, batch_size, column_types, rows)
|
|
|
117
138
|
batch_len = len(batch)
|
|
118
139
|
if batch_len >= batch_size:
|
|
119
140
|
if full_batch_plan is None:
|
|
120
|
-
full_batch_plan = _plan_batch(insert_sql,
|
|
141
|
+
full_batch_plan = _plan_batch(insert_sql, column_types, batch_len)
|
|
121
142
|
plpy.execute(full_batch_plan, [item for sublist in batch for item in sublist])
|
|
122
143
|
del batch[:]
|
|
123
144
|
|
|
124
145
|
if batch:
|
|
125
|
-
plan = _plan_batch(insert_sql,
|
|
146
|
+
plan = _plan_batch(insert_sql, column_types, len(batch))
|
|
126
147
|
plpy.execute(plan, [item for sublist in batch for item in sublist])
|
|
127
148
|
|
|
128
149
|
class SchemaCache(object):
|
|
@@ -192,15 +213,16 @@ def run_presto_as_temp_table(server, user, catalog, schema, result_table, query)
|
|
|
192
213
|
column_types.append(_pg_result_type(column.type))
|
|
193
214
|
|
|
194
215
|
# build SQL
|
|
216
|
+
column_names = _rename_duplicated_column_names(column_names)
|
|
195
217
|
create_sql = _build_create_temp_table_sql(result_table, column_names, column_types)
|
|
196
|
-
insert_sql
|
|
218
|
+
insert_sql = _build_insert_into_sql(result_table, column_names)
|
|
197
219
|
|
|
198
220
|
# run CREATE TABLE
|
|
199
221
|
plpy.execute("drop table if exists " + plpy.quote_ident(result_table))
|
|
200
222
|
plpy.execute(create_sql)
|
|
201
223
|
|
|
202
224
|
# run INSERT
|
|
203
|
-
_batch_insert(insert_sql,
|
|
225
|
+
_batch_insert(insert_sql, 10, column_types, q.results())
|
|
204
226
|
finally:
|
|
205
227
|
q.close()
|
|
206
228
|
|
|
@@ -346,6 +368,10 @@ def run_system_catalog_as_temp_table(server, user, catalog, schema, result_table
|
|
|
346
368
|
for row in plpy.cursor(sql):
|
|
347
369
|
plpy.execute("drop schema %s" % plpy.quote_ident(row["schema_name"]))
|
|
348
370
|
|
|
371
|
+
# update pg_database
|
|
372
|
+
plan = plpy.prepare("update pg_database set datname=$1 where datname=current_database()", ['name'])
|
|
373
|
+
plpy.execute(plan, [schema])
|
|
374
|
+
|
|
349
375
|
# run the actual query and save result
|
|
350
376
|
metadata = plpy.execute(query)
|
|
351
377
|
column_names = metadata.colnames()
|
|
@@ -363,13 +389,14 @@ def run_system_catalog_as_temp_table(server, user, catalog, schema, result_table
|
|
|
363
389
|
# rollback subtransaction
|
|
364
390
|
subxact.exit("rollback subtransaction", None, None)
|
|
365
391
|
|
|
392
|
+
column_names = _rename_duplicated_column_names(column_names)
|
|
366
393
|
create_sql = _build_create_temp_table_sql(result_table, column_names, column_types)
|
|
367
|
-
insert_sql
|
|
394
|
+
insert_sql = _build_insert_into_sql(result_table, column_names)
|
|
368
395
|
|
|
369
396
|
# run CREATE TABLE and INSERT
|
|
370
397
|
plpy.execute("drop table if exists " + plpy.quote_ident(result_table))
|
|
371
398
|
plpy.execute(create_sql)
|
|
372
|
-
_batch_insert(insert_sql,
|
|
399
|
+
_batch_insert(insert_sql, 10, column_types, result)
|
|
373
400
|
|
|
374
401
|
except (plpy.SPIError, presto_client.PrestoException) as e:
|
|
375
402
|
# Set __module__ = "__module__" to generate pretty messages.
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: prestogres
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.
|
|
4
|
+
version: 0.4.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sadayuki Furuhashi
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-06-
|
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -484,3 +484,4 @@ signing_key:
|
|
|
484
484
|
specification_version: 4
|
|
485
485
|
summary: Presto PostgreSQL protocol gateway
|
|
486
486
|
test_files: []
|
|
487
|
+
has_rdoc: false
|