prestogres 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -1,4 +1,13 @@
1
1
 
2
+ 2014-03-19 version 0.4.2:
3
+
4
+ * Send queries to PostgreSQL if they most likely cause parse error because
5
+ Presto doesn't return appropriate error code (42601 SYNTAX ERROR).
6
+ * Cast Presto query results to PostgreSQL types explicitly when it inserts
7
+ them to a temporary table to not cause "value too long for type character
8
+ varying(255)" exceptions
9
+
10
+
2
11
  2014-03-05 version 0.4.1:
3
12
 
4
13
  * Send queries to Presto if they include Presto's SQL syntax extension
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.1
1
+ 0.4.2
@@ -508,40 +508,68 @@ static void run_and_rewrite_system_catalog_query(POOL_SESSION_CONTEXT* session_c
508
508
  do_replace_query(query_context, rewrite_query_string_buffer);
509
509
  }
510
510
 
511
- /*
512
- * /\A\s*select\s*\*\s*from\s+(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*)(\.(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*))?\s*(;|\z)/.to_s
513
- */
514
- #define AUTO_LIMIT_QUERY_PATTERN "\\A\\s*select\\s*\\*\\s*from\\s+((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*)(\\.((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*))?\\s*(;|\\z)"
515
-
516
- static bool match_auto_limit_pattern(const char* query)
517
- {
511
+ typedef struct {
518
512
  const char* errptr;
519
513
  int erroffset;
520
514
  pcre* pattern;
515
+ } regexp_context;
516
+
517
+ static bool regexp_match(const char* regexp, regexp_context* context, const char* string)
518
+ {
521
519
  int ret;
522
520
  int ovec[10];
523
521
 
524
- pattern = pcre_compile(AUTO_LIMIT_QUERY_PATTERN,
525
- PCRE_CASELESS | PCRE_NO_AUTO_CAPTURE | PCRE_UTF8, &errptr, &erroffset, NULL);
526
- if (pattern == NULL) {
527
- // TODO pcre pattern should be precompiled. see also pcre_study.
528
- pool_error("match_auto_limit_pattern: invalid regexp %s at %d", errptr, erroffset);
529
- pcre_free(pattern);
522
+ if (context->errptr != NULL) {
530
523
  return false;
531
524
  }
532
525
 
533
- ret = pcre_exec(pattern, NULL, query, strlen(query), 0, 0, &ovec, sizeof(ovec));
526
+ if (context->pattern == NULL) {
527
+ pcre* pattern;
528
+ pattern = pcre_compile(regexp, PCRE_CASELESS | PCRE_NO_AUTO_CAPTURE | PCRE_UTF8,
529
+ &context->errptr, &context->erroffset, NULL);
530
+ if (pattern == NULL) {
531
+ pool_error("regexp_match: invalid regexp %s at %d", context->errptr, context->erroffset);
532
+ return false;
533
+ }
534
+ context->pattern = pattern;
535
+ context->errptr = NULL;
536
+
537
+ // TODO pcre_study?
538
+ }
539
+
540
+ ret = pcre_exec(context->pattern, NULL, string, strlen(string), 0, 0, ovec, sizeof(ovec));
534
541
  if (ret < 0) {
535
542
  // error. pattern didn't match in most of cases
536
- pcre_free(pattern);
537
543
  return false;
538
544
  }
539
545
 
540
- pcre_free(pattern);
541
-
542
546
  return true;
543
547
  }
544
548
 
549
+ /*
550
+ * /\A(?!.*select).*\z/i
551
+ */
552
+ #define LIKELY_PARSE_ERROR "\\A(?!.*select).*\\z"
553
+
554
+ static regexp_context LIKELY_PARSE_ERROR_REGEXP = {0};
555
+
556
+ static bool match_likely_parse_error(const char* query)
557
+ {
558
+ return regexp_match(LIKELY_PARSE_ERROR, &LIKELY_PARSE_ERROR_REGEXP, query);
559
+ }
560
+
561
+ /*
562
+ * /\A\s*select\s*\*\s*from\s+(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*)(\.(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*))?\s*(;|\z)/i
563
+ */
564
+ #define AUTO_LIMIT_QUERY_PATTERN "\\A\\s*select\\s*\\*\\s*from\\s+((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*)(\\.((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*))?\\s*(;|\\z)"
565
+
566
+ static regexp_context AUTO_LIMIT_REGEXP = {0};
567
+
568
+ static bool match_auto_limit_pattern(const char* query)
569
+ {
570
+ return regexp_match(AUTO_LIMIT_QUERY_PATTERN, &AUTO_LIMIT_REGEXP, query);
571
+ }
572
+
545
573
  static void run_and_rewrite_presto_query(POOL_SESSION_CONTEXT* session_context, POOL_QUERY_CONTEXT* query_context)
546
574
  {
547
575
  char *buffer, *bufend;
@@ -721,7 +749,7 @@ void pool_where_to_send(POOL_QUERY_CONTEXT *query_context, char *query, Node *no
721
749
  * If failed to parse the query, run it on Presto because
722
750
  * it may include Presto's SQL syntax extensions.
723
751
  */
724
- if (query_context->is_parse_error)
752
+ if (query_context->is_parse_error && !match_likely_parse_error(query_context->original_query))
725
753
  {
726
754
  pool_debug("prestogres: send_to_where: parse-error");
727
755
  pool_set_node_to_be_sent(query_context,
@@ -31,58 +31,72 @@ def _pg_table_type(presto_type):
31
31
 
32
32
  # build CREATE TEMPORARY TABLE statement
33
33
  def _build_create_temp_table_sql(table_name, column_names, column_types):
34
- create_sql = "create temporary table %s (\n " % plpy.quote_ident(table_name)
34
+ create_sql = ["create temporary table %s (\n " % plpy.quote_ident(table_name)]
35
35
 
36
36
  first = True
37
37
  for column_name, column_type in zip(column_names, column_types):
38
38
  if first:
39
39
  first = False
40
40
  else:
41
- create_sql += ",\n "
41
+ create_sql.append(",\n ")
42
42
 
43
- create_sql += plpy.quote_ident(column_name)
44
- create_sql += " "
45
- create_sql += column_type
43
+ create_sql.append(plpy.quote_ident(column_name))
44
+ create_sql.append(" ")
45
+ create_sql.append(column_type)
46
46
 
47
- create_sql += "\n)"
48
- return create_sql
47
+ create_sql.append("\n)")
48
+ return ''.join(create_sql)
49
49
 
50
50
  # build ALTER TABLE statement
51
51
  def _build_alter_table_holder_sql(schema_name, table_name, column_names, column_types, not_nulls):
52
- alter_sql = "alter table %s.%s \n " % (plpy.quote_ident(schema_name), plpy.quote_ident(table_name))
52
+ alter_sql = ["alter table %s.%s \n " % (plpy.quote_ident(schema_name), plpy.quote_ident(table_name))]
53
53
 
54
54
  first = True
55
55
  for column_name, column_type, not_null in zip(column_names, column_types, not_nulls):
56
56
  if first:
57
57
  first = False
58
58
  else:
59
- alter_sql += ",\n "
59
+ alter_sql.append(",\n ")
60
60
 
61
- alter_sql += "add %s %s" % (plpy.quote_ident(column_name), column_type)
61
+ alter_sql.append("add %s %s" % (plpy.quote_ident(column_name), column_type))
62
62
 
63
63
  if not_null:
64
- alter_sql += " not null"
64
+ alter_sql.append(" not null")
65
65
 
66
- return alter_sql
66
+ return ''.join(alter_sql)
67
67
 
68
68
  # build INSERT INTO statement and string format to build VALUES (..), ...
69
- def _build_insert_into_sql(table_name, column_names):
70
- insert_sql = "insert into %s (\n " % plpy.quote_ident(table_name)
69
+ def _build_insert_into_sql(table_name, column_names, column_types):
70
+ # INSERT INTO table_name (column_name, column_name, ...)
71
+ insert_sql = ["insert into %s (\n " % plpy.quote_ident(table_name)]
71
72
 
72
73
  first = True
73
74
  for column_name in column_names:
74
75
  if first:
75
76
  first = False
76
77
  else:
77
- insert_sql += ",\n "
78
+ insert_sql.append(",\n ")
78
79
 
79
- insert_sql += plpy.quote_ident(column_name)
80
+ insert_sql.append(plpy.quote_ident(column_name))
80
81
 
81
- insert_sql += "\n) values\n"
82
+ insert_sql.append("\n) values\n")
82
83
 
83
- values_sql_format = "(%s)" % (", ".join(["${}"] * len(column_names)))
84
+ # VALUES (${}::column_type, ${}::column_type, ...)
85
+ values_sql_format = ["("]
84
86
 
85
- return (insert_sql, values_sql_format)
87
+ first = True
88
+ for column_type in column_types:
89
+ if first:
90
+ first = False
91
+ else:
92
+ values_sql_format.append(", ")
93
+
94
+ values_sql_format.append("${}::")
95
+ values_sql_format.append(column_type)
96
+
97
+ values_sql_format.append(")")
98
+
99
+ return (''.join(insert_sql), ''.join(values_sql_format))
86
100
 
87
101
  # create a prepared statement for batch INSERT
88
102
  def _plan_batch(insert_sql, values_sql_format, column_types, batch_size):
@@ -173,7 +187,7 @@ def run_presto_as_temp_table(server, user, catalog, schema, result_table, query)
173
187
 
174
188
  # build SQL
175
189
  create_sql = _build_create_temp_table_sql(result_table, column_names, column_types)
176
- insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names)
190
+ insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names, column_types)
177
191
 
178
192
  # run CREATE TABLE
179
193
  plpy.execute("drop table if exists " + plpy.quote_ident(result_table))
@@ -326,7 +340,7 @@ def run_system_catalog_as_temp_table(server, user, catalog, schema, result_table
326
340
  subxact.exit("rollback subtransaction", None, None)
327
341
 
328
342
  create_sql = _build_create_temp_table_sql(result_table, column_names, column_types)
329
- insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names)
343
+ insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names, column_types)
330
344
 
331
345
  # run CREATE TABLE and INSERT
332
346
  plpy.execute("drop table if exists " + plpy.quote_ident(result_table))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prestogres
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-03-05 00:00:00.000000000 Z
12
+ date: 2014-03-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler