prestogres 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog CHANGED
@@ -1,4 +1,13 @@
1
1
 
2
+ 2014-03-19 version 0.4.2:
3
+
4
+ * Send queries to PostgreSQL if they most likely cause parse error because
5
+ Presto doesn't return appropriate error code (42601 SYNTAX ERROR).
6
+ * Cast Presto query results to PostgreSQL types explicitly when it inserts
7
+ them to a temporary table to not cause "value too long for type character
8
+ varying(255)" exceptions
9
+
10
+
2
11
  2014-03-05 version 0.4.1:
3
12
 
4
13
  * Send queries to Presto if they include Presto's SQL syntax extension
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.1
1
+ 0.4.2
@@ -508,40 +508,68 @@ static void run_and_rewrite_system_catalog_query(POOL_SESSION_CONTEXT* session_c
508
508
  do_replace_query(query_context, rewrite_query_string_buffer);
509
509
  }
510
510
 
511
- /*
512
- * /\A\s*select\s*\*\s*from\s+(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*)(\.(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*))?\s*(;|\z)/.to_s
513
- */
514
- #define AUTO_LIMIT_QUERY_PATTERN "\\A\\s*select\\s*\\*\\s*from\\s+((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*)(\\.((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*))?\\s*(;|\\z)"
515
-
516
- static bool match_auto_limit_pattern(const char* query)
517
- {
511
+ typedef struct {
518
512
  const char* errptr;
519
513
  int erroffset;
520
514
  pcre* pattern;
515
+ } regexp_context;
516
+
517
+ static bool regexp_match(const char* regexp, regexp_context* context, const char* string)
518
+ {
521
519
  int ret;
522
520
  int ovec[10];
523
521
 
524
- pattern = pcre_compile(AUTO_LIMIT_QUERY_PATTERN,
525
- PCRE_CASELESS | PCRE_NO_AUTO_CAPTURE | PCRE_UTF8, &errptr, &erroffset, NULL);
526
- if (pattern == NULL) {
527
- // TODO pcre pattern should be precompiled. see also pcre_study.
528
- pool_error("match_auto_limit_pattern: invalid regexp %s at %d", errptr, erroffset);
529
- pcre_free(pattern);
522
+ if (context->errptr != NULL) {
530
523
  return false;
531
524
  }
532
525
 
533
- ret = pcre_exec(pattern, NULL, query, strlen(query), 0, 0, &ovec, sizeof(ovec));
526
+ if (context->pattern == NULL) {
527
+ pcre* pattern;
528
+ pattern = pcre_compile(regexp, PCRE_CASELESS | PCRE_NO_AUTO_CAPTURE | PCRE_UTF8,
529
+ &context->errptr, &context->erroffset, NULL);
530
+ if (pattern == NULL) {
531
+ pool_error("regexp_match: invalid regexp %s at %d", context->errptr, context->erroffset);
532
+ return false;
533
+ }
534
+ context->pattern = pattern;
535
+ context->errptr = NULL;
536
+
537
+ // TODO pcre_study?
538
+ }
539
+
540
+ ret = pcre_exec(context->pattern, NULL, string, strlen(string), 0, 0, ovec, sizeof(ovec));
534
541
  if (ret < 0) {
535
542
  // error. pattern didn't match in most of cases
536
- pcre_free(pattern);
537
543
  return false;
538
544
  }
539
545
 
540
- pcre_free(pattern);
541
-
542
546
  return true;
543
547
  }
544
548
 
549
+ /*
550
+ * /\A(?!.*select).*\z/i
551
+ */
552
+ #define LIKELY_PARSE_ERROR "\\A(?!.*select).*\\z"
553
+
554
+ static regexp_context LIKELY_PARSE_ERROR_REGEXP = {0};
555
+
556
+ static bool match_likely_parse_error(const char* query)
557
+ {
558
+ return regexp_match(LIKELY_PARSE_ERROR, &LIKELY_PARSE_ERROR_REGEXP, query);
559
+ }
560
+
561
+ /*
562
+ * /\A\s*select\s*\*\s*from\s+(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*)(\.(("[^\\"]*([\\"][^\\"]*)*")|[a-zA-Z_][a-zA-Z0-9_]*))?\s*(;|\z)/i
563
+ */
564
+ #define AUTO_LIMIT_QUERY_PATTERN "\\A\\s*select\\s*\\*\\s*from\\s+((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*)(\\.((\"[^\\\\\"]*([\\\\\"][^\\\\\"]*)*\")|[a-zA-Z_][a-zA-Z0-9_]*))?\\s*(;|\\z)"
565
+
566
+ static regexp_context AUTO_LIMIT_REGEXP = {0};
567
+
568
+ static bool match_auto_limit_pattern(const char* query)
569
+ {
570
+ return regexp_match(AUTO_LIMIT_QUERY_PATTERN, &AUTO_LIMIT_REGEXP, query);
571
+ }
572
+
545
573
  static void run_and_rewrite_presto_query(POOL_SESSION_CONTEXT* session_context, POOL_QUERY_CONTEXT* query_context)
546
574
  {
547
575
  char *buffer, *bufend;
@@ -721,7 +749,7 @@ void pool_where_to_send(POOL_QUERY_CONTEXT *query_context, char *query, Node *no
721
749
  * If failed to parse the query, run it on Presto because
722
750
  * it may include Presto's SQL syntax extensions.
723
751
  */
724
- if (query_context->is_parse_error)
752
+ if (query_context->is_parse_error && !match_likely_parse_error(query_context->original_query))
725
753
  {
726
754
  pool_debug("prestogres: send_to_where: parse-error");
727
755
  pool_set_node_to_be_sent(query_context,
@@ -31,58 +31,72 @@ def _pg_table_type(presto_type):
31
31
 
32
32
  # build CREATE TEMPORARY TABLE statement
33
33
  def _build_create_temp_table_sql(table_name, column_names, column_types):
34
- create_sql = "create temporary table %s (\n " % plpy.quote_ident(table_name)
34
+ create_sql = ["create temporary table %s (\n " % plpy.quote_ident(table_name)]
35
35
 
36
36
  first = True
37
37
  for column_name, column_type in zip(column_names, column_types):
38
38
  if first:
39
39
  first = False
40
40
  else:
41
- create_sql += ",\n "
41
+ create_sql.append(",\n ")
42
42
 
43
- create_sql += plpy.quote_ident(column_name)
44
- create_sql += " "
45
- create_sql += column_type
43
+ create_sql.append(plpy.quote_ident(column_name))
44
+ create_sql.append(" ")
45
+ create_sql.append(column_type)
46
46
 
47
- create_sql += "\n)"
48
- return create_sql
47
+ create_sql.append("\n)")
48
+ return ''.join(create_sql)
49
49
 
50
50
  # build CREATE TABLE statement
51
51
  def _build_alter_table_holder_sql(schema_name, table_name, column_names, column_types, not_nulls):
52
- alter_sql = "alter table %s.%s \n " % (plpy.quote_ident(schema_name), plpy.quote_ident(table_name))
52
+ alter_sql = ["alter table %s.%s \n " % (plpy.quote_ident(schema_name), plpy.quote_ident(table_name))]
53
53
 
54
54
  first = True
55
55
  for column_name, column_type, not_null in zip(column_names, column_types, not_nulls):
56
56
  if first:
57
57
  first = False
58
58
  else:
59
- alter_sql += ",\n "
59
+ alter_sql.append(",\n ")
60
60
 
61
- alter_sql += "add %s %s" % (plpy.quote_ident(column_name), column_type)
61
+ alter_sql.append("add %s %s" % (plpy.quote_ident(column_name), column_type))
62
62
 
63
63
  if not_null:
64
- alter_sql += " not null"
64
+ alter_sql.append(" not null")
65
65
 
66
- return alter_sql
66
+ return ''.join(alter_sql)
67
67
 
68
68
  # build INSERT INTO statement and string format to build VALUES (..), ...
69
- def _build_insert_into_sql(table_name, column_names):
70
- insert_sql = "insert into %s (\n " % plpy.quote_ident(table_name)
69
+ def _build_insert_into_sql(table_name, column_names, column_types):
70
+ # INSERT INTO table_name (column_name, column_name, ...)
71
+ insert_sql = ["insert into %s (\n " % plpy.quote_ident(table_name)]
71
72
 
72
73
  first = True
73
74
  for column_name in column_names:
74
75
  if first:
75
76
  first = False
76
77
  else:
77
- insert_sql += ",\n "
78
+ insert_sql.append(",\n ")
78
79
 
79
- insert_sql += plpy.quote_ident(column_name)
80
+ insert_sql.append(plpy.quote_ident(column_name))
80
81
 
81
- insert_sql += "\n) values\n"
82
+ insert_sql.append("\n) values\n")
82
83
 
83
- values_sql_format = "(%s)" % (", ".join(["${}"] * len(column_names)))
84
+ # VALUES (${}::column_type, ${}::column_type, ...)
85
+ values_sql_format = ["("]
84
86
 
85
- return (insert_sql, values_sql_format)
87
+ first = True
88
+ for column_type in column_types:
89
+ if first:
90
+ first = False
91
+ else:
92
+ values_sql_format.append(", ")
93
+
94
+ values_sql_format.append("${}::")
95
+ values_sql_format.append(column_type)
96
+
97
+ values_sql_format.append(")")
98
+
99
+ return (''.join(insert_sql), ''.join(values_sql_format))
86
100
 
87
101
  # create a prepared statement for batch INSERT
88
102
  def _plan_batch(insert_sql, values_sql_format, column_types, batch_size):
@@ -173,7 +187,7 @@ def run_presto_as_temp_table(server, user, catalog, schema, result_table, query)
173
187
 
174
188
  # build SQL
175
189
  create_sql = _build_create_temp_table_sql(result_table, column_names, column_types)
176
- insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names)
190
+ insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names, column_types)
177
191
 
178
192
  # run CREATE TABLE
179
193
  plpy.execute("drop table if exists " + plpy.quote_ident(result_table))
@@ -326,7 +340,7 @@ def run_system_catalog_as_temp_table(server, user, catalog, schema, result_table
326
340
  subxact.exit("rollback subtransaction", None, None)
327
341
 
328
342
  create_sql = _build_create_temp_table_sql(result_table, column_names, column_types)
329
- insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names)
343
+ insert_sql, values_sql_format = _build_insert_into_sql(result_table, column_names, column_types)
330
344
 
331
345
  # run CREATE TABLE and INSERT
332
346
  plpy.execute("drop table if exists " + plpy.quote_ident(result_table))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prestogres
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-03-05 00:00:00.000000000 Z
12
+ date: 2014-03-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler