pg_query 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 31d33e3f025beef9defb3beb7c471774873ca41c
4
- data.tar.gz: 6d2bf5bf6fda9dcd91bf9ecadc4d9a4c3d4af230
3
+ metadata.gz: e4626094955c34766049a41a21623847e80cb0d8
4
+ data.tar.gz: ea9c73a5e6e410f14dd3baa3971dbb87046efab3
5
5
  SHA512:
6
- metadata.gz: 8d1053da24fa8ec75b06797d44578ea9d438ce71a00d9626d1b0f2c2ad7e3b4fe993e6c5d0cb6e1264c87c607ec31640abba0013690b113d097350a4fdee606c
7
- data.tar.gz: 0302e42540073577d6e8e02f21c65c2ca71c15621555be41c7dee50432b57374573b695696260ff2c91ab422c54511e8cbf5d8fed3dc072faaf9f0bd46cd0494
6
+ metadata.gz: eb545f475347e8add8714fd95b9c5022d79c24296c16c73a7990e1742704de0778d713faa47ec0cb34de609ffa7e05a8a60e0fe3357910034e9d3ec633bc50b2
7
+ data.tar.gz: c4981b0e1107bf5ce207376207c8d7b90244536cc480b2d0a7a3c2f311938cb7bf95bfaf7eae171e4448993da335fbd65984dadc37af0543c2970b5178d98f9c
data/Rakefile CHANGED
@@ -1,20 +1,23 @@
1
- require "bundler/gem_tasks"
2
- require "rake/extensiontask"
3
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/extensiontask'
3
+ require 'rspec/core/rake_task'
4
+ require 'rubocop/rake_task'
4
5
 
5
- Rake::ExtensionTask.new "pg_query" do |ext|
6
- ext.lib_dir = "lib/pg_query"
6
+ Rake::ExtensionTask.new 'pg_query' do |ext|
7
+ ext.lib_dir = 'lib/pg_query'
7
8
  end
8
9
 
9
10
  RSpec::Core::RakeTask.new
11
+ RuboCop::RakeTask.new
10
12
 
11
13
  task spec: :compile
12
14
 
13
- task default: :spec
15
+ task default: [:lint, :spec]
14
16
  task test: :spec
17
+ task lint: :rubocop
15
18
 
16
19
  task :clean do
17
- FileUtils.rm_rf File.join(File.dirname(__FILE__), "tmp/")
18
- FileUtils.rm_f Dir.glob(File.join(File.dirname(__FILE__), "ext/pg_query/*.o"))
19
- FileUtils.rm_f File.join(File.dirname(__FILE__), "lib/pg_query/pg_query.bundle")
20
- end
20
+ FileUtils.rm_rf File.join(File.dirname(__FILE__), 'tmp/')
21
+ FileUtils.rm_f Dir.glob(File.join(File.dirname(__FILE__), 'ext/pg_query/*.o'))
22
+ FileUtils.rm_f File.join(File.dirname(__FILE__), 'lib/pg_query/pg_query.bundle')
23
+ end
data/ext/pg_query/extconf.rb CHANGED
@@ -1,8 +1,10 @@
1
+ # rubocop:disable Style/GlobalVars
2
+
1
3
  require 'mkmf'
2
4
  require 'open-uri'
3
5
 
4
6
  workdir = Dir.pwd
5
- pgdir = File.join(workdir, "postgres")
7
+ pgdir = File.join(workdir, 'postgres')
6
8
 
7
9
  # Limit the objects we build to speed up compilation times
8
10
  PG_OBJS = {
@@ -16,43 +18,43 @@ PG_OBJS = {
16
18
  'gram.o', 'parser.o', 'keywords.o', 'kwlookup.o', 'scansup.o'
17
19
  ],
18
20
  'backend/nodes' => [
19
- 'nodeFuncs.o', 'makefuncs.o', 'value.o', 'list.o', 'outfuncs_json.o'
21
+ 'copyfuncs.o', 'nodeFuncs.o', 'makefuncs.o', 'value.o', 'list.o', 'outfuncs_json.o'
20
22
  ],
21
23
  'backend/lib' => ['stringinfo.o'],
22
24
  'port' => ['qsort.o'],
23
25
  'common' => ['psprintf.o'],
24
- 'timezone' => ['pgtz.o'],
26
+ 'timezone' => ['pgtz.o']
25
27
  }
26
28
 
27
29
  # Download & compile PostgreSQL if we don't have it yet
28
30
  #
29
31
  # Note: We intentionally use a patched version that fixes bugs in outfuncs.c
30
- if !Dir.exists?(pgdir)
31
- unless File.exists?("#{workdir}/postgres.tar.gz")
32
- File.open("#{workdir}/postgres.tar.gz", "wb") do |target_file|
33
- open("https://codeload.github.com/pganalyze/postgres/tar.gz/pg_query", "rb") do |read_file|
32
+ unless Dir.exist?(pgdir)
33
+ unless File.exist?("#{workdir}/postgres.tar.gz")
34
+ File.open("#{workdir}/postgres.tar.gz", 'wb') do |target_file|
35
+ open('https://codeload.github.com/pganalyze/postgres/tar.gz/pg_query', 'rb') do |read_file|
34
36
  target_file.write(read_file.read)
35
37
  end
36
38
  end
37
39
  end
38
- system("tar -xf #{workdir}/postgres.tar.gz") || raise("ERROR")
39
- system("mv #{workdir}/postgres-pg_query #{pgdir}") || raise("ERROR")
40
- system("cd #{pgdir}; CFLAGS=-fPIC ./configure -q") || raise("ERROR")
40
+ system("tar -xf #{workdir}/postgres.tar.gz") || fail('ERROR')
41
+ system("mv #{workdir}/postgres-pg_query #{pgdir}") || fail('ERROR')
42
+ system("cd #{pgdir}; CFLAGS=-fPIC ./configure -q") || fail('ERROR')
41
43
  system("cd #{pgdir}; make -C src/backend lib-recursive") # Ensures headers are generated
42
44
  PG_OBJS.each do |directory, objs|
43
- system("cd #{pgdir}; make -C src/#{directory} #{objs.join(' ')}") || raise("ERROR")
45
+ system("cd #{pgdir}; make -C src/#{directory} #{objs.join(' ')}") || fail('ERROR')
44
46
  end
45
47
  end
46
48
 
47
49
  $objs = PG_OBJS.map { |directory, objs| objs.map { |obj| "#{pgdir}/src/#{directory}/#{obj}" } }.flatten
48
- $objs += ["pg_query.o", "pg_polyfills.o"]
50
+ $objs += %w(pg_query.o pg_query_parse.o pg_query_normalize.o pg_polyfills.o)
49
51
 
50
52
  $CFLAGS << " -I #{pgdir}/src/include"
51
53
 
52
54
  # Similar to those used by PostgreSQL
53
- $CFLAGS << " -O2 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv"
55
+ $CFLAGS << ' -O2 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv'
54
56
 
55
- SYMFILE = File.join(File.dirname(__FILE__), "pg_query.sym")
57
+ SYMFILE = File.join(File.dirname(__FILE__), 'pg_query.sym')
56
58
  if RUBY_PLATFORM =~ /darwin/
57
59
  $DLDFLAGS << " -Wl,-exported_symbols_list #{SYMFILE}" unless defined?(::Rubinius)
58
60
  else
data/ext/pg_query/pg_query.c CHANGED
@@ -1,478 +1,15 @@
1
- #include "postgres.h"
2
- #include "utils/memutils.h"
3
- #include "parser/parser.h"
4
- #include "parser/scanner.h"
5
- #include "parser/scansup.h"
6
- #include "nodes/print.h"
7
- #include "nodes/nodeFuncs.h"
8
- #include "mb/pg_wchar.h"
9
-
10
- #include <unistd.h>
11
- #include <fcntl.h>
12
-
13
- #include <ruby.h>
1
+ #include "pg_query.h"
14
2
 
15
3
  const char* progname = "pg_query";
16
4
 
17
- void Init_pg_query(void);
18
-
19
- static VALUE new_parse_error(ErrorData* error)
20
- {
21
- VALUE cPgQuery, cParseError;
22
- VALUE args[2];
23
-
24
- cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
25
- cParseError = rb_const_get_at(cPgQuery, rb_intern("ParseError"));
26
-
27
- args[0] = rb_str_new2(error->message);
28
- args[1] = INT2NUM(error->cursorpos);
29
-
30
- return rb_class_new_instance(2, args, cParseError);
31
- }
32
-
33
- #define STDERR_BUFFER_LEN 4096
34
- //#define DEBUG
35
-
36
- static VALUE pg_query_raw_parse(VALUE self, VALUE input)
37
- {
38
- Check_Type(input, T_STRING);
39
-
40
- MemoryContext ctx = NULL;
41
- VALUE result = Qnil;
42
- VALUE error = Qnil;
43
- char stderr_buffer[STDERR_BUFFER_LEN + 1] = {0};
44
- #ifndef DEBUG
45
- int stderr_global;
46
- int stderr_pipe[2];
47
- #endif
48
-
49
- ctx = AllocSetContextCreate(TopMemoryContext,
50
- "pg_query_raw_parse",
51
- ALLOCSET_DEFAULT_MINSIZE,
52
- ALLOCSET_DEFAULT_INITSIZE,
53
- ALLOCSET_DEFAULT_MAXSIZE);
54
- MemoryContextSwitchTo(ctx);
55
-
56
- #ifndef DEBUG
57
- // Setup pipe for stderr redirection
58
- if (pipe(stderr_pipe) != 0)
59
- rb_raise(rb_eIOError, "Failed to open pipe, too many open file descriptors");
60
-
61
- fcntl(stderr_pipe[0], F_SETFL, fcntl(stderr_pipe[0], F_GETFL) | O_NONBLOCK);
62
-
63
- // Redirect stderr to the pipe
64
- stderr_global = dup(STDERR_FILENO);
65
- dup2(stderr_pipe[1], STDERR_FILENO);
66
- close(stderr_pipe[1]);
67
- #endif
68
-
69
- // Parse it!
70
- PG_TRY();
71
- {
72
- List *tree;
73
- char *str;
74
-
75
- str = StringValueCStr(input);
76
- tree = raw_parser(str);
77
-
78
- str = nodeToJSONString(tree);
79
-
80
- #ifndef DEBUG
81
- // Save stderr for result
82
- read(stderr_pipe[0], stderr_buffer, STDERR_BUFFER_LEN);
83
- #endif
84
-
85
- result = rb_ary_new();
86
- rb_ary_push(result, rb_str_new2(str));
87
- rb_ary_push(result, rb_str_new2(stderr_buffer));
88
-
89
- pfree(str);
90
- }
91
- PG_CATCH();
92
- {
93
- ErrorData* error_data = CopyErrorData();
94
- error = new_parse_error(error_data);
95
- FlushErrorState();
96
- }
97
- PG_END_TRY();
98
-
99
- #ifndef DEBUG
100
- // Restore stderr, close pipe
101
- dup2(stderr_global, STDERR_FILENO);
102
- close(stderr_pipe[0]);
103
- close(stderr_global);
104
- #endif
105
-
106
- // Return to previous PostgreSQL memory context
107
- MemoryContextSwitchTo(TopMemoryContext);
108
- MemoryContextDelete(ctx);
109
-
110
- // If we got an error, throw it
111
- if (!NIL_P(error)) rb_exc_raise(error);
112
-
113
- return result;
114
- }
115
-
116
- /*
117
- * Struct for tracking locations/lengths of constants during normalization
118
- */
119
- typedef struct pgssLocationLen
120
- {
121
- int location; /* start offset in query text */
122
- int length; /* length in bytes, or -1 to ignore */
123
- } pgssLocationLen;
124
-
125
- /*
126
- * Working state for constant tree walker
127
- */
128
- typedef struct pgssConstLocations
129
- {
130
- /* Array of locations of constants that should be removed */
131
- pgssLocationLen *clocations;
132
-
133
- /* Allocated length of clocations array */
134
- int clocations_buf_size;
135
-
136
- /* Current number of valid entries in clocations array */
137
- int clocations_count;
138
- } pgssConstLocations;
139
-
140
- /*
141
- * comp_location: comparator for qsorting pgssLocationLen structs by location
142
- */
143
- static int
144
- comp_location(const void *a, const void *b)
145
- {
146
- int l = ((const pgssLocationLen *) a)->location;
147
- int r = ((const pgssLocationLen *) b)->location;
148
-
149
- if (l < r)
150
- return -1;
151
- else if (l > r)
152
- return +1;
153
- else
154
- return 0;
155
- }
156
-
157
- /*
158
- * Given a valid SQL string and an array of constant-location records,
159
- * fill in the textual lengths of those constants.
160
- *
161
- * The constants may use any allowed constant syntax, such as float literals,
162
- * bit-strings, single-quoted strings and dollar-quoted strings. This is
163
- * accomplished by using the public API for the core scanner.
164
- *
165
- * It is the caller's job to ensure that the string is a valid SQL statement
166
- * with constants at the indicated locations. Since in practice the string
167
- * has already been parsed, and the locations that the caller provides will
168
- * have originated from within the authoritative parser, this should not be
169
- * a problem.
170
- *
171
- * Duplicate constant pointers are possible, and will have their lengths
172
- * marked as '-1', so that they are later ignored. (Actually, we assume the
173
- * lengths were initialized as -1 to start with, and don't change them here.)
174
- *
175
- * N.B. There is an assumption that a '-' character at a Const location begins
176
- * a negative numeric constant. This precludes there ever being another
177
- * reason for a constant to start with a '-'.
178
- */
179
- static void
180
- fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
181
- {
182
- pgssLocationLen *locs;
183
- core_yyscan_t yyscanner;
184
- core_yy_extra_type yyextra;
185
- core_YYSTYPE yylval;
186
- YYLTYPE yylloc;
187
- int last_loc = -1;
188
- int i;
189
-
190
- /*
191
- * Sort the records by location so that we can process them in order while
192
- * scanning the query text.
193
- */
194
- if (jstate->clocations_count > 1)
195
- qsort(jstate->clocations, jstate->clocations_count,
196
- sizeof(pgssLocationLen), comp_location);
197
- locs = jstate->clocations;
198
-
199
- /* initialize the flex scanner --- should match raw_parser() */
200
- yyscanner = scanner_init(query,
201
- &yyextra,
202
- ScanKeywords,
203
- NumScanKeywords);
204
-
205
- /* Search for each constant, in sequence */
206
- for (i = 0; i < jstate->clocations_count; i++)
207
- {
208
- int loc = locs[i].location;
209
- int tok;
210
-
211
- Assert(loc >= 0);
212
-
213
- if (loc <= last_loc)
214
- continue; /* Duplicate constant, ignore */
215
-
216
- /* Lex tokens until we find the desired constant */
217
- for (;;)
218
- {
219
- tok = core_yylex(&yylval, &yylloc, yyscanner);
220
-
221
- /* We should not hit end-of-string, but if we do, behave sanely */
222
- if (tok == 0)
223
- break; /* out of inner for-loop */
224
-
225
- /*
226
- * We should find the token position exactly, but if we somehow
227
- * run past it, work with that.
228
- */
229
- if (yylloc >= loc)
230
- {
231
- if (query[loc] == '-')
232
- {
233
- /*
234
- * It's a negative value - this is the one and only case
235
- * where we replace more than a single token.
236
- *
237
- * Do not compensate for the core system's special-case
238
- * adjustment of location to that of the leading '-'
239
- * operator in the event of a negative constant. It is
240
- * also useful for our purposes to start from the minus
241
- * symbol. In this way, queries like "select * from foo
242
- * where bar = 1" and "select * from foo where bar = -2"
243
- * will have identical normalized query strings.
244
- */
245
- tok = core_yylex(&yylval, &yylloc, yyscanner);
246
- if (tok == 0)
247
- break; /* out of inner for-loop */
248
- }
249
-
250
- /*
251
- * We now rely on the assumption that flex has placed a zero
252
- * byte after the text of the current token in scanbuf.
253
- */
254
- locs[i].length = (int) strlen(yyextra.scanbuf + loc);
255
-
256
- /* Quoted string with Unicode escapes
257
- *
258
- * The lexer consumes trailing whitespace in order to find UESCAPE, but if there
259
- * is no UESCAPE it has still consumed it - don't include it in constant length.
260
- */
261
- if (locs[i].length > 4 && /* U&'' */
262
- (yyextra.scanbuf[loc] == 'u' || yyextra.scanbuf[loc] == 'U') &&
263
- yyextra.scanbuf[loc + 1] == '&' && yyextra.scanbuf[loc + 2] == '\'')
264
- {
265
- int j = locs[i].length - 1; /* Skip the \0 */
266
- for (; j >= 0 && scanner_isspace(yyextra.scanbuf[loc + j]); j--) {}
267
- locs[i].length = j + 1; /* Count the \0 */
268
- }
269
-
270
- break; /* out of inner for-loop */
271
- }
272
- }
273
-
274
- /* If we hit end-of-string, give up, leaving remaining lengths -1 */
275
- if (tok == 0)
276
- break;
277
-
278
- last_loc = loc;
279
- }
280
-
281
- scanner_finish(yyscanner);
282
- }
283
-
284
- /*
285
- * Generate a normalized version of the query string that will be used to
286
- * represent all similar queries.
287
- *
288
- * Note that the normalized representation may well vary depending on
289
- * just which "equivalent" query is used to create the hashtable entry.
290
- * We assume this is OK.
291
- *
292
- * *query_len_p contains the input string length, and is updated with
293
- * the result string length (which cannot be longer) on exit.
294
- *
295
- * Returns a palloc'd string.
296
- */
297
- static char *
298
- generate_normalized_query(pgssConstLocations *jstate, const char *query,
299
- int *query_len_p, int encoding)
300
- {
301
- char *norm_query;
302
- int query_len = *query_len_p;
303
- int i,
304
- len_to_wrt, /* Length (in bytes) to write */
305
- quer_loc = 0, /* Source query byte location */
306
- n_quer_loc = 0, /* Normalized query byte location */
307
- last_off = 0, /* Offset from start for previous tok */
308
- last_tok_len = 0; /* Length (in bytes) of that tok */
309
-
310
- /*
311
- * Get constants' lengths (core system only gives us locations). Note
312
- * this also ensures the items are sorted by location.
313
- */
314
- fill_in_constant_lengths(jstate, query);
315
-
316
- /* Allocate result buffer */
317
- norm_query = palloc(query_len + 1);
318
-
319
- for (i = 0; i < jstate->clocations_count; i++)
320
- {
321
- int off, /* Offset from start for cur tok */
322
- tok_len; /* Length (in bytes) of that tok */
323
-
324
- off = jstate->clocations[i].location;
325
- tok_len = jstate->clocations[i].length;
326
-
327
- if (tok_len < 0)
328
- continue; /* ignore any duplicates */
329
-
330
- /* Copy next chunk (what precedes the next constant) */
331
- len_to_wrt = off - last_off;
332
- len_to_wrt -= last_tok_len;
333
-
334
- Assert(len_to_wrt >= 0);
335
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
336
- n_quer_loc += len_to_wrt;
337
-
338
- /* And insert a '?' in place of the constant token */
339
- norm_query[n_quer_loc++] = '?';
340
-
341
- quer_loc = off + tok_len;
342
- last_off = off;
343
- last_tok_len = tok_len;
344
- }
345
-
346
- /*
347
- * We've copied up until the last ignorable constant. Copy over the
348
- * remaining bytes of the original query string.
349
- */
350
- len_to_wrt = query_len - quer_loc;
351
-
352
- Assert(len_to_wrt >= 0);
353
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
354
- n_quer_loc += len_to_wrt;
355
-
356
- Assert(n_quer_loc <= query_len);
357
- norm_query[n_quer_loc] = '\0';
358
-
359
- *query_len_p = n_quer_loc;
360
- return norm_query;
361
- }
362
-
363
- static bool const_record_walker(Node *node, pgssConstLocations *jstate)
364
- {
365
- bool result;
366
-
367
- if (node == NULL) return false;
368
-
369
- if (IsA(node, A_Const) && ((A_Const *) node)->location >= 0)
370
- {
371
- /* enlarge array if needed */
372
- if (jstate->clocations_count >= jstate->clocations_buf_size)
373
- {
374
- jstate->clocations_buf_size *= 2;
375
- jstate->clocations = (pgssLocationLen *)
376
- repalloc(jstate->clocations,
377
- jstate->clocations_buf_size *
378
- sizeof(pgssLocationLen));
379
- }
380
- jstate->clocations[jstate->clocations_count].location = ((A_Const *) node)->location;
381
- /* initialize lengths to -1 to simplify fill_in_constant_lengths */
382
- jstate->clocations[jstate->clocations_count].length = -1;
383
- jstate->clocations_count++;
384
- }
385
- else if (IsA(node, VariableSetStmt))
386
- {
387
- return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);
388
- }
389
- else if (IsA(node, CopyStmt))
390
- {
391
- return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);
392
- }
393
-
394
- PG_TRY();
395
- {
396
- result = raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
397
- }
398
- PG_CATCH();
399
- {
400
- FlushErrorState();
401
- result = false;
402
- }
403
- PG_END_TRY();
404
-
405
- return result;
406
- }
407
-
408
- static VALUE pg_query_normalize(VALUE self, VALUE input)
409
- {
410
- Check_Type(input, T_STRING);
411
-
412
- MemoryContext ctx = NULL;
413
- VALUE result = Qnil;
414
- VALUE error = Qnil;
415
-
416
- ctx = AllocSetContextCreate(TopMemoryContext,
417
- "pg_query_normalize",
418
- ALLOCSET_DEFAULT_MINSIZE,
419
- ALLOCSET_DEFAULT_INITSIZE,
420
- ALLOCSET_DEFAULT_MAXSIZE);
421
- MemoryContextSwitchTo(ctx);
422
-
423
- PG_TRY();
424
- {
425
- List *tree;
426
- char *str;
427
- pgssConstLocations jstate;
428
- int query_len;
429
-
430
- /* Parse query */
431
- str = StringValueCStr(input);
432
- tree = raw_parser(str);
433
-
434
- /* Set up workspace for constant recording */
435
- jstate.clocations_buf_size = 32;
436
- jstate.clocations = (pgssLocationLen *)
437
- palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
438
- jstate.clocations_count = 0;
439
-
440
- /* Walk tree and record const locations */
441
- const_record_walker((Node *) tree, &jstate);
442
-
443
- /* Normalize query */
444
- query_len = (int) strlen(str);
445
- str = generate_normalized_query(&jstate, str, &query_len, PG_UTF8);
446
-
447
- result = rb_str_new2(str);
448
-
449
- pfree(str);
450
- }
451
- PG_CATCH();
452
- {
453
- ErrorData* error_data = CopyErrorData();
454
- error = new_parse_error(error_data);
455
- FlushErrorState();
456
- }
457
- PG_END_TRY();
458
-
459
- MemoryContextSwitchTo(TopMemoryContext);
460
- MemoryContextDelete(ctx);
461
-
462
- // If we got an error, throw it
463
- if (!NIL_P(error)) rb_exc_raise(error);
464
-
465
- return result;
466
- }
467
-
468
5
  void Init_pg_query(void)
469
6
  {
470
7
  VALUE cPgQuery;
471
-
8
+
472
9
  MemoryContextInit();
473
10
 
474
11
  cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
475
12
 
476
13
  rb_define_singleton_method(cPgQuery, "_raw_parse", pg_query_raw_parse, 1);
477
14
  rb_define_singleton_method(cPgQuery, "normalize", pg_query_normalize, 1);
478
- }
15
+ }