pg_query 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 31d33e3f025beef9defb3beb7c471774873ca41c
4
- data.tar.gz: 6d2bf5bf6fda9dcd91bf9ecadc4d9a4c3d4af230
3
+ metadata.gz: e4626094955c34766049a41a21623847e80cb0d8
4
+ data.tar.gz: ea9c73a5e6e410f14dd3baa3971dbb87046efab3
5
5
  SHA512:
6
- metadata.gz: 8d1053da24fa8ec75b06797d44578ea9d438ce71a00d9626d1b0f2c2ad7e3b4fe993e6c5d0cb6e1264c87c607ec31640abba0013690b113d097350a4fdee606c
7
- data.tar.gz: 0302e42540073577d6e8e02f21c65c2ca71c15621555be41c7dee50432b57374573b695696260ff2c91ab422c54511e8cbf5d8fed3dc072faaf9f0bd46cd0494
6
+ metadata.gz: eb545f475347e8add8714fd95b9c5022d79c24296c16c73a7990e1742704de0778d713faa47ec0cb34de609ffa7e05a8a60e0fe3357910034e9d3ec633bc50b2
7
+ data.tar.gz: c4981b0e1107bf5ce207376207c8d7b90244536cc480b2d0a7a3c2f311938cb7bf95bfaf7eae171e4448993da335fbd65984dadc37af0543c2970b5178d98f9c
data/Rakefile CHANGED
@@ -1,20 +1,23 @@
1
- require "bundler/gem_tasks"
2
- require "rake/extensiontask"
3
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/extensiontask'
3
+ require 'rspec/core/rake_task'
4
+ require 'rubocop/rake_task'
4
5
 
5
- Rake::ExtensionTask.new "pg_query" do |ext|
6
- ext.lib_dir = "lib/pg_query"
6
+ Rake::ExtensionTask.new 'pg_query' do |ext|
7
+ ext.lib_dir = 'lib/pg_query'
7
8
  end
8
9
 
9
10
  RSpec::Core::RakeTask.new
11
+ RuboCop::RakeTask.new
10
12
 
11
13
  task spec: :compile
12
14
 
13
- task default: :spec
15
+ task default: [:lint, :spec]
14
16
  task test: :spec
17
+ task lint: :rubocop
15
18
 
16
19
  task :clean do
17
- FileUtils.rm_rf File.join(File.dirname(__FILE__), "tmp/")
18
- FileUtils.rm_f Dir.glob(File.join(File.dirname(__FILE__), "ext/pg_query/*.o"))
19
- FileUtils.rm_f File.join(File.dirname(__FILE__), "lib/pg_query/pg_query.bundle")
20
- end
20
+ FileUtils.rm_rf File.join(File.dirname(__FILE__), 'tmp/')
21
+ FileUtils.rm_f Dir.glob(File.join(File.dirname(__FILE__), 'ext/pg_query/*.o'))
22
+ FileUtils.rm_f File.join(File.dirname(__FILE__), 'lib/pg_query/pg_query.bundle')
23
+ end
@@ -1,8 +1,10 @@
1
+ # rubocop:disable Style/GlobalVars
2
+
1
3
  require 'mkmf'
2
4
  require 'open-uri'
3
5
 
4
6
  workdir = Dir.pwd
5
- pgdir = File.join(workdir, "postgres")
7
+ pgdir = File.join(workdir, 'postgres')
6
8
 
7
9
  # Limit the objects we build to speed up compilation times
8
10
  PG_OBJS = {
@@ -16,43 +18,43 @@ PG_OBJS = {
16
18
  'gram.o', 'parser.o', 'keywords.o', 'kwlookup.o', 'scansup.o'
17
19
  ],
18
20
  'backend/nodes' => [
19
- 'nodeFuncs.o', 'makefuncs.o', 'value.o', 'list.o', 'outfuncs_json.o'
21
+ 'copyfuncs.o', 'nodeFuncs.o', 'makefuncs.o', 'value.o', 'list.o', 'outfuncs_json.o'
20
22
  ],
21
23
  'backend/lib' => ['stringinfo.o'],
22
24
  'port' => ['qsort.o'],
23
25
  'common' => ['psprintf.o'],
24
- 'timezone' => ['pgtz.o'],
26
+ 'timezone' => ['pgtz.o']
25
27
  }
26
28
 
27
29
  # Download & compile PostgreSQL if we don't have it yet
28
30
  #
29
31
  # Note: We intentionally use a patched version that fixes bugs in outfuncs.c
30
- if !Dir.exists?(pgdir)
31
- unless File.exists?("#{workdir}/postgres.tar.gz")
32
- File.open("#{workdir}/postgres.tar.gz", "wb") do |target_file|
33
- open("https://codeload.github.com/pganalyze/postgres/tar.gz/pg_query", "rb") do |read_file|
32
+ unless Dir.exist?(pgdir)
33
+ unless File.exist?("#{workdir}/postgres.tar.gz")
34
+ File.open("#{workdir}/postgres.tar.gz", 'wb') do |target_file|
35
+ open('https://codeload.github.com/pganalyze/postgres/tar.gz/pg_query', 'rb') do |read_file|
34
36
  target_file.write(read_file.read)
35
37
  end
36
38
  end
37
39
  end
38
- system("tar -xf #{workdir}/postgres.tar.gz") || raise("ERROR")
39
- system("mv #{workdir}/postgres-pg_query #{pgdir}") || raise("ERROR")
40
- system("cd #{pgdir}; CFLAGS=-fPIC ./configure -q") || raise("ERROR")
40
+ system("tar -xf #{workdir}/postgres.tar.gz") || fail('ERROR')
41
+ system("mv #{workdir}/postgres-pg_query #{pgdir}") || fail('ERROR')
42
+ system("cd #{pgdir}; CFLAGS=-fPIC ./configure -q") || fail('ERROR')
41
43
  system("cd #{pgdir}; make -C src/backend lib-recursive") # Ensures headers are generated
42
44
  PG_OBJS.each do |directory, objs|
43
- system("cd #{pgdir}; make -C src/#{directory} #{objs.join(' ')}") || raise("ERROR")
45
+ system("cd #{pgdir}; make -C src/#{directory} #{objs.join(' ')}") || fail('ERROR')
44
46
  end
45
47
  end
46
48
 
47
49
  $objs = PG_OBJS.map { |directory, objs| objs.map { |obj| "#{pgdir}/src/#{directory}/#{obj}" } }.flatten
48
- $objs += ["pg_query.o", "pg_polyfills.o"]
50
+ $objs += %w(pg_query.o pg_query_parse.o pg_query_normalize.o pg_polyfills.o)
49
51
 
50
52
  $CFLAGS << " -I #{pgdir}/src/include"
51
53
 
52
54
  # Similar to those used by PostgreSQL
53
- $CFLAGS << " -O2 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv"
55
+ $CFLAGS << ' -O2 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv'
54
56
 
55
- SYMFILE = File.join(File.dirname(__FILE__), "pg_query.sym")
57
+ SYMFILE = File.join(File.dirname(__FILE__), 'pg_query.sym')
56
58
  if RUBY_PLATFORM =~ /darwin/
57
59
  $DLDFLAGS << " -Wl,-exported_symbols_list #{SYMFILE}" unless defined?(::Rubinius)
58
60
  else
@@ -1,478 +1,15 @@
1
- #include "postgres.h"
2
- #include "utils/memutils.h"
3
- #include "parser/parser.h"
4
- #include "parser/scanner.h"
5
- #include "parser/scansup.h"
6
- #include "nodes/print.h"
7
- #include "nodes/nodeFuncs.h"
8
- #include "mb/pg_wchar.h"
9
-
10
- #include <unistd.h>
11
- #include <fcntl.h>
12
-
13
- #include <ruby.h>
1
+ #include "pg_query.h"
14
2
 
15
3
  const char* progname = "pg_query";
16
4
 
17
- void Init_pg_query(void);
18
-
19
- static VALUE new_parse_error(ErrorData* error)
20
- {
21
- VALUE cPgQuery, cParseError;
22
- VALUE args[2];
23
-
24
- cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
25
- cParseError = rb_const_get_at(cPgQuery, rb_intern("ParseError"));
26
-
27
- args[0] = rb_str_new2(error->message);
28
- args[1] = INT2NUM(error->cursorpos);
29
-
30
- return rb_class_new_instance(2, args, cParseError);
31
- }
32
-
33
- #define STDERR_BUFFER_LEN 4096
34
- //#define DEBUG
35
-
36
- static VALUE pg_query_raw_parse(VALUE self, VALUE input)
37
- {
38
- Check_Type(input, T_STRING);
39
-
40
- MemoryContext ctx = NULL;
41
- VALUE result = Qnil;
42
- VALUE error = Qnil;
43
- char stderr_buffer[STDERR_BUFFER_LEN + 1] = {0};
44
- #ifndef DEBUG
45
- int stderr_global;
46
- int stderr_pipe[2];
47
- #endif
48
-
49
- ctx = AllocSetContextCreate(TopMemoryContext,
50
- "pg_query_raw_parse",
51
- ALLOCSET_DEFAULT_MINSIZE,
52
- ALLOCSET_DEFAULT_INITSIZE,
53
- ALLOCSET_DEFAULT_MAXSIZE);
54
- MemoryContextSwitchTo(ctx);
55
-
56
- #ifndef DEBUG
57
- // Setup pipe for stderr redirection
58
- if (pipe(stderr_pipe) != 0)
59
- rb_raise(rb_eIOError, "Failed to open pipe, too many open file descriptors");
60
-
61
- fcntl(stderr_pipe[0], F_SETFL, fcntl(stderr_pipe[0], F_GETFL) | O_NONBLOCK);
62
-
63
- // Redirect stderr to the pipe
64
- stderr_global = dup(STDERR_FILENO);
65
- dup2(stderr_pipe[1], STDERR_FILENO);
66
- close(stderr_pipe[1]);
67
- #endif
68
-
69
- // Parse it!
70
- PG_TRY();
71
- {
72
- List *tree;
73
- char *str;
74
-
75
- str = StringValueCStr(input);
76
- tree = raw_parser(str);
77
-
78
- str = nodeToJSONString(tree);
79
-
80
- #ifndef DEBUG
81
- // Save stderr for result
82
- read(stderr_pipe[0], stderr_buffer, STDERR_BUFFER_LEN);
83
- #endif
84
-
85
- result = rb_ary_new();
86
- rb_ary_push(result, rb_str_new2(str));
87
- rb_ary_push(result, rb_str_new2(stderr_buffer));
88
-
89
- pfree(str);
90
- }
91
- PG_CATCH();
92
- {
93
- ErrorData* error_data = CopyErrorData();
94
- error = new_parse_error(error_data);
95
- FlushErrorState();
96
- }
97
- PG_END_TRY();
98
-
99
- #ifndef DEBUG
100
- // Restore stderr, close pipe
101
- dup2(stderr_global, STDERR_FILENO);
102
- close(stderr_pipe[0]);
103
- close(stderr_global);
104
- #endif
105
-
106
- // Return to previous PostgreSQL memory context
107
- MemoryContextSwitchTo(TopMemoryContext);
108
- MemoryContextDelete(ctx);
109
-
110
- // If we got an error, throw it
111
- if (!NIL_P(error)) rb_exc_raise(error);
112
-
113
- return result;
114
- }
115
-
116
- /*
117
- * Struct for tracking locations/lengths of constants during normalization
118
- */
119
- typedef struct pgssLocationLen
120
- {
121
- int location; /* start offset in query text */
122
- int length; /* length in bytes, or -1 to ignore */
123
- } pgssLocationLen;
124
-
125
- /*
126
- * Working state for constant tree walker
127
- */
128
- typedef struct pgssConstLocations
129
- {
130
- /* Array of locations of constants that should be removed */
131
- pgssLocationLen *clocations;
132
-
133
- /* Allocated length of clocations array */
134
- int clocations_buf_size;
135
-
136
- /* Current number of valid entries in clocations array */
137
- int clocations_count;
138
- } pgssConstLocations;
139
-
140
- /*
141
- * comp_location: comparator for qsorting pgssLocationLen structs by location
142
- */
143
- static int
144
- comp_location(const void *a, const void *b)
145
- {
146
- int l = ((const pgssLocationLen *) a)->location;
147
- int r = ((const pgssLocationLen *) b)->location;
148
-
149
- if (l < r)
150
- return -1;
151
- else if (l > r)
152
- return +1;
153
- else
154
- return 0;
155
- }
156
-
157
- /*
158
- * Given a valid SQL string and an array of constant-location records,
159
- * fill in the textual lengths of those constants.
160
- *
161
- * The constants may use any allowed constant syntax, such as float literals,
162
- * bit-strings, single-quoted strings and dollar-quoted strings. This is
163
- * accomplished by using the public API for the core scanner.
164
- *
165
- * It is the caller's job to ensure that the string is a valid SQL statement
166
- * with constants at the indicated locations. Since in practice the string
167
- * has already been parsed, and the locations that the caller provides will
168
- * have originated from within the authoritative parser, this should not be
169
- * a problem.
170
- *
171
- * Duplicate constant pointers are possible, and will have their lengths
172
- * marked as '-1', so that they are later ignored. (Actually, we assume the
173
- * lengths were initialized as -1 to start with, and don't change them here.)
174
- *
175
- * N.B. There is an assumption that a '-' character at a Const location begins
176
- * a negative numeric constant. This precludes there ever being another
177
- * reason for a constant to start with a '-'.
178
- */
179
- static void
180
- fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
181
- {
182
- pgssLocationLen *locs;
183
- core_yyscan_t yyscanner;
184
- core_yy_extra_type yyextra;
185
- core_YYSTYPE yylval;
186
- YYLTYPE yylloc;
187
- int last_loc = -1;
188
- int i;
189
-
190
- /*
191
- * Sort the records by location so that we can process them in order while
192
- * scanning the query text.
193
- */
194
- if (jstate->clocations_count > 1)
195
- qsort(jstate->clocations, jstate->clocations_count,
196
- sizeof(pgssLocationLen), comp_location);
197
- locs = jstate->clocations;
198
-
199
- /* initialize the flex scanner --- should match raw_parser() */
200
- yyscanner = scanner_init(query,
201
- &yyextra,
202
- ScanKeywords,
203
- NumScanKeywords);
204
-
205
- /* Search for each constant, in sequence */
206
- for (i = 0; i < jstate->clocations_count; i++)
207
- {
208
- int loc = locs[i].location;
209
- int tok;
210
-
211
- Assert(loc >= 0);
212
-
213
- if (loc <= last_loc)
214
- continue; /* Duplicate constant, ignore */
215
-
216
- /* Lex tokens until we find the desired constant */
217
- for (;;)
218
- {
219
- tok = core_yylex(&yylval, &yylloc, yyscanner);
220
-
221
- /* We should not hit end-of-string, but if we do, behave sanely */
222
- if (tok == 0)
223
- break; /* out of inner for-loop */
224
-
225
- /*
226
- * We should find the token position exactly, but if we somehow
227
- * run past it, work with that.
228
- */
229
- if (yylloc >= loc)
230
- {
231
- if (query[loc] == '-')
232
- {
233
- /*
234
- * It's a negative value - this is the one and only case
235
- * where we replace more than a single token.
236
- *
237
- * Do not compensate for the core system's special-case
238
- * adjustment of location to that of the leading '-'
239
- * operator in the event of a negative constant. It is
240
- * also useful for our purposes to start from the minus
241
- * symbol. In this way, queries like "select * from foo
242
- * where bar = 1" and "select * from foo where bar = -2"
243
- * will have identical normalized query strings.
244
- */
245
- tok = core_yylex(&yylval, &yylloc, yyscanner);
246
- if (tok == 0)
247
- break; /* out of inner for-loop */
248
- }
249
-
250
- /*
251
- * We now rely on the assumption that flex has placed a zero
252
- * byte after the text of the current token in scanbuf.
253
- */
254
- locs[i].length = (int) strlen(yyextra.scanbuf + loc);
255
-
256
- /* Quoted string with Unicode escapes
257
- *
258
- * The lexer consumes trailing whitespace in order to find UESCAPE, but if there
259
- * is no UESCAPE it has still consumed it - don't include it in constant length.
260
- */
261
- if (locs[i].length > 4 && /* U&'' */
262
- (yyextra.scanbuf[loc] == 'u' || yyextra.scanbuf[loc] == 'U') &&
263
- yyextra.scanbuf[loc + 1] == '&' && yyextra.scanbuf[loc + 2] == '\'')
264
- {
265
- int j = locs[i].length - 1; /* Skip the \0 */
266
- for (; j >= 0 && scanner_isspace(yyextra.scanbuf[loc + j]); j--) {}
267
- locs[i].length = j + 1; /* Count the \0 */
268
- }
269
-
270
- break; /* out of inner for-loop */
271
- }
272
- }
273
-
274
- /* If we hit end-of-string, give up, leaving remaining lengths -1 */
275
- if (tok == 0)
276
- break;
277
-
278
- last_loc = loc;
279
- }
280
-
281
- scanner_finish(yyscanner);
282
- }
283
-
284
- /*
285
- * Generate a normalized version of the query string that will be used to
286
- * represent all similar queries.
287
- *
288
- * Note that the normalized representation may well vary depending on
289
- * just which "equivalent" query is used to create the hashtable entry.
290
- * We assume this is OK.
291
- *
292
- * *query_len_p contains the input string length, and is updated with
293
- * the result string length (which cannot be longer) on exit.
294
- *
295
- * Returns a palloc'd string.
296
- */
297
- static char *
298
- generate_normalized_query(pgssConstLocations *jstate, const char *query,
299
- int *query_len_p, int encoding)
300
- {
301
- char *norm_query;
302
- int query_len = *query_len_p;
303
- int i,
304
- len_to_wrt, /* Length (in bytes) to write */
305
- quer_loc = 0, /* Source query byte location */
306
- n_quer_loc = 0, /* Normalized query byte location */
307
- last_off = 0, /* Offset from start for previous tok */
308
- last_tok_len = 0; /* Length (in bytes) of that tok */
309
-
310
- /*
311
- * Get constants' lengths (core system only gives us locations). Note
312
- * this also ensures the items are sorted by location.
313
- */
314
- fill_in_constant_lengths(jstate, query);
315
-
316
- /* Allocate result buffer */
317
- norm_query = palloc(query_len + 1);
318
-
319
- for (i = 0; i < jstate->clocations_count; i++)
320
- {
321
- int off, /* Offset from start for cur tok */
322
- tok_len; /* Length (in bytes) of that tok */
323
-
324
- off = jstate->clocations[i].location;
325
- tok_len = jstate->clocations[i].length;
326
-
327
- if (tok_len < 0)
328
- continue; /* ignore any duplicates */
329
-
330
- /* Copy next chunk (what precedes the next constant) */
331
- len_to_wrt = off - last_off;
332
- len_to_wrt -= last_tok_len;
333
-
334
- Assert(len_to_wrt >= 0);
335
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
336
- n_quer_loc += len_to_wrt;
337
-
338
- /* And insert a '?' in place of the constant token */
339
- norm_query[n_quer_loc++] = '?';
340
-
341
- quer_loc = off + tok_len;
342
- last_off = off;
343
- last_tok_len = tok_len;
344
- }
345
-
346
- /*
347
- * We've copied up until the last ignorable constant. Copy over the
348
- * remaining bytes of the original query string.
349
- */
350
- len_to_wrt = query_len - quer_loc;
351
-
352
- Assert(len_to_wrt >= 0);
353
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
354
- n_quer_loc += len_to_wrt;
355
-
356
- Assert(n_quer_loc <= query_len);
357
- norm_query[n_quer_loc] = '\0';
358
-
359
- *query_len_p = n_quer_loc;
360
- return norm_query;
361
- }
362
-
363
- static bool const_record_walker(Node *node, pgssConstLocations *jstate)
364
- {
365
- bool result;
366
-
367
- if (node == NULL) return false;
368
-
369
- if (IsA(node, A_Const) && ((A_Const *) node)->location >= 0)
370
- {
371
- /* enlarge array if needed */
372
- if (jstate->clocations_count >= jstate->clocations_buf_size)
373
- {
374
- jstate->clocations_buf_size *= 2;
375
- jstate->clocations = (pgssLocationLen *)
376
- repalloc(jstate->clocations,
377
- jstate->clocations_buf_size *
378
- sizeof(pgssLocationLen));
379
- }
380
- jstate->clocations[jstate->clocations_count].location = ((A_Const *) node)->location;
381
- /* initialize lengths to -1 to simplify fill_in_constant_lengths */
382
- jstate->clocations[jstate->clocations_count].length = -1;
383
- jstate->clocations_count++;
384
- }
385
- else if (IsA(node, VariableSetStmt))
386
- {
387
- return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);
388
- }
389
- else if (IsA(node, CopyStmt))
390
- {
391
- return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);
392
- }
393
-
394
- PG_TRY();
395
- {
396
- result = raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
397
- }
398
- PG_CATCH();
399
- {
400
- FlushErrorState();
401
- result = false;
402
- }
403
- PG_END_TRY();
404
-
405
- return result;
406
- }
407
-
408
- static VALUE pg_query_normalize(VALUE self, VALUE input)
409
- {
410
- Check_Type(input, T_STRING);
411
-
412
- MemoryContext ctx = NULL;
413
- VALUE result = Qnil;
414
- VALUE error = Qnil;
415
-
416
- ctx = AllocSetContextCreate(TopMemoryContext,
417
- "pg_query_normalize",
418
- ALLOCSET_DEFAULT_MINSIZE,
419
- ALLOCSET_DEFAULT_INITSIZE,
420
- ALLOCSET_DEFAULT_MAXSIZE);
421
- MemoryContextSwitchTo(ctx);
422
-
423
- PG_TRY();
424
- {
425
- List *tree;
426
- char *str;
427
- pgssConstLocations jstate;
428
- int query_len;
429
-
430
- /* Parse query */
431
- str = StringValueCStr(input);
432
- tree = raw_parser(str);
433
-
434
- /* Set up workspace for constant recording */
435
- jstate.clocations_buf_size = 32;
436
- jstate.clocations = (pgssLocationLen *)
437
- palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
438
- jstate.clocations_count = 0;
439
-
440
- /* Walk tree and record const locations */
441
- const_record_walker((Node *) tree, &jstate);
442
-
443
- /* Normalize query */
444
- query_len = (int) strlen(str);
445
- str = generate_normalized_query(&jstate, str, &query_len, PG_UTF8);
446
-
447
- result = rb_str_new2(str);
448
-
449
- pfree(str);
450
- }
451
- PG_CATCH();
452
- {
453
- ErrorData* error_data = CopyErrorData();
454
- error = new_parse_error(error_data);
455
- FlushErrorState();
456
- }
457
- PG_END_TRY();
458
-
459
- MemoryContextSwitchTo(TopMemoryContext);
460
- MemoryContextDelete(ctx);
461
-
462
- // If we got an error, throw it
463
- if (!NIL_P(error)) rb_exc_raise(error);
464
-
465
- return result;
466
- }
467
-
468
5
  void Init_pg_query(void)
469
6
  {
470
7
  VALUE cPgQuery;
471
-
8
+
472
9
  MemoryContextInit();
473
10
 
474
11
  cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
475
12
 
476
13
  rb_define_singleton_method(cPgQuery, "_raw_parse", pg_query_raw_parse, 1);
477
14
  rb_define_singleton_method(cPgQuery, "normalize", pg_query_normalize, 1);
478
- }
15
+ }