pg_query 0.1.2 → 0.2.0

checksums.yaml CHANGED
@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    ZWQ2ODM2MzRjNWVkODM4ZWJhMjliN2Y1Yjg4NzE4MzljYmUzM2ZkYw==
+    Zjc4NWU4NWY5MzMzNjYxMTA4NzU0ZDg2OWQ2OGQ4ZTQwZDBlMTRhYw==
   data.tar.gz: !binary |-
-    MDMwMjdmODc1MjM1YzAwMTZhNDg4ZDk2Y2YxMjVhZWMzOGNlODNmNw==
+    YzEzNzc3YjlkNzcxNTUxOGEwOGYyZjdjMWM5YzE4Y2EwY2QwODc0NA==
 SHA512:
   metadata.gz: !binary |-
-    MTAzODFhNjEyNWJmNmVhNzIxNGYwYWMzZWY0ZGE3YTAzOWYzOTMzZmVjZTA5
-    YWUyMGViNTgxMGIzOGU3NjRmZDMyZmUwMDBlNDUxMmZiYmYwZTMzNGY1MzE1
-    M2M3YTU3NGJkY2Y2N2I0NDI1NzIzZTEzY2ZlYTAyZWFiNTFmNGQ=
+    ZmFkNDYwYjk1OGU1MGYzNjdkZDJjNzRmOGFjNWVkYzMxNTBlNjk1ZTI2YjQ3
+    OTQxNTYxZTZkNzZhYjEyNzczMDYwMGFlMjdlNmRmNGNlZmU1MGFjMjg5MjU5
+    ZGI5OGFlYWUzYjZhNGQ3MDFmMjQ5NzdjMGM1OGJlNWNiYTI5OGQ=
   data.tar.gz: !binary |-
-    ZWI2YzI0Mjc5NzIzMDU1MGJiNzIyNzIzMGI1Y2UwMmI1MGE4NTk5ZThjYjE4
-    YTliZTkxMDM3MjM2OGQxODZjY2JhZGZkYTNkNWQ1MjMwNGVlMGM4OWEyMzc0
-    YTk4NmRmOTRmOTc0ZTJjOTQ3NDZiMGQxNGYxYjVkMWYzNzIyZWE=
+    MDY3N2U2NGUzNzg2ZjRjMTdmMzhkMDBmYjY4ODBjM2MxMGFhZGYwM2UzMzAx
+    OTFkMzIxMDExODM0MjhhZGY5MzhiNjMxZDg2OTNiMTU4NjE2MzQzMDEzNjhl
+    YTE5ODc2ODRjNDM1ZTI1NTM2YjdkZTQxNDE4ZjljOGRiNThkYjA=
data/Rakefile CHANGED
@@ -11,4 +11,10 @@ RSpec::Core::RakeTask.new
 task spec: :compile
 
 task default: :spec
-task test: :spec
+task test: :spec
+
+task :clean do
+  FileUtils.rm_rf File.join(File.dirname(__FILE__), "tmp/")
+  FileUtils.rm_f Dir.glob(File.join(File.dirname(__FILE__), "ext/pg_query/*.o"))
+  FileUtils.rm_f File.join(File.dirname(__FILE__), "lib/pg_query/pg_query.bundle")
+end
@@ -8,10 +8,10 @@ pgdir = File.join(workdir, "postgres")
 # Note: We intentionally use a patched version that fixes bugs in outfuncs.c
 if !Dir.exists?(pgdir)
   unless File.exists?("#{workdir}/postgres.zip")
-    system("curl https://codeload.github.com/pganalyze/postgres/zip/more-outfuncs -o #{workdir}/postgres.zip") || raise("ERROR")
+    system("curl https://codeload.github.com/pganalyze/postgres/zip/pg_query -o #{workdir}/postgres.zip") || raise("ERROR")
   end
   system("unzip -q #{workdir}/postgres.zip -d #{workdir}") || raise("ERROR")
-  system("mv #{workdir}/postgres-more-outfuncs #{pgdir}") || raise("ERROR")
+  system("mv #{workdir}/postgres-pg_query #{pgdir}") || raise("ERROR")
   system("cd #{pgdir}; CFLAGS=-fPIC ./configure") || raise("ERROR")
   system("cd #{pgdir}; make") || raise("ERROR")
 end
@@ -1,7 +1,10 @@
 #include "postgres.h"
 #include "utils/memutils.h"
 #include "parser/parser.h"
+#include "parser/scanner.h"
 #include "nodes/print.h"
+#include "nodes/nodeFuncs.h"
+#include "mb/pg_wchar.h"
 
 #include <unistd.h>
 #include <fcntl.h>
@@ -40,7 +43,7 @@ static VALUE pg_query_raw_parse(VALUE self, VALUE input)
     int stderr_pipe[2];
 
     ctx = AllocSetContextCreate(TopMemoryContext,
-                                "RootContext",
+                                "pg_query_raw_parse",
                                 ALLOCSET_DEFAULT_MINSIZE,
                                 ALLOCSET_DEFAULT_INITSIZE,
                                 ALLOCSET_DEFAULT_MAXSIZE);
@@ -96,6 +99,333 @@ static VALUE pg_query_raw_parse(VALUE self, VALUE input)
     return result;
 }
 
+/*
+ * Struct for tracking locations/lengths of constants during normalization
+ */
+typedef struct pgssLocationLen
+{
+    int location;    /* start offset in query text */
+    int length;      /* length in bytes, or -1 to ignore */
+} pgssLocationLen;
+
+/*
+ * Working state for constant tree walker
+ */
+typedef struct pgssConstLocations
+{
+    /* Array of locations of constants that should be removed */
+    pgssLocationLen *clocations;
+
+    /* Allocated length of clocations array */
+    int clocations_buf_size;
+
+    /* Current number of valid entries in clocations array */
+    int clocations_count;
+} pgssConstLocations;
+
+/*
+ * comp_location: comparator for qsorting pgssLocationLen structs by location
+ */
+static int
+comp_location(const void *a, const void *b)
+{
+    int l = ((const pgssLocationLen *) a)->location;
+    int r = ((const pgssLocationLen *) b)->location;
+
+    if (l < r)
+        return -1;
+    else if (l > r)
+        return +1;
+    else
+        return 0;
+}
+
+/*
+ * Given a valid SQL string and an array of constant-location records,
+ * fill in the textual lengths of those constants.
+ *
+ * The constants may use any allowed constant syntax, such as float literals,
+ * bit-strings, single-quoted strings and dollar-quoted strings. This is
+ * accomplished by using the public API for the core scanner.
+ *
+ * It is the caller's job to ensure that the string is a valid SQL statement
+ * with constants at the indicated locations. Since in practice the string
+ * has already been parsed, and the locations that the caller provides will
+ * have originated from within the authoritative parser, this should not be
+ * a problem.
+ *
+ * Duplicate constant pointers are possible, and will have their lengths
+ * marked as '-1', so that they are later ignored. (Actually, we assume the
+ * lengths were initialized as -1 to start with, and don't change them here.)
+ *
+ * N.B. There is an assumption that a '-' character at a Const location begins
+ * a negative numeric constant. This precludes there ever being another
+ * reason for a constant to start with a '-'.
+ */
+static void
+fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
+{
+    pgssLocationLen *locs;
+    core_yyscan_t yyscanner;
+    core_yy_extra_type yyextra;
+    core_YYSTYPE yylval;
+    YYLTYPE yylloc;
+    int last_loc = -1;
+    int i;
+
+    /*
+     * Sort the records by location so that we can process them in order while
+     * scanning the query text.
+     */
+    if (jstate->clocations_count > 1)
+        qsort(jstate->clocations, jstate->clocations_count,
+              sizeof(pgssLocationLen), comp_location);
+    locs = jstate->clocations;
+
+    /* initialize the flex scanner --- should match raw_parser() */
+    yyscanner = scanner_init(query,
+                             &yyextra,
+                             ScanKeywords,
+                             NumScanKeywords);
+
+    /* Search for each constant, in sequence */
+    for (i = 0; i < jstate->clocations_count; i++)
+    {
+        int loc = locs[i].location;
+        int tok;
+
+        Assert(loc >= 0);
+
+        if (loc <= last_loc)
+            continue;    /* Duplicate constant, ignore */
+
+        /* Lex tokens until we find the desired constant */
+        for (;;)
+        {
+            tok = core_yylex(&yylval, &yylloc, yyscanner);
+
+            /* We should not hit end-of-string, but if we do, behave sanely */
+            if (tok == 0)
+                break;    /* out of inner for-loop */
+
+            /*
+             * We should find the token position exactly, but if we somehow
+             * run past it, work with that.
+             */
+            if (yylloc >= loc)
+            {
+                if (query[loc] == '-')
+                {
+                    /*
+                     * It's a negative value - this is the one and only case
+                     * where we replace more than a single token.
+                     *
+                     * Do not compensate for the core system's special-case
+                     * adjustment of location to that of the leading '-'
+                     * operator in the event of a negative constant. It is
+                     * also useful for our purposes to start from the minus
+                     * symbol. In this way, queries like "select * from foo
+                     * where bar = 1" and "select * from foo where bar = -2"
+                     * will have identical normalized query strings.
+                     */
+                    tok = core_yylex(&yylval, &yylloc, yyscanner);
+                    if (tok == 0)
+                        break;    /* out of inner for-loop */
+                }
+
+                /*
+                 * We now rely on the assumption that flex has placed a zero
+                 * byte after the text of the current token in scanbuf.
+                 */
+                locs[i].length = (int) strlen(yyextra.scanbuf + loc);
+                break;    /* out of inner for-loop */
+            }
+        }
+
+        /* If we hit end-of-string, give up, leaving remaining lengths -1 */
+        if (tok == 0)
+            break;
+
+        last_loc = loc;
+    }
+
+    scanner_finish(yyscanner);
+}
+
+/*
+ * Generate a normalized version of the query string that will be used to
+ * represent all similar queries.
+ *
+ * Note that the normalized representation may well vary depending on
+ * just which "equivalent" query is used to create the hashtable entry.
+ * We assume this is OK.
+ *
+ * *query_len_p contains the input string length, and is updated with
+ * the result string length (which cannot be longer) on exit.
+ *
+ * Returns a palloc'd string.
+ */
+static char *
+generate_normalized_query(pgssConstLocations *jstate, const char *query,
+                          int *query_len_p, int encoding)
+{
+    char *norm_query;
+    int query_len = *query_len_p;
+    int i,
+        len_to_wrt,        /* Length (in bytes) to write */
+        quer_loc = 0,      /* Source query byte location */
+        n_quer_loc = 0,    /* Normalized query byte location */
+        last_off = 0,      /* Offset from start for previous tok */
+        last_tok_len = 0;  /* Length (in bytes) of that tok */
+
+    /*
+     * Get constants' lengths (core system only gives us locations). Note
+     * this also ensures the items are sorted by location.
+     */
+    fill_in_constant_lengths(jstate, query);
+
+    /* Allocate result buffer */
+    norm_query = palloc(query_len + 1);
+
+    for (i = 0; i < jstate->clocations_count; i++)
+    {
+        int off,      /* Offset from start for cur tok */
+            tok_len;  /* Length (in bytes) of that tok */
+
+        off = jstate->clocations[i].location;
+        tok_len = jstate->clocations[i].length;
+
+        if (tok_len < 0)
+            continue;    /* ignore any duplicates */
+
+        /* Copy next chunk (what precedes the next constant) */
+        len_to_wrt = off - last_off;
+        len_to_wrt -= last_tok_len;
+
+        Assert(len_to_wrt >= 0);
+        memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+        n_quer_loc += len_to_wrt;
+
+        /* And insert a '?' in place of the constant token */
+        norm_query[n_quer_loc++] = '?';
+
+        quer_loc = off + tok_len;
+        last_off = off;
+        last_tok_len = tok_len;
+    }
+
+    /*
+     * We've copied up until the last ignorable constant. Copy over the
+     * remaining bytes of the original query string.
+     */
+    len_to_wrt = query_len - quer_loc;
+
+    Assert(len_to_wrt >= 0);
+    memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+    n_quer_loc += len_to_wrt;
+
+    Assert(n_quer_loc <= query_len);
+    norm_query[n_quer_loc] = '\0';
+
+    *query_len_p = n_quer_loc;
+    return norm_query;
+}
+
+bool const_record_walker(Node *node, pgssConstLocations *jstate)
+{
+    if (node == NULL) return false;
+
+    if (IsA(node, A_Const) && ((A_Const *) node)->location >= 0)
+    {
+        /* enlarge array if needed */
+        if (jstate->clocations_count >= jstate->clocations_buf_size)
+        {
+            jstate->clocations_buf_size *= 2;
+            jstate->clocations = (pgssLocationLen *)
+                repalloc(jstate->clocations,
+                         jstate->clocations_buf_size *
+                         sizeof(pgssLocationLen));
+        }
+        jstate->clocations[jstate->clocations_count].location = ((A_Const *) node)->location;
+        /* initialize lengths to -1 to simplify fill_in_constant_lengths */
+        jstate->clocations[jstate->clocations_count].length = -1;
+        jstate->clocations_count++;
+    }
+    //else if (isA(node, Query))
+    //{
+    //    return query_tree_walker(node, const_record_walker, jstate, 0);
+    //}
+
+    PG_TRY();
+    {
+        return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
+    }
+    PG_CATCH();
+    {
+        return false;
+    }
+    PG_END_TRY();
+}
+
+static VALUE pg_query_normalize(VALUE self, VALUE input)
+{
+    Check_Type(input, T_STRING);
+
+    MemoryContext ctx = NULL;
+    VALUE result;
+    ErrorData* error = NULL;
+
+    ctx = AllocSetContextCreate(TopMemoryContext,
+                                "pg_query_normalize",
+                                ALLOCSET_DEFAULT_MINSIZE,
+                                ALLOCSET_DEFAULT_INITSIZE,
+                                ALLOCSET_DEFAULT_MAXSIZE);
+    MemoryContextSwitchTo(ctx);
+
+    PG_TRY();
+    {
+        List *tree;
+        char *str;
+        pgssConstLocations jstate;
+        int query_len;
+
+        /* Parse query */
+        str = StringValueCStr(input);
+        tree = raw_parser(str);
+
+        /* Set up workspace for constant recording */
+        jstate.clocations_buf_size = 32;
+        jstate.clocations = (pgssLocationLen *)
+            palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
+        jstate.clocations_count = 0;
+
+        /* Walk tree and record const locations */
+        const_record_walker((Node *) tree, &jstate);
+
+        /* Normalize query */
+        query_len = (int) strlen(str);
+        str = generate_normalized_query(&jstate, str, &query_len, PG_UTF8);
+
+        result = rb_tainted_str_new_cstr(str);
+
+        pfree(str);
+    }
+    PG_CATCH();
+    {
+        error = CopyErrorData();
+        FlushErrorState();
+    }
+    PG_END_TRY();
+
+    MemoryContextSwitchTo(TopMemoryContext);
+    MemoryContextDelete(ctx);
+
+    // If we got an error, throw a ParseError exception
+    if (error) raise_parse_error(error);
+
+    return result;
+}
+
 void Init_pg_query(void)
 {
     VALUE cPgQuery;
@@ -105,4 +435,5 @@ void Init_pg_query(void)
     cPgQuery = rb_const_get(rb_cObject, rb_intern("PgQuery"));
 
     rb_define_singleton_method(cPgQuery, "_raw_parse", pg_query_raw_parse, 1);
+    rb_define_singleton_method(cPgQuery, "normalize", pg_query_normalize, 1);
 }
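
With this extension in place, the new entry point can be called straight from Ruby. A minimal usage sketch (the return value shown is an assumption based on generate_normalized_query above, which replaces each constant with '?'):

    require 'pg_query'

    # PgQuery.normalize is the singleton method registered in Init_pg_query above.
    # It parses the query, records every A_Const location, and rewrites each
    # constant as '?', returning the normalized string.
    normalized = PgQuery.normalize("SELECT * FROM users WHERE id = 42 AND name = 'Ann'")
    # expected (assumption): "SELECT * FROM users WHERE id = ? AND name = ?"
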
data/lib/pg_query.rb CHANGED
@@ -2,5 +2,4 @@ require 'pg_query/version'
 require 'pg_query/parse_error'
 
 require 'pg_query/pg_query'
-require 'pg_query/parse'
-require 'pg_query/parse_normalized'
+require 'pg_query/parse'
data/lib/pg_query/version.rb CHANGED
@@ -1,3 +1,3 @@
 class PgQuery
-  VERSION = '0.1.2'
+  VERSION = '0.2.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pg_query
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.2.0
 platform: ruby
 authors:
 - Lukas Fittl
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-13 00:00:00.000000000 Z
+date: 2014-05-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake-compiler
@@ -67,7 +67,6 @@ files:
 - lib/pg_query.rb
 - lib/pg_query/parse.rb
 - lib/pg_query/parse_error.rb
-- lib/pg_query/parse_normalized.rb
 - lib/pg_query/version.rb
 homepage: http://github.com/pganalyze/pg_query
 licenses:
data/lib/pg_query/parse_normalized.rb DELETED
@@ -1,51 +0,0 @@
-class PgQuery
-  # Parses a query that has been normalized by pg_stat_statements
-  def self.parse_normalized(original_query)
-    # Transform ? into \uFFED
-    query = normalized_to_parseable_query(original_query)
-
-    # Parse it!
-    result = parse(query)
-
-    # Transform \uFFED references as if they were $0
-    parsed_to_normalized_parsetree!(result.parsetree)
-
-    PgQuery.new(original_query, result.parsetree, result.warnings)
-  end
-
-  protected
-  # The PostgreSQL parser doesn't understand pg_stat_statements replacement characters,
-  # change them into a fake column reference to an unusual unicode character \uFFED
-  def self.normalized_to_parseable_query(query)
-    regexps = [
-      'INTERVAL ?',
-      /\$[0-9]+\?/,
-      '?.?',
-      /(?<!\\)\?/, # Replace all ?, unless they are escaped by a backslash
-    ]
-    regexps.each do |re|
-      query = query.gsub(re) {|m| "\uFFED" * m.size }
-    end
-    query
-  end
-
-  # Modifies the passed in parsetree to have paramrefs to $0 instead of columnref to \uFFED
-  def self.parsed_to_normalized_parsetree!(parsetree)
-    expressions = parsetree.dup
-    loop do
-      break unless expression = expressions.shift
-
-      if expression.is_a?(Array)
-        expressions += expression.compact
-      elsif expression.is_a?(Hash)
-        value = expression['COLUMNREF'] && expression['COLUMNREF']['fields']
-        if value && value.size == 1 && value[0].is_a?(String) && value[0].chars.to_a.uniq == ["\uFFED"]
-          expression.replace('PARAMREF' => {'number' => 0,
-                                            'location' => expression['COLUMNREF']['location']})
-        else
-          expressions += expression.values.compact
-        end
-      end
-    end
-  end
-end
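
For contrast, the removed helper above never normalized queries itself; it made already-normalized queries (as emitted by pg_stat_statements) parseable by swapping '?' placeholders for a sentinel identifier before handing them to the parser. A minimal standalone sketch of that core transformation, simplified from the removed code:

    # Simplified from the removed normalized_to_parseable_query: every '?' that
    # is not escaped by a backslash becomes the sentinel character \uFFED, so the
    # PostgreSQL parser sees a (fake) column reference instead of a syntax error.
    SENTINEL = "\uFFED"
    query = "SELECT * FROM users WHERE id = ? AND name = ?"
    parseable = query.gsub(/(?<!\\)\?/) { |m| SENTINEL * m.size }
    # => "SELECT * FROM users WHERE id = \uFFED AND name = \uFFED"
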