pg_query 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +13 -10
- data/ext/pg_query/extconf.rb +16 -14
- data/ext/pg_query/pg_query.c +3 -466
- data/ext/pg_query/pg_query.h +18 -0
- data/ext/pg_query/pg_query_normalize.c +363 -0
- data/ext/pg_query/pg_query_parse.c +102 -0
- data/lib/pg_query.rb +4 -0
- data/lib/pg_query/filter_columns.rb +52 -39
- data/lib/pg_query/fingerprint.rb +25 -50
- data/lib/pg_query/param_refs.rb +37 -32
- data/lib/pg_query/parse.rb +65 -45
- data/lib/pg_query/parse_error.rb +1 -1
- data/lib/pg_query/version.rb +1 -1
- metadata +35 -4
data/lib/pg_query.rb
CHANGED
@@ -3,6 +3,10 @@ require 'pg_query/parse_error'
|
|
3
3
|
|
4
4
|
require 'pg_query/pg_query'
|
5
5
|
require 'pg_query/parse'
|
6
|
+
require 'pg_query/treewalker'
|
7
|
+
|
6
8
|
require 'pg_query/filter_columns'
|
7
9
|
require 'pg_query/fingerprint'
|
8
10
|
require 'pg_query/param_refs'
|
11
|
+
require 'pg_query/deparse'
|
12
|
+
require 'pg_query/truncate'
|
data/lib/pg_query/filter_columns.rb
CHANGED
@@ -3,7 +3,7 @@ class PgQuery
|
|
3
3
|
# target list, but includes things like JOIN condition and WHERE clause.
|
4
4
|
#
|
5
5
|
# Note: This also traverses into sub-selects.
|
6
|
-
def filter_columns
|
6
|
+
def filter_columns # rubocop:disable Metrics/CyclomaticComplexity
|
7
7
|
load_tables_and_aliases! if @aliases.nil?
|
8
8
|
|
9
9
|
# Get condition items from the parsetree
|
@@ -11,53 +11,63 @@ class PgQuery
|
|
11
11
|
condition_items = []
|
12
12
|
filter_columns = []
|
13
13
|
loop do
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
statement = statements.shift
|
15
|
+
if statement
|
16
|
+
if statement['SELECT']
|
17
|
+
if statement['SELECT']['op'] == 0
|
18
|
+
if statement['SELECT']['fromClause']
|
18
19
|
# FROM subselects
|
19
|
-
statement[
|
20
|
-
|
20
|
+
statement['SELECT']['fromClause'].each do |item|
|
21
|
+
next unless item['RANGESUBSELECT']
|
22
|
+
statements << item['RANGESUBSELECT']['subquery']
|
21
23
|
end
|
22
24
|
|
23
25
|
# JOIN ON conditions
|
24
|
-
condition_items += conditions_from_join_clauses(statement[
|
26
|
+
condition_items += conditions_from_join_clauses(statement['SELECT']['fromClause'])
|
25
27
|
end
|
26
28
|
|
27
29
|
# WHERE clause
|
28
|
-
condition_items << statement[
|
29
|
-
|
30
|
-
|
31
|
-
|
30
|
+
condition_items << statement['SELECT']['whereClause'] if statement['SELECT']['whereClause']
|
31
|
+
|
32
|
+
# CTEs
|
33
|
+
if statement['SELECT']['withClause']
|
34
|
+
statement['SELECT']['withClause']['WITHCLAUSE']['ctes'].each do |item|
|
35
|
+
statements << item['COMMONTABLEEXPR']['ctequery'] if item['COMMONTABLEEXPR']
|
36
|
+
end
|
37
|
+
end
|
38
|
+
elsif statement['SELECT']['op'] == 1
|
39
|
+
statements << statement['SELECT']['larg'] if statement['SELECT']['larg']
|
40
|
+
statements << statement['SELECT']['rarg'] if statement['SELECT']['rarg']
|
32
41
|
end
|
33
|
-
elsif statement[
|
34
|
-
condition_items << statement[
|
35
|
-
elsif statement[
|
36
|
-
condition_items << statement[
|
42
|
+
elsif statement['UPDATE']
|
43
|
+
condition_items << statement['UPDATE']['whereClause'] if statement['UPDATE']['whereClause']
|
44
|
+
elsif statement['DELETE FROM']
|
45
|
+
condition_items << statement['DELETE FROM']['whereClause'] if statement['DELETE FROM']['whereClause']
|
37
46
|
end
|
38
47
|
end
|
39
48
|
|
40
49
|
# Process both JOIN and WHERE conditions here
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
50
|
+
next_item = condition_items.shift
|
51
|
+
if next_item
|
52
|
+
if next_item.keys[0].start_with?('AEXPR') || next_item['ANY']
|
53
|
+
%w(lexpr rexpr).each do |side|
|
54
|
+
expr = next_item.values[0][side]
|
55
|
+
next unless expr && expr.is_a?(Hash)
|
46
56
|
condition_items << expr
|
47
57
|
end
|
48
|
-
elsif next_item[
|
49
|
-
condition_items += next_item[
|
50
|
-
elsif next_item[
|
51
|
-
column, table = next_item[
|
58
|
+
elsif next_item['ROW']
|
59
|
+
condition_items += next_item['ROW']['args']
|
60
|
+
elsif next_item['COLUMNREF']
|
61
|
+
column, table = next_item['COLUMNREF']['fields'].reverse
|
52
62
|
filter_columns << [@aliases[table] || table, column]
|
53
|
-
elsif next_item[
|
54
|
-
condition_items << next_item[
|
55
|
-
elsif next_item[
|
63
|
+
elsif next_item['NULLTEST']
|
64
|
+
condition_items << next_item['NULLTEST']['arg']
|
65
|
+
elsif next_item['FUNCCALL']
|
56
66
|
# FIXME: This should actually be extracted as a funccall and be compared with those indices
|
57
|
-
condition_items += next_item[
|
58
|
-
elsif next_item[
|
59
|
-
condition_items << next_item[
|
60
|
-
statements << next_item[
|
67
|
+
condition_items += next_item['FUNCCALL']['args'] if next_item['FUNCCALL']['args']
|
68
|
+
elsif next_item['SUBLINK']
|
69
|
+
condition_items << next_item['SUBLINK']['testexpr']
|
70
|
+
statements << next_item['SUBLINK']['subselect']
|
61
71
|
end
|
62
72
|
end
|
63
73
|
|
@@ -67,18 +77,21 @@ class PgQuery
|
|
67
77
|
filter_columns.uniq
|
68
78
|
end
|
69
79
|
|
70
|
-
protected
|
80
|
+
protected
|
81
|
+
|
71
82
|
def conditions_from_join_clauses(from_clause)
|
72
83
|
condition_items = []
|
73
84
|
from_clause.each do |item|
|
74
|
-
next unless item[
|
85
|
+
next unless item['JOINEXPR']
|
75
86
|
|
76
|
-
joinexpr_items = [item[
|
87
|
+
joinexpr_items = [item['JOINEXPR']]
|
77
88
|
loop do
|
78
|
-
|
79
|
-
|
80
|
-
[
|
81
|
-
|
89
|
+
next_item = joinexpr_items.shift
|
90
|
+
break unless next_item
|
91
|
+
condition_items << next_item['quals'] if next_item['quals']
|
92
|
+
%w(larg rarg).each do |side|
|
93
|
+
next unless next_item[side]['JOINEXPR']
|
94
|
+
joinexpr_items << next_item[side]['JOINEXPR']
|
82
95
|
end
|
83
96
|
end
|
84
97
|
end
|
data/lib/pg_query/fingerprint.rb
CHANGED
@@ -1,62 +1,37 @@
|
|
1
1
|
require 'digest'
|
2
2
|
|
3
3
|
class PgQuery
|
4
|
-
def fingerprint
|
4
|
+
def fingerprint # rubocop:disable Metrics/CyclomaticComplexity
|
5
5
|
normalized_parsetree = deep_dup(parsetree)
|
6
|
-
exprs = normalized_parsetree.dup
|
7
|
-
loop do
|
8
|
-
expr = exprs.shift
|
9
6
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
elsif !v.nil?
|
19
|
-
# Remove SELECT target list names & ignore order
|
20
|
-
if k == "targetList" && v.is_a?(Array)
|
21
|
-
v.each {|v| v["RESTARGET"]["name"] = nil if v["RESTARGET"] } # Remove names
|
22
|
-
v.sort_by! {|v| v.to_s }
|
23
|
-
expr[k] = v
|
24
|
-
end
|
25
|
-
|
26
|
-
# Ignore INSERT cols order
|
27
|
-
if k == "cols" && v.is_a?(Array)
|
28
|
-
v.sort_by! {|v| v.to_s }
|
29
|
-
expr[k] = v
|
30
|
-
end
|
31
|
-
|
32
|
-
# Process sub-expressions
|
33
|
-
exprs << v
|
34
|
-
end
|
35
|
-
end
|
36
|
-
elsif expr.is_a?(Array)
|
37
|
-
exprs += expr
|
7
|
+
# First delete all simple elements and attributes that can be removed
|
8
|
+
treewalker! normalized_parsetree do |expr, k, v|
|
9
|
+
if v.is_a?(Hash) && %w(A_CONST ALIAS PARAMREF).include?(v.keys[0])
|
10
|
+
# Remove constants, aliases and param references from tree
|
11
|
+
expr[k] = nil
|
12
|
+
elsif k == 'location'
|
13
|
+
# Remove location info in order to ignore whitespace and target list ordering
|
14
|
+
expr.delete(k)
|
38
15
|
end
|
39
|
-
|
40
|
-
break if exprs.empty?
|
41
16
|
end
|
42
17
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
18
|
+
# Now remove all unnecessary info
|
19
|
+
treewalker! normalized_parsetree do |expr, k, v|
|
20
|
+
if k == 'AEXPR IN' && v.is_a?(Hash) && v['rexpr'].is_a?(Array)
|
21
|
+
# Compact identical IN list elements to one
|
22
|
+
v['rexpr'].uniq!
|
23
|
+
elsif k == 'targetList' && v.is_a?(Array)
|
24
|
+
# Remove SELECT target list names & ignore order
|
25
|
+
v.each { |v2| v2['RESTARGET']['name'] = nil if v2['RESTARGET'] } # Remove names
|
26
|
+
v.sort_by!(&:to_s)
|
27
|
+
expr[k] = v
|
28
|
+
elsif k == 'cols' && v.is_a?(Array)
|
29
|
+
# Ignore INSERT cols order
|
30
|
+
v.sort_by!(&:to_s)
|
31
|
+
expr[k] = v
|
53
32
|
end
|
54
|
-
when Array
|
55
|
-
obj.map { |it| deep_dup(it) }
|
56
|
-
when NilClass, FalseClass, TrueClass, Symbol, Numeric
|
57
|
-
obj # Can't be duplicated
|
58
|
-
else
|
59
|
-
obj.dup
|
60
33
|
end
|
34
|
+
|
35
|
+
Digest::SHA1.hexdigest(normalized_parsetree.to_s)
|
61
36
|
end
|
62
37
|
end
|
data/lib/pg_query/param_refs.rb
CHANGED
@@ -1,40 +1,45 @@
|
|
1
1
|
class PgQuery
|
2
|
-
def param_refs
|
2
|
+
def param_refs # rubocop:disable Metrics/CyclomaticComplexity
|
3
3
|
results = []
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
if
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
exprs << v if !v.nil?
|
4
|
+
|
5
|
+
treewalker! parsetree do |_, _, v|
|
6
|
+
next unless v.is_a?(Hash)
|
7
|
+
|
8
|
+
if v['PARAMREF']
|
9
|
+
results << { 'location' => v['PARAMREF']['location'],
|
10
|
+
'length' => param_ref_length(v['PARAMREF']) }
|
11
|
+
elsif v['TYPECAST']
|
12
|
+
next unless v['TYPECAST']['arg'] && v['TYPECAST']['typeName']
|
13
|
+
|
14
|
+
p = v['TYPECAST']['arg'].delete('PARAMREF')
|
15
|
+
t = v['TYPECAST']['typeName'].delete('TYPENAME')
|
16
|
+
next unless p && t
|
17
|
+
|
18
|
+
location = p['location']
|
19
|
+
typeloc = t['location']
|
20
|
+
typename = t['names'].join('.')
|
21
|
+
length = param_ref_length(p)
|
22
|
+
|
23
|
+
if typeloc < location
|
24
|
+
length += location - typeloc
|
25
|
+
location = typeloc
|
30
26
|
end
|
31
|
-
elsif expr.is_a?(Array)
|
32
|
-
exprs += expr
|
33
|
-
end
|
34
27
|
|
35
|
-
|
28
|
+
results << { 'location' => location, 'length' => length, 'typename' => typename }
|
29
|
+
end
|
36
30
|
end
|
37
|
-
|
31
|
+
|
32
|
+
results.sort_by! { |r| r['location'] }
|
38
33
|
results
|
39
34
|
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def param_ref_length(paramref_node)
|
39
|
+
if paramref_node['number'] == 0
|
40
|
+
1 # Actually a ? replacement character
|
41
|
+
else
|
42
|
+
('$' + paramref_node['number'].to_s).size
|
43
|
+
end
|
44
|
+
end
|
40
45
|
end
|
data/lib/pg_query/parse.rb
CHANGED
@@ -6,8 +6,8 @@ class PgQuery
|
|
6
6
|
|
7
7
|
begin
|
8
8
|
parsetree = JSON.parse(parsetree, max_nesting: 1000)
|
9
|
-
rescue JSON::ParserError
|
10
|
-
raise ParseError.new(
|
9
|
+
rescue JSON::ParserError
|
10
|
+
raise ParseError.new('Failed to parse JSON', -1)
|
11
11
|
end
|
12
12
|
|
13
13
|
warnings = []
|
@@ -22,6 +22,7 @@ class PgQuery
|
|
22
22
|
attr_reader :query
|
23
23
|
attr_reader :parsetree
|
24
24
|
attr_reader :warnings
|
25
|
+
|
25
26
|
def initialize(query, parsetree, warnings = [])
|
26
27
|
@query = query
|
27
28
|
@parsetree = parsetree
|
@@ -38,8 +39,9 @@ class PgQuery
|
|
38
39
|
@aliases
|
39
40
|
end
|
40
41
|
|
41
|
-
protected
|
42
|
-
|
42
|
+
protected
|
43
|
+
|
44
|
+
def load_tables_and_aliases! # rubocop:disable Metrics/CyclomaticComplexity
|
43
45
|
@tables = []
|
44
46
|
@aliases = {}
|
45
47
|
|
@@ -48,32 +50,47 @@ protected
|
|
48
50
|
where_clause_items = []
|
49
51
|
|
50
52
|
loop do
|
51
|
-
|
53
|
+
statement = statements.shift
|
54
|
+
if statement
|
52
55
|
case statement.keys[0]
|
53
|
-
when
|
54
|
-
if statement[
|
55
|
-
(statement[
|
56
|
-
if item[
|
57
|
-
statements << item[
|
56
|
+
when 'SELECT'
|
57
|
+
if statement['SELECT']['op'] == 0
|
58
|
+
(statement['SELECT']['fromClause'] || []).each do |item|
|
59
|
+
if item['RANGESUBSELECT']
|
60
|
+
statements << item['RANGESUBSELECT']['subquery']
|
58
61
|
else
|
59
62
|
from_clause_items << item
|
60
63
|
end
|
61
64
|
end
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
+
|
66
|
+
# CTEs
|
67
|
+
if statement['SELECT']['withClause']
|
68
|
+
statement['SELECT']['withClause']['WITHCLAUSE']['ctes'].each do |item|
|
69
|
+
statements << item['COMMONTABLEEXPR']['ctequery'] if item['COMMONTABLEEXPR']
|
70
|
+
end
|
71
|
+
end
|
72
|
+
elsif statement['SELECT']['op'] == 1
|
73
|
+
statements << statement['SELECT']['larg'] if statement['SELECT']['larg']
|
74
|
+
statements << statement['SELECT']['rarg'] if statement['SELECT']['rarg']
|
75
|
+
end
|
76
|
+
when 'INSERT INTO', 'UPDATE', 'DELETE FROM', 'VACUUM', 'COPY', 'ALTER TABLE', 'CREATESTMT', 'INDEXSTMT', 'RULESTMT', 'CREATETRIGSTMT'
|
77
|
+
from_clause_items << statement.values[0]['relation']
|
78
|
+
when 'VIEWSTMT'
|
79
|
+
from_clause_items << statement['VIEWSTMT']['view']
|
80
|
+
statements << statement['VIEWSTMT']['query']
|
81
|
+
when 'REFRESHMATVIEWSTMT'
|
82
|
+
from_clause_items << statement['REFRESHMATVIEWSTMT']['relation']
|
83
|
+
when 'EXPLAIN'
|
84
|
+
statements << statement['EXPLAIN']['query']
|
85
|
+
when 'CREATE TABLE AS'
|
86
|
+
if statement['CREATE TABLE AS']['into'] && statement['CREATE TABLE AS']['into']['INTOCLAUSE']['rel']
|
87
|
+
from_clause_items << statement['CREATE TABLE AS']['into']['INTOCLAUSE']['rel']
|
65
88
|
end
|
66
|
-
when
|
67
|
-
from_clause_items
|
68
|
-
when
|
69
|
-
|
70
|
-
|
71
|
-
from_clause_items << statement["CREATE TABLE AS"]["into"]["INTOCLAUSE"]["rel"] rescue nil
|
72
|
-
when "LOCK", "TRUNCATE"
|
73
|
-
from_clause_items += statement.values[0]["relations"]
|
74
|
-
when "GRANTSTMT"
|
75
|
-
objects = statement["GRANTSTMT"]["objects"]
|
76
|
-
case statement["GRANTSTMT"]["objtype"]
|
89
|
+
when 'LOCK', 'TRUNCATE'
|
90
|
+
from_clause_items += statement.values[0]['relations']
|
91
|
+
when 'GRANTSTMT'
|
92
|
+
objects = statement['GRANTSTMT']['objects']
|
93
|
+
case statement['GRANTSTMT']['objtype']
|
77
94
|
when 0 # Column
|
78
95
|
# FIXME
|
79
96
|
when 1 # Table
|
@@ -81,27 +98,29 @@ protected
|
|
81
98
|
when 2 # Sequence
|
82
99
|
# FIXME
|
83
100
|
end
|
84
|
-
when
|
85
|
-
objects = statement[
|
86
|
-
case statement[
|
101
|
+
when 'DROP'
|
102
|
+
objects = statement['DROP']['objects']
|
103
|
+
case statement['DROP']['removeType']
|
87
104
|
when 26 # Table
|
88
|
-
@tables += objects.map {|r| r.join('.') }
|
105
|
+
@tables += objects.map { |r| r.join('.') }
|
89
106
|
when 23 # Rule
|
90
|
-
@tables += objects.map {|r| r[0..-2].join('.') }
|
107
|
+
@tables += objects.map { |r| r[0..-2].join('.') }
|
91
108
|
when 28 # Trigger
|
92
|
-
@tables += objects.map {|r| r[0..-2].join('.') }
|
109
|
+
@tables += objects.map { |r| r[0..-2].join('.') }
|
93
110
|
end
|
94
111
|
end
|
95
112
|
|
96
|
-
where_clause_items << statement.values[0][
|
113
|
+
where_clause_items << statement.values[0]['whereClause'] if !statement.empty? && statement.values[0]['whereClause']
|
97
114
|
end
|
98
115
|
|
99
116
|
# Find subselects in WHERE clause
|
100
|
-
|
117
|
+
next_item = where_clause_items.shift
|
118
|
+
if next_item
|
101
119
|
case next_item.keys[0]
|
102
120
|
when /^AEXPR/, 'ANY'
|
103
|
-
|
104
|
-
|
121
|
+
%w(lexpr rexpr).each do |side|
|
122
|
+
elem = next_item.values[0][side]
|
123
|
+
next unless elem
|
105
124
|
if elem.is_a?(Array)
|
106
125
|
where_clause_items += elem
|
107
126
|
else
|
@@ -109,7 +128,7 @@ protected
|
|
109
128
|
end
|
110
129
|
end
|
111
130
|
when 'SUBLINK'
|
112
|
-
statements << next_item[
|
131
|
+
statements << next_item['SUBLINK']['subselect']
|
113
132
|
end
|
114
133
|
end
|
115
134
|
|
@@ -117,20 +136,21 @@ protected
|
|
117
136
|
end
|
118
137
|
|
119
138
|
loop do
|
120
|
-
|
139
|
+
next_item = from_clause_items.shift
|
140
|
+
break unless next_item
|
121
141
|
|
122
142
|
case next_item.keys[0]
|
123
|
-
when
|
124
|
-
|
125
|
-
from_clause_items << next_item[
|
143
|
+
when 'JOINEXPR'
|
144
|
+
%w(larg rarg).each do |side|
|
145
|
+
from_clause_items << next_item['JOINEXPR'][side]
|
126
146
|
end
|
127
|
-
when
|
128
|
-
from_clause_items += next_item[
|
129
|
-
when
|
130
|
-
rangevar = next_item[
|
131
|
-
table = [rangevar[
|
147
|
+
when 'ROW'
|
148
|
+
from_clause_items += next_item['ROW']['args']
|
149
|
+
when 'RANGEVAR'
|
150
|
+
rangevar = next_item['RANGEVAR']
|
151
|
+
table = [rangevar['schemaname'], rangevar['relname']].compact.join('.')
|
132
152
|
@tables << table
|
133
|
-
@aliases[rangevar[
|
153
|
+
@aliases[rangevar['alias']['ALIAS']['aliasname']] = table if rangevar['alias']
|
134
154
|
end
|
135
155
|
end
|
136
156
|
|