pgsqlarbiter 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
# frozen_string_literal: true

require "set"

module Pgsqlarbiter
  # Default whitelist of PostgreSQL functions permitted in queries. It deliberately
  # includes functions that could cause resource exhaustion (e.g. generate_series);
  # resource limits should be enforced elsewhere.
  #
  # Excluded: functions not used by regular queries such as pg_sleep, set_config,
  # lo_*, pg_advisory_lock, pg_notify, sequence functions, and system information
  # functions.
  DEFAULT_QUERY_FUNCTIONS = Set.new(
    # -- Aggregate functions --
    %w[array_agg avg bit_and bit_or bit_xor
       bool_and bool_or count every
       json_agg jsonb_agg json_object_agg jsonb_object_agg
       max min range_agg range_intersect_agg
       string_agg sum xmlagg] +
    # -- Statistical aggregate functions --
    %w[corr covar_pop covar_samp
       regr_avgx regr_avgy regr_count regr_intercept
       regr_r2 regr_slope regr_sxx regr_sxy regr_syy
       stddev stddev_pop stddev_samp
       variance var_pop var_samp] +
    # -- Ordered-set aggregate functions --
    %w[mode percentile_cont percentile_disc] +
    # -- Window functions --
    %w[row_number rank dense_rank percent_rank cume_dist
       ntile lag lead first_value last_value nth_value] +
    # -- Mathematical functions --
    %w[abs cbrt ceil ceiling degrees div
       exp factorial floor gcd lcm
       ln log log10 min_scale mod
       pi power radians random
       round scale sign sqrt
       trim_scale trunc width_bucket] +
    # -- Trigonometric functions --
    %w[acos acosd asin asind
       atan atan2 atan2d atand
       cos cosd cot cotd
       sin sind tan tand] +
    # -- Hyperbolic functions --
    %w[sinh cosh tanh asinh acosh atanh] +
    # -- String functions --
    %w[ascii btrim char_length character_length
       chr concat concat_ws
       convert convert_from convert_to
       decode encode format
       initcap left length lower
       lpad ltrim md5
       normalize octet_length overlay
       parse_ident position
       quote_ident quote_literal quote_nullable
       regexp_count regexp_instr regexp_like
       regexp_match regexp_matches regexp_replace
       regexp_split_to_array regexp_split_to_table regexp_substr
       repeat replace reverse right
       rpad rtrim split_part
       starts_with string_to_array string_to_table
       strpos substr substring
       to_ascii to_hex translate trim
       unicode unistr upper] +
    # -- Binary string functions --
    %w[bit_length get_bit get_byte
       set_bit set_byte
       sha224 sha256 sha384 sha512] +
    # -- Date/time functions --
    %w[age clock_timestamp date_bin
       date_part date_trunc extract
       isfinite justify_days justify_hours justify_interval
       make_date make_interval make_time
       make_timestamp make_timestamptz
       now statement_timestamp
       timeofday transaction_timestamp] +
    # -- Formatting functions --
    %w[to_char to_date to_number to_timestamp] +
    # -- Conditional functions --
    %w[coalesce nullif greatest least] +
    # -- Comparison functions --
    %w[num_nulls num_nonnulls] +
    # -- JSON/JSONB functions --
    %w[to_json to_jsonb array_to_json row_to_json
       json_build_array jsonb_build_array
       json_build_object jsonb_build_object
       json_object jsonb_object
       json_array jsonb_array
       json_array_length jsonb_array_length
       json_each jsonb_each
       json_each_text jsonb_each_text
       json_extract_path jsonb_extract_path
       json_extract_path_text jsonb_extract_path_text
       json_object_keys jsonb_object_keys
       json_populate_record jsonb_populate_record
       json_populate_recordset jsonb_populate_recordset
       json_to_record jsonb_to_record
       json_to_recordset jsonb_to_recordset
       json_strip_nulls jsonb_strip_nulls
       jsonb_set jsonb_set_lax jsonb_insert
       jsonb_path_exists jsonb_path_match
       jsonb_path_query jsonb_path_query_array jsonb_path_query_first
       jsonb_path_exists_tz jsonb_path_match_tz
       jsonb_path_query_tz jsonb_path_query_array_tz jsonb_path_query_first_tz
       jsonb_pretty
       json_typeof jsonb_typeof
       json_array_elements jsonb_array_elements
       json_array_elements_text jsonb_array_elements_text
       json_scalar jsonb_scalar
       json_table] +
    # -- Array functions --
    %w[array_append array_cat array_dims array_fill
       array_length array_lower array_ndims
       array_position array_positions
       array_prepend array_remove array_replace
       array_sample array_shuffle
       array_to_string array_upper
       cardinality trim_array unnest] +
    # -- Range/multirange functions --
    %w[isempty lower_inc upper_inc lower_inf upper_inf
       range_merge multirange
       int4range int8range numrange
       tsrange tstzrange daterange
       int4multirange int8multirange nummultirange
       tsmultirange tstzmultirange datemultirange] +
    # -- Set-returning functions --
    %w[generate_series generate_subscripts] +
    # -- Geometric functions --
    %w[area center diagonal diameter height
       isclosed isopen npoints
       pclose popen radius slope width
       box circle line lseg path point polygon] +
    # -- Network address functions --
    %w[abbrev broadcast family
       host hostmask inet_merge inet_same_family
       masklen netmask network set_masklen] +
    # -- Text search functions --
    %w[array_to_tsvector numnode
       plainto_tsquery phraseto_tsquery
       querytree setweight strip
       to_tsquery to_tsvector
       ts_delete ts_filter ts_headline ts_lexize
       ts_rank ts_rank_cd ts_rewrite
       tsvector_to_array websearch_to_tsquery] +
    # -- XML functions --
    %w[xmlcomment xmlconcat xmlexists
       xmlelement xmlforest xmlparse xmlroot xmlserialize
       xmltable
       xpath xpath_exists] +
    # -- Grouping function --
    %w[grouping] +
    # -- Enum functions --
    %w[enum_first enum_last enum_range] +
    # -- UUID functions --
    %w[gen_random_uuid uuidv4 uuidv7]
  ).freeze
end
# frozen_string_literal: true

module Pgsqlarbiter
  # Base error class for all pgsqlarbiter errors.
  Error = Class.new(StandardError)

  # Raised when the lexer encounters invalid syntax
  # (e.g. unterminated strings or unexpected characters).
  LexError = Class.new(Error)

  # Raised when the analyzer encounters invalid or unparseable SQL structure.
  ParseError = Class.new(Error)

  # Raised when the SQL contains more than one statement.
  MultipleStatementsError = Class.new(Error)

  # Raised when the statement type is not a supported DML type
  # (e.g. DDL, DCL, or TCL).
  DisallowedStatementError = Class.new(Error)
end
# frozen_string_literal: true

require "set"

module Pgsqlarbiter
  # Keyword tables shared by the lexer and analyzer.
  module Keywords
    # Statement-type keywords the analyzer accepts (DML plus the
    # WITH / VALUES entry points).
    ALLOWED_STATEMENT_TYPES = Set[
      "SELECT", "INSERT", "UPDATE", "DELETE", "MERGE", "VALUES", "WITH"
    ].freeze

    # Keywords that take parentheses but are NOT function calls
    NON_FUNCTION_KEYWORDS = Set[
      "EXISTS", "CASE", "CAST", "IN", "NOT", "ANY", "ALL", "SOME",
      "ARRAY", "ROW", "VALUES", "LATERAL", "TABLE"
    ].freeze

    # Keywords that ARE function calls despite being reserved words
    FUNCTION_KEYWORDS = Set[
      "COALESCE", "NULLIF", "GREATEST", "LEAST", "EXTRACT",
      "TRIM", "SUBSTRING", "OVERLAY", "POSITION", "NORMALIZE", "GROUPING",
      "XMLELEMENT", "XMLFOREST", "XMLPARSE", "XMLROOT", "XMLSERIALIZE"
    ].freeze

    # Complete set of keywords the lexer recognizes.
    #
    # NOTE: the previous revision listed "ELSE" twice (once in the CASE group
    # and once in the FOR/IF group); Set[] deduplicated it silently, but the
    # redundant literal is removed here.
    ALL = Set[
      # Statement types
      "SELECT", "INSERT", "UPDATE", "DELETE", "MERGE", "VALUES", "WITH",
      # Disallowed statement types (rejected by analyzer)
      "CREATE", "DROP", "ALTER", "TRUNCATE", "GRANT", "REVOKE",
      "SHOW", "SET", "RESET",
      "BEGIN", "START", "COMMIT", "ROLLBACK", "SAVEPOINT", "RELEASE",
      "PREPARE", "EXECUTE", "DEALLOCATE",
      "LISTEN", "NOTIFY", "UNLISTEN",
      "LOAD", "COPY", "VACUUM", "ANALYZE", "CLUSTER", "REINDEX",
      "LOCK", "DISCARD", "COMMENT", "SECURITY", "REASSIGN", "REFRESH",
      "IMPORT", "CALL", "DO", "EXPLAIN",
      # Structural keywords
      "FROM", "JOIN", "INNER", "LEFT", "RIGHT", "FULL", "CROSS",
      "NATURAL", "OUTER", "ON", "USING", "INTO", "AS",
      "WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET", "FETCH",
      "UNION", "INTERSECT", "EXCEPT",
      "ALL", "DISTINCT", "LATERAL", "ONLY", "TABLE",
      "RETURNING", "RECURSIVE", "COLUMNS",
      "NOT", "MATERIALIZED",
      "MATCHED", "WHEN", "THEN", "BY", "CONFLICT",
      "AND", "OR", "IS", "IN", "BETWEEN",
      "LIKE", "ILIKE", "SIMILAR",
      "CASE", "CAST", "END", "ELSE",
      "EXISTS", "ANY", "SOME", "ARRAY", "ROW",
      "WINDOW", "OVER", "PARTITION", "WITHIN", "FILTER",
      "NOTHING",
      "FOR", "IF", "TRUE", "FALSE", "NULL",
      "ASC", "DESC", "NULLS", "FIRST", "LAST",
      # Function keywords
      "COALESCE", "NULLIF", "GREATEST", "LEAST", "EXTRACT",
      "TRIM", "SUBSTRING", "OVERLAY", "POSITION", "NORMALIZE", "GROUPING",
      "XMLELEMENT", "XMLFOREST", "XMLPARSE", "XMLROOT", "XMLSERIALIZE",
      # Type keywords (common)
      "INT", "INTEGER", "BIGINT", "SMALLINT", "REAL", "FLOAT",
      "DOUBLE", "PRECISION", "NUMERIC", "DECIMAL",
      "CHAR", "CHARACTER", "VARCHAR", "TEXT",
      "BOOLEAN", "DATE", "TIME", "TIMESTAMP", "INTERVAL",
      "BOTH", "LEADING", "TRAILING"
    ].freeze
  end
end
# frozen_string_literal: true

require "strscan"

module Pgsqlarbiter
  # SQL lexer that converts a query string into an array of {Token} objects.
  #
  # Handles all PostgreSQL token types including keywords, identifiers (plain and
  # double-quoted), strings (single-quoted, dollar-quoted, and prefixed), numbers,
  # parameters, operators, and punctuation.
  class Lexer
    include TokenType

    # Tokenize a SQL query string.
    #
    # @param sql [String] the SQL string to tokenize
    # @return [Array<Token>] list of tokens ending with an EOF token
    # @raise [LexError] on invalid syntax such as unexpected characters or unterminated
    #   strings/comments
    def tokenize(sql)
      @scanner = StringScanner.new(sql)
      @tokens = []

      scan_token until @scanner.eos?

      @tokens << Token.new(type: EOF, value: nil, position: @scanner.pos)
      @tokens
    end

    private

    # Consume exactly one lexical element (or skippable whitespace/comment)
    # from the scanner, appending at most one token to @tokens.
    def scan_token
      pos = @scanner.pos

      # 1. Whitespace — skip
      return if @scanner.skip(/\s+/)

      # 2. Line comment — skip
      return if @scanner.skip(/--[^\n]*/)

      # 3. Block comment — skip (handle nesting)
      if @scanner.scan(/\/\*/)
        scan_block_comment(pos)
        return
      end

      # 4. Dollar-quoted string
      if @scanner.check(/\$([\p{L}_][\p{L}\d_]*)?\$/)
        scan_dollar_string(pos)
        return
      end

      # 5. Parameter placeholder ($1, $2, ...)
      if (m = @scanner.scan(/\$\d+/))
        @tokens << Token.new(type: PARAM, value: m, position: pos)
        return
      end

      # 6. Single-quoted strings (including E'', B'', X'', N'' prefixes)
      #    Prefix detection is handled in the identifier branch (step 12)
      if @scanner.check(/'/)
        scan_single_quoted_string(pos, prefix: nil)
        return
      end

      # 7. Double-quoted identifier
      if @scanner.check(/"/)
        scan_quoted_identifier(pos, unicode: false)
        return
      end

      # 8. Numbers (hex/octal/binary literals, decimals, exponents, and
      #    PostgreSQL 16+ underscore digit separators)
      if (m = @scanner.scan(/0[xX][0-9a-fA-F_]+/))
        @tokens << Token.new(type: NUMBER, value: m, position: pos)
        return
      end
      if (m = @scanner.scan(/0[oO][0-7_]+/))
        @tokens << Token.new(type: NUMBER, value: m, position: pos)
        return
      end
      if (m = @scanner.scan(/0[bB][01_]+/))
        @tokens << Token.new(type: NUMBER, value: m, position: pos)
        return
      end
      if (m = @scanner.scan(/\d[\d_]*\.[\d_]+(?:[eE][+-]?\d[\d_]*)?/))
        @tokens << Token.new(type: NUMBER, value: m, position: pos)
        return
      end
      if (m = @scanner.scan(/\d[\d_]*[eE][+-]?\d[\d_]*/))
        @tokens << Token.new(type: NUMBER, value: m, position: pos)
        return
      end
      if (m = @scanner.scan(/\d[\d_]*/))
        # Check this isn't followed by dot+digits (which would be a decimal)
        if @scanner.check(/\.[\d_]/)
          m += @scanner.scan(/\.[\d_]+(?:[eE][+-]?\d[\d_]*)?/)
        end
        @tokens << Token.new(type: NUMBER, value: m, position: pos)
        return
      end
      if (m = @scanner.scan(/\.[\d_]+(?:[eE][+-]?\d[\d_]*)?/))
        @tokens << Token.new(type: NUMBER, value: m, position: pos)
        return
      end

      # 9. Typecast :: (must be matched before single-char punctuation)
      if @scanner.scan(/::/)
        @tokens << Token.new(type: TYPECAST, value: "::", position: pos)
        return
      end

      # 10. Single-char punctuation
      ch = @scanner.peek(1)
      case ch
      when "("
        @scanner.getch
        @tokens << Token.new(type: LPAREN, value: "(", position: pos)
        return
      when ")"
        @scanner.getch
        @tokens << Token.new(type: RPAREN, value: ")", position: pos)
        return
      when "["
        @scanner.getch
        @tokens << Token.new(type: LBRACKET, value: "[", position: pos)
        return
      when "]"
        @scanner.getch
        @tokens << Token.new(type: RBRACKET, value: "]", position: pos)
        return
      when ","
        @scanner.getch
        @tokens << Token.new(type: COMMA, value: ",", position: pos)
        return
      when ";"
        @scanner.getch
        @tokens << Token.new(type: SEMICOLON, value: ";", position: pos)
        return
      when "*"
        @scanner.getch
        @tokens << Token.new(type: STAR, value: "*", position: pos)
        return
      when "."
        @scanner.getch
        @tokens << Token.new(type: DOT, value: ".", position: pos)
        return
      end

      # 11. Multi-char operators
      if (m = @scanner.scan(%r{[+\-/<>=~!@#%^&|`?]+}))
        @tokens << Token.new(type: OP, value: m, position: pos)
        return
      end

      # 12. Unquoted identifier / keyword (with string prefix detection)
      if (m = @scanner.scan(/[\p{L}_][\p{L}\d_]*/))
        lower = m.downcase

        # Check for U& prefix for unicode strings/identifiers
        if lower == "u" && @scanner.check(/&['"]/i)
          @scanner.scan(/&/)
          if @scanner.check(/'/)
            scan_single_quoted_string(pos, prefix: "U&")
          else
            scan_quoted_identifier(pos, unicode: true)
          end
          return
        end

        # Check for string prefixes: E, B, X, N immediately followed by '
        if %w[e b x n].include?(lower) && @scanner.check(/'/)
          scan_single_quoted_string(pos, prefix: lower)
          return
        end

        upper = m.upcase
        if Keywords::ALL.include?(upper)
          @tokens << Token.new(type: KEYWORD, value: upper, position: pos)
        else
          # Unquoted identifiers fold to lower case, as PostgreSQL does
          @tokens << Token.new(type: IDENT, value: lower, position: pos)
        end
        return
      end

      # 13. Single colon (not part of ::)
      if @scanner.scan(/:/)
        @tokens << Token.new(type: OP, value: ":", position: pos)
        return
      end

      raise LexError, "unexpected character #{@scanner.peek(1).inspect} at position #{pos}"
    end

    # Skip a (possibly nested) /* ... */ comment. The opening /* has already
    # been consumed by the caller.
    #
    # @raise [LexError] if the comment never closes
    def scan_block_comment(start_pos)
      depth = 1
      until @scanner.eos?
        if @scanner.scan(/\/\*/)
          depth += 1
        elsif @scanner.scan(/\*\//)
          depth -= 1
          return if depth == 0
        else
          @scanner.getch
        end
      end
      raise LexError, "unterminated block comment starting at position #{start_pos}"
    end

    # Scan a $tag$ ... $tag$ string. The body is taken verbatim (no escapes).
    #
    # @raise [LexError] if the closing tag is never found
    def scan_dollar_string(start_pos)
      @scanner.scan(/\$([\p{L}_][\p{L}\d_]*)?\$/)
      tag = @scanner.matched
      content = +""
      until @scanner.eos?
        rest = @scanner.rest
        idx = rest.index(tag)
        if idx
          body = rest[0, idx]
          content << body
          # BUGFIX: StringScanner#pos is a BYTE offset while String#index
          # returns a CHARACTER index. Advancing by `idx + tag.length` desynced
          # the scanner whenever the string body contained multi-byte UTF-8
          # characters; advance by bytesize instead.
          @scanner.pos += body.bytesize + tag.bytesize
          @tokens << Token.new(type: STRING, value: content, position: start_pos)
          return
        else
          content << rest
          @scanner.terminate
        end
      end
      raise LexError, "unterminated dollar-quoted string starting at position #{start_pos}"
    end

    # Scan a single-quoted string whose opening ' has NOT yet been consumed.
    # '' is kept as-is (SQL quote escaping); in E-strings a backslash escapes
    # the following character.
    #
    # @param prefix [String, nil] lower-cased string prefix ("e", "b", "x", "n",
    #   "U&") or nil for a plain string
    # @raise [LexError] if the string never closes
    def scan_single_quoted_string(start_pos, prefix:)
      @scanner.scan(/'/)
      escape_mode = (prefix == "e") # E-strings support backslash escapes
      content = +""

      until @scanner.eos?
        if escape_mode && @scanner.scan(/\\/)
          if @scanner.eos?
            raise LexError, "unterminated string starting at position #{start_pos}"
          end
          content << "\\" << @scanner.getch
        elsif @scanner.scan(/''/)
          content << "''"
        elsif @scanner.scan(/'/)
          @tokens << Token.new(type: STRING, value: content, position: start_pos)
          return
        else
          content << @scanner.getch
        end
      end

      raise LexError, "unterminated string starting at position #{start_pos}"
    end

    # Scan a double-quoted identifier whose opening " has NOT yet been
    # consumed. "" collapses to a single literal quote.
    #
    # @param unicode [Boolean] true for U&"..." identifiers (escape sequences
    #   are currently kept verbatim)
    # @raise [LexError] if the identifier never closes
    def scan_quoted_identifier(start_pos, unicode:)
      @scanner.scan(/"/)
      content = +""

      until @scanner.eos?
        if @scanner.scan(/""/)
          content << '"'
        elsif @scanner.scan(/"/)
          @tokens << Token.new(type: QUOTED_IDENT, value: content, position: start_pos)
          return
        else
          content << @scanner.getch
        end
      end

      raise LexError, "unterminated quoted identifier starting at position #{start_pos}"
    end
  end
end
# frozen_string_literal: true

module Pgsqlarbiter
  # Immutable token produced by the {Lexer}.
  #
  # @!attribute [r] type
  #   @return [Symbol] token type (one of the {TokenType} constants)
  # @!attribute [r] value
  #   @return [String, nil] the token text (+nil+ for EOF)
  # @!attribute [r] position
  #   @return [Integer] character offset in the original SQL string
  Token = Data.define(:type, :value, :position)

  # Constants for all token types produced by the {Lexer}. Each constant
  # maps to the lower-cased symbol of its own name.
  module TokenType
    {
      KEYWORD: :keyword,
      IDENT: :ident,
      QUOTED_IDENT: :quoted_ident,
      STRING: :string,
      NUMBER: :number,
      PARAM: :param,
      LPAREN: :lparen,
      RPAREN: :rparen,
      LBRACKET: :lbracket,
      RBRACKET: :rbracket,
      COMMA: :comma,
      DOT: :dot,
      SEMICOLON: :semicolon,
      STAR: :star,
      TYPECAST: :typecast,
      OP: :op,
      EOF: :eof
    }.each { |name, sym| const_set(name, sym) }
  end
end
# frozen_string_literal: true

module Pgsqlarbiter
  # Immutable result of judging a SQL query against an {Arbiter}'s rules.
  #
  # A +Verdict+ tells you whether a query is allowed and, if not, exactly which
  # checks failed. Use {#allowed?} for a quick boolean, {#reasons} for
  # human-readable denial strings, or the individual fields for programmatic
  # branching.
  #
  # @!attribute [r] allowed
  #   @return [Boolean] +true+ when the query passes all checks
  # @!attribute [r] statement_type_allowed
  #   @return [Boolean] +true+ when the statement type is in the whitelist
  # @!attribute [r] statement_type
  #   @return [Symbol] the query's actual statement type
  # @!attribute [r] disallowed_tables
  #   @return [Array<String>] tables referenced by the query that are not whitelisted
  # @!attribute [r] disallowed_functions
  #   @return [Array<String>] functions called by the query that are not whitelisted
  Verdict = Data.define(:allowed, :statement_type_allowed, :statement_type,
                        :disallowed_tables, :disallowed_functions) do
    alias_method :allowed?, :allowed
    alias_method :statement_type_allowed?, :statement_type_allowed

    # Human-readable denial reasons. Empty when the query is allowed.
    #
    # @return [Array<String>] frozen list of reason strings
    def reasons
      out = []
      out << "statement type :#{statement_type} is not allowed" unless statement_type_allowed
      out.concat(disallowed_tables.map { |t| "table #{t.inspect} is not allowed" })
      out.concat(disallowed_functions.map { |f| "function #{f.inspect} is not allowed" })
      out.freeze
    end
  end
end
# frozen_string_literal: true

module Pgsqlarbiter
  # Gem release version, following Semantic Versioning (major.minor.patch).
  VERSION = "0.2.0"
end