guardinari 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +114 -0
- data/Rakefile +42 -0
- data/ext/guardinari/Makefile +277 -0
- data/ext/guardinari/depend +4 -0
- data/ext/guardinari/extconf.rb +16 -0
- data/ext/guardinari/guardinari.c +2318 -0
- data/ext/guardinari/guardinari.l +245 -0
- data/ext/guardinari/scanner.h +494 -0
- data/lib/guardinari/version.rb +5 -0
- data/lib/guardinari.rb +9 -0
- data/sig/pagination_checker.rbs +4 -0
- metadata +61 -0
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
%option noyywrap caseless yylineno nodefault reentrant header-file="scanner.h"
|
|
2
|
+
|
|
3
|
+
%{
|
|
4
|
+
|
|
5
|
+
#include <limits.h>
#include <stdbool.h>

#include "ruby.h"
|
|
8
|
+
|
|
9
|
+
/*
 * Token codes handed back by yylex() through the TOK() macro.
 * The values are assigned explicitly; 261 is intentionally left unassigned
 * here, exactly as in the original numbering.
 */
enum tokens {
    /* SQL keywords the pagination check reacts to */
    T_SELECT = 260,
    T_ORDER  = 262,
    T_BY     = 263,
    T_LIMIT  = 264,
    T_OFFSET = 265,

    /* Literal kinds */
    T_STR    = 266,
};
|
|
20
|
+
|
|
21
|
+
/*
 * TOK(NAME): trace the matched text, remember T_NAME as the scanner's most
 * recent token (yyextra->last_token) and return T_NAME from yylex().
 *
 * The body is wrapped in do { ... } while (0) so that `TOK(X);` always expands
 * to exactly one statement, even if it is ever used as the un-braced body of
 * an if/else. Callers must supply the trailing semicolon.
 */
#define TOK(x) do { DEBUG(#x"(%s)\n", yytext); yyextra->last_token = T_##x; return T_##x; } while (0)
|
|
22
|
+
|
|
23
|
+
/* Number of query_state frames in scanner_state.stack; the "(" rule stops the
 * scan with ERR_NESTING instead of advancing stack_index past the last frame. */
#define MAX_QUERY_NESTING 5
|
|
24
|
+
|
|
25
|
+
/*
 * What the scanner has seen so far inside one parenthesis level ("frame").
 * guardinari_check_pagination() evaluates a frame when it is closed.
 */
typedef struct {
    bool select;       /* a SELECT keyword was seen in this frame */
    bool order_by;     /* a BY token directly following ORDER was seen */
    bool have_limit;   /* `limit` below holds a value taken from the query */
    int  limit;        /* number (or bind parameter) that followed LIMIT */
    bool have_offset;  /* `offset` below holds a value taken from the query */
    int  offset;       /* number (or bind parameter) that followed OFFSET */
} query_state;
|
|
33
|
+
|
|
34
|
+
typedef struct {
|
|
35
|
+
query_state stack[MAX_QUERY_NESTING];
|
|
36
|
+
VALUE bind_params;
|
|
37
|
+
int stack_index;
|
|
38
|
+
int last_token;
|
|
39
|
+
bool broken_pagination;
|
|
40
|
+
int error;
|
|
41
|
+
} scanner_state;
|
|
42
|
+
|
|
43
|
+
/* Type of the user data pointer the reentrant scanner exposes as yyextra. */
#define YY_EXTRA_TYPE scanner_state*
|
|
44
|
+
|
|
45
|
+
/*
 * Values stored in scanner_state.error when a rule aborts the scan.
 * Declared as enum constants rather than writable, externally visible int
 * objects: they are true compile-time constants, cannot be modified at run
 * time and do not add symbols to the extension's global namespace.
 */
enum {
    ERR_NESTING = 1, /* "(" would exceed MAX_QUERY_NESTING frames */
    ERR_PARSE   = 2  /* ")" with no matching "(" */
};
|
|
47
|
+
|
|
48
|
+
/*
 * Trace helper. Uncomment the DEBUG_BUILD definition to have DEBUG(fmt, ...)
 * print its printf-style message plus a newline to stderr; otherwise every
 * DEBUG() call expands to nothing and its arguments are never evaluated.
 */
//#define DEBUG_BUILD 1
#if defined(DEBUG_BUILD)
#  define DEBUG(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); } while (0)
#else
#  define DEBUG(...)
#endif
|
|
54
|
+
|
|
55
|
+
/* Evaluates the current stack frame and records the verdict in
 * state->broken_pagination; declared here because the rules call it. */
void guardinari_check_pagination(scanner_state*);
|
|
56
|
+
|
|
57
|
+
/* Ruby stuff */
|
|
58
|
+
|
|
59
|
+
/* Implementation of Guardinari.find_broken_pagination(sql, bind_params);
 * registered with Ruby in Init_guardinari(). */
VALUE guardinari_find_broken_pagination(VALUE self, VALUE sql, VALUE bind_params);
|
|
60
|
+
|
|
61
|
+
VALUE rb_mGuardinari;
|
|
62
|
+
VALUE rb_eError;
|
|
63
|
+
VALUE rb_eNestingError;
|
|
64
|
+
VALUE rb_eParseError;
|
|
65
|
+
|
|
66
|
+
RUBY_FUNC_EXPORTED void Init_guardinari(void) {
|
|
67
|
+
rb_mGuardinari = rb_define_module("Guardinari");
|
|
68
|
+
|
|
69
|
+
rb_eError = rb_define_class_under(rb_mGuardinari, "Error", rb_eRuntimeError);
|
|
70
|
+
rb_eNestingError = rb_define_class_under(rb_mGuardinari, "NestingError", rb_eError);
|
|
71
|
+
rb_eParseError = rb_define_class_under(rb_mGuardinari, "ParseError", rb_eError);
|
|
72
|
+
|
|
73
|
+
rb_define_module_function(rb_mGuardinari, "find_broken_pagination", &guardinari_find_broken_pagination, 2);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
%}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
%x COMMENT
|
|
80
|
+
|
|
81
|
+
%%
|
|
82
|
+
|
|
83
|
+
/* The subset of keywords that are relevant */
|
|
84
|
+
select {
    /* Mark the current frame as containing a query. */
    query_state *frame = &yyextra->stack[yyextra->stack_index];
    frame->select = true;
    TOK(SELECT);
}

order { TOK(ORDER); }

by {
    /* BY only counts as an ordering clause when ORDER was the previous token. */
    if (yyextra->last_token != T_ORDER) {
        DEBUG("last token: %d", yyextra->last_token);
    } else {
        query_state *frame = &yyextra->stack[yyextra->stack_index];
        frame->order_by = true;
    }

    TOK(BY);
}

limit { TOK(LIMIT); }

offset { TOK(OFFSET); }
|
|
97
|
+
|
|
98
|
+
[0-9]+ {
    /*
     * Numeric literal. It is only recorded when it directly follows LIMIT or
     * OFFSET (last_token is not changed by this rule).
     *
     * strtol() is used instead of atoi(): atoi() has undefined behaviour when
     * the digits do not fit in an int, whereas strtol() saturates at LONG_MAX,
     * which is then clamped to INT_MAX. The pattern admits digits only, so the
     * value is never negative.
     */
    DEBUG("have number: %s", yytext);
    if (yyextra->last_token == T_LIMIT || yyextra->last_token == T_OFFSET) {
        long parsed = strtol(yytext, NULL, 10);
        int value = parsed > INT_MAX ? INT_MAX : (int)parsed;
        query_state *frame = &yyextra->stack[yyextra->stack_index];

        if (yyextra->last_token == T_LIMIT) {
            frame->have_limit = true;
            frame->limit = value;
        } else {
            frame->have_offset = true;
            frame->offset = value;
        }
    }
}
|
|
108
|
+
|
|
109
|
+
"$"[0-9]+ {
    /*
     * Numbered bind placeholder ($1, $2, ...). The only time we care about
     * evaluating a bound parameter is when it supplies the LIMIT or OFFSET:
     * a "limit 1 offset 0" query is not broken pagination, it is a finder
     * that is expected to return exactly one row.
     *
     * The placeholder number is 1-based and is validated against the length
     * of bind_params before the array is read; "$0" or a number beyond the
     * end of the array is ignored instead of reading outside the array.
     * NUM2INT() may raise a Ruby exception if the parameter cannot be
     * converted to an int.
     */
    DEBUG("have bind param %s", yytext);
    if (yyextra->last_token == T_LIMIT || yyextra->last_token == T_OFFSET) {
        /* Saturates at LONG_MAX on overflow, which the range check rejects. */
        long position = strtol(yytext + 1, NULL, 10);

        if (position < 1 || position > RARRAY_LEN(yyextra->bind_params)) {
            DEBUG("Bind param %ld is out of range, ignoring", position);
        } else {
            DEBUG("Looking up bind param %ld", position - 1);
            VALUE param = RARRAY_AREF(yyextra->bind_params, position - 1);
            int value = NUM2INT(param);
            DEBUG("Found %d", value);

            query_state *frame = &yyextra->stack[yyextra->stack_index];
            if (yyextra->last_token == T_LIMIT) {
                frame->limit = value;
                frame->have_limit = true;
            } else {
                frame->offset = value;
                frame->have_offset = true;
            }
        }
    }
}
|
|
129
|
+
|
|
130
|
+
\"([^"]|(\\\"))*\" { /* quoted text, possibly empty: with "+" an empty "" was not matched and quote pairing went out of step */ TOK(STR); }
\'([^']|(\\\'))*\' { /* same for single quotes; '' must lex as one literal so later keywords inside strings stay hidden */ TOK(STR); }
|
|
132
|
+
|
|
133
|
+
/* Comments */
|
|
134
|
+
--.+ { /* single-line comment: discard everything up to the end of the line */ }

"/*" { BEGIN(COMMENT); }

<COMMENT>"*/" { BEGIN(INITIAL); }

<COMMENT>.|\n { /* inside a block comment: discard */ }
|
|
140
|
+
|
|
141
|
+
"(" {
    /* Every "(" opens a new frame, as long as one is still available. */
    if (yyextra->stack_index + 1 != MAX_QUERY_NESTING) {
        ++yyextra->stack_index;
    } else {
        /* Already on the last frame (stack_index == MAX_QUERY_NESTING - 1):
         * record the error and stop scanning. */
        DEBUG("Maximum nesting reached, halting parsing");
        yyextra->error = ERR_NESTING;
        yyterminate();
    }
}
|
|
156
|
+
|
|
157
|
+
")" {
    /*
     * Closes the current frame: evaluate it, clear it for reuse and step back
     * to the enclosing frame. A ")" on the outermost frame has no matching
     * "(" and aborts the scan with ERR_PARSE.
     *
     * The diagnostic goes through DEBUG() instead of printf(): a library must
     * not write to the host application's stdout, and the condition is
     * already reported to the caller through yyextra->error.
     */
    if (yyextra->stack_index == 0) {
        DEBUG("Unmatched ')'");
        yyextra->error = ERR_PARSE;
        yyterminate();
    } else {
        guardinari_check_pagination(yyextra);
        /* Reset this "stack frame" to zero for any subsequent queries */
        yyextra->stack[yyextra->stack_index] = (query_state){0};
        --yyextra->stack_index;
    }
}
|
|
171
|
+
|
|
172
|
+
<<EOF>> {
    /* End of input: warn about a frame that was never closed, evaluate the
     * frame we ended up in, and finish the scan (yylex() returns 0). */
    if (yyextra->stack_index) {
        fprintf(stderr, "Error: Unexpected EOF looking for ')' to terminate sub-query\n");
    }

    guardinari_check_pagination(yyextra);
    yyterminate();
}
|
|
180
|
+
|
|
181
|
+
[ \t\n] { /* Whitespace */ }

[a-z_][a-z0-9_]* {
    /* Any other word. Consuming whole identifiers stops keywords from being
     * recognised inside them (e.g. "rate_limit", "order_by", "created_by").
     * flex prefers the longest match and, on a tie, the earlier rule, so the
     * bare keywords above still win. last_token is deliberately untouched. */
    DEBUG("identifier %s", yytext);
}

. { DEBUG("Unknown character %c", *yytext); }
|
|
184
|
+
|
|
185
|
+
%%
|
|
186
|
+
|
|
187
|
+
/*
 * Decides whether the frame at state->stack_index is a paginated SELECT with
 * no ORDER BY and stores the verdict in state->broken_pagination.
 *
 * A frame is "broken" when it saw SELECT, saw no ORDER BY, has both a LIMIT
 * and an OFFSET, and either limit > 1 or (limit == 1 and offset > 0).
 * Once the flag is set it is never cleared by a later frame.
 */
void guardinari_check_pagination(scanner_state *state) {
    if (!state->broken_pagination) {
        const query_state *frame = &state->stack[state->stack_index];

        DEBUG("Checking pagination");
        DEBUG("select: %d", frame->select);
        DEBUG("have_limit/limit: %d/%d", frame->have_limit, frame->limit);
        DEBUG("have_offset/offset: %d/%d", frame->have_offset, frame->offset);
        DEBUG("have_order_by: %d", frame->order_by);

        /* Both clauses must be present before their values mean anything. */
        bool paginated = frame->have_limit && frame->have_offset;
        /* "limit 1 offset 0" is a single-row finder, not pagination. */
        bool beyond_single_row = frame->limit > 1 || (frame->limit == 1 && frame->offset > 0);
        bool broken = frame->select && !frame->order_by && paginated && beyond_single_row;

        DEBUG("Broken: %d", broken);
        state->broken_pagination = broken;
    }
}
|
|
205
|
+
|
|
206
|
+
/* rb_ensure() body: pulls tokens until end of input, a scanner error, or the
 * first broken-pagination hit. `arg` is the yyscan_t passed as a VALUE. */
static VALUE guardinari_scan_tokens(VALUE arg) {
    yyscan_t scanner = (yyscan_t)arg;
    scanner_state *state = yyget_extra(scanner);

    while (yylex(scanner)) {
        if (state->broken_pagination) {
            break;
        }
    }
    return Qnil;
}

/* rb_ensure() cleanup: releases the scanner and its input buffer. */
static VALUE guardinari_release_scanner(VALUE arg) {
    yylex_destroy((yyscan_t)arg);
    return Qnil;
}

/*
 * Guardinari.find_broken_pagination(sql, bind_params) -> true | false
 *
 * Scans `sql` and reports whether any (sub-)query selects with both LIMIT and
 * OFFSET but without ORDER BY (see guardinari_check_pagination()).
 *
 * sql          - String; must not contain NUL bytes (StringValueCStr raises).
 * bind_params  - Array consulted for $N placeholders after LIMIT / OFFSET.
 *
 * Raises TypeError for wrong argument types, Guardinari::NestingError when
 * parentheses nest deeper than MAX_QUERY_NESTING allows, and
 * Guardinari::ParseError for a ")" without a matching "(".
 *
 * Changes relative to the previous implementation:
 *  - the scanner is created once (yylex_init_extra alone); the extra
 *    yylex_init() call allocated a scanner that was never freed;
 *  - the SQL C string is extracted before the scanner exists, and the scan
 *    runs under rb_ensure(), so a Ruby exception raised on the way cannot leak
 *    the scanner;
 *  - an initialisation failure raises instead of abort()ing the process;
 *  - ERR_PARSE is reported as ParseError rather than as NestingError.
 */
VALUE guardinari_find_broken_pagination(VALUE self, VALUE sql, VALUE bind_params) {
    Check_Type(sql, T_STRING);
    Check_Type(bind_params, T_ARRAY);

    /* May raise, so it must happen before any scanner resource is acquired. */
    const char *sql_str = StringValueCStr(sql);
    DEBUG("Checking SQL %s", sql_str);

    scanner_state state = {0};
    // We don't hold onto this value beyond the end of the function; it stays
    // reachable through the caller's argument for the duration of the scan.
    state.bind_params = bind_params;

    yyscan_t scanner;
    if (yylex_init_extra(&state, &scanner)) {
        /* flex sets errno (ENOMEM / EINVAL) on failure. */
        rb_sys_fail("yylex_init_extra");
    }

    /* Copies the string into a scanner-owned buffer, which yylex_destroy() frees. */
    yy_scan_string(sql_str, scanner);

    rb_ensure(guardinari_scan_tokens, (VALUE)scanner,
              guardinari_release_scanner, (VALUE)scanner);

    if (state.error == ERR_NESTING) {
        rb_raise(rb_eNestingError, "sub-queries are nested too deeply");
    }
    if (state.error != 0) {
        rb_raise(rb_eParseError, "unmatched ')' in query");
    }

    return state.broken_pagination ? Qtrue : Qfalse;
}
|
|
244
|
+
|
|
245
|
+
|