gmail_search_syntax 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +338 -0
- data/README.md +129 -0
- data/Rakefile +11 -0
- data/SCHEMA.md +223 -0
- data/examples/alias_collision_fix.rb +43 -0
- data/examples/demo.rb +28 -0
- data/examples/gmail_message_id_demo.rb +118 -0
- data/examples/postgres_vs_sqlite.rb +55 -0
- data/examples/sql_query.rb +47 -0
- data/lib/GMAIL_SEARCH_OPERATORS.md +58 -0
- data/lib/gmail_search_syntax/ast.rb +100 -0
- data/lib/gmail_search_syntax/parser.rb +224 -0
- data/lib/gmail_search_syntax/sql_visitor.rb +496 -0
- data/lib/gmail_search_syntax/tokenizer.rb +152 -0
- data/lib/gmail_search_syntax/version.rb +3 -0
- data/lib/gmail_search_syntax.rb +34 -0
- data/test/gmail_search_syntax_test.rb +691 -0
- data/test/integration_test.rb +668 -0
- data/test/postgres_visitor_test.rb +156 -0
- data/test/sql_visitor_test.rb +346 -0
- data/test/test_helper.rb +27 -0
- data/test/tokenizer_test.rb +185 -0
- metadata +115 -0
@@ -0,0 +1,496 @@
|
|
1
|
+
module GmailSearchSyntax
|
2
|
+
# Collects the parts of a parameterized SQL statement: WHERE fragments,
# JOIN clauses and bind parameters, plus the shared alias counter.
class Query
  attr_reader :conditions, :joins, :params, :alias_counter

  # alias_counter: any object responding to #next; it supplies the numeric
  # suffixes used by #get_table_alias.
  def initialize(alias_counter:)
    @alias_counter = alias_counter
    @table_aliases = {}
    @conditions = []
    @params = []
    @joins = {}
  end

  # Appends a WHERE fragment.
  def add_condition(sql_fragment)
    @conditions.push(sql_fragment)
  end

  # Appends a bind parameter; parameters are kept in insertion order.
  def add_param(value)
    @params.push(value)
  end

  # Stores a JOIN clause under a key made unique by suffixing the number of
  # joins registered so far.
  def add_join(table_name, join_sql)
    join_key = "#{table_name}_#{@joins.size}"
    @joins[join_key] = join_sql
  end

  # Returns base_alias when one is given, otherwise the initials of the
  # underscore-separated table name followed by the next counter value.
  # The counter is advanced on every call, even when base_alias is used.
  def get_table_alias(table_name, base_alias = nil)
    sequence = @alias_counter.next
    return base_alias if base_alias

    initials = table_name.split("_").map { |word| word[0] }.join
    "#{initials}#{sequence}"
  end

  # Returns [sql_string, params]. With no conditions the WHERE clause is
  # "1 = 1"; empty segments (e.g. no joins) are left out of the statement.
  def to_sql
    predicate = if @conditions.empty?
      "1 = 1"
    else
      @conditions.join(" ")
    end

    segments = [
      "SELECT DISTINCT m0.id FROM messages AS m0",
      @joins.values.join(" "),
      "WHERE",
      predicate
    ]

    [segments.select { |segment| !segment.empty? }.join(" "), @params]
  end
end
|
38
|
+
|
39
|
+
# Walks a Gmail-search AST and accumulates an equivalent parameterized SQL
# query (SQLite dialect) in a Query object.
#
# Usage: call #visit with the root node, then #to_query to get the Query.
# Search values are always passed as bind parameters; only values from fixed
# whitelists are interpolated into column names.
class SQLiteVisitor
  # address_type values matched by each address operator.
  ADDRESS_TYPES = {
    "from" => %w[from cc bcc],
    "to" => %w[to cc bcc],
    "cc" => %w[cc],
    "bcc" => %w[bcc],
    "deliveredto" => %w[delivered_to]
  }.freeze

  # has: values that map directly to a m0.has_<value> column.
  HAS_FLAG_VALUES = %w[attachment youtube drive document spreadsheet presentation].freeze

  # has: values that map to a m0.has_<value> column after "-" becomes "_".
  HAS_MARKER_VALUES = %w[
    yellow-star orange-star red-star purple-star blue-star green-star
    red-bang orange-guillemet yellow-bang green-check blue-info purple-question
  ].freeze

  # in: values that map to a m0.in_<value> column.
  IN_FLAG_VALUES = %w[inbox archive snoozed spam trash].freeze

  # is: values that map to a m0.is_<value> column.
  IS_FLAG_VALUES = %w[important starred unread read muted].freeze

  # Unit suffixes accepted by relative dates and their SQLite modifier names.
  RELATIVE_TIME_UNITS = {"d" => "days", "m" => "months", "y" => "years"}.freeze

  private_constant :ADDRESS_TYPES, :HAS_FLAG_VALUES, :HAS_MARKER_VALUES,
    :IN_FLAG_VALUES, :IS_FLAG_VALUES, :RELATIVE_TIME_UNITS

  # current_user_email: substituted for the address value "me" when present.
  # alias_counter: shared enumerator handed to the Query (and to every
  # sub-visitor) so generated aliases keep advancing across sub-queries.
  def initialize(current_user_email: nil, alias_counter: (1..).each)
    @current_user_email = current_user_email
    @query = Query.new(alias_counter:)
  end

  # Dispatches on the node class and adds the node's SQL to the query.
  # Raises RuntimeError for an unrecognized node class.
  def visit(node)
    case node
    when AST::Operator then visit_operator(node)
    when AST::Text then visit_text(node)
    when AST::And then visit_and(node)
    when AST::Or then visit_or(node)
    when AST::Not then visit_not(node)
    when AST::Group then visit_group(node)
    when AST::Around then visit_around(node)
    else
      raise "Unknown node type: #{node.class}"
    end
  end

  # Returns the Query built so far.
  def to_query
    @query
  end

  private

  # Routes an operator node to its handler by operator name.
  # Raises RuntimeError for an unknown operator name.
  def visit_operator(node)
    case node.name
    when "from", "to", "cc", "bcc", "deliveredto" then visit_address_operator(node)
    when "subject" then visit_subject_operator(node)
    when "after", "before", "older", "newer" then visit_date_operator(node)
    when "older_than", "newer_than" then visit_relative_date_operator(node)
    when "label" then visit_label_operator(node)
    when "category" then visit_category_operator(node)
    when "has" then visit_has_operator(node)
    when "list" then visit_list_operator(node)
    when "filename" then visit_filename_operator(node)
    when "in" then visit_in_operator(node)
    when "is" then visit_is_operator(node)
    when "size", "larger", "smaller" then visit_size_operator(node)
    when "rfc822msgid" then visit_rfc822msgid_operator(node)
    else
      raise "Unknown operator: #{node.name}"
    end
  end

  # from:/to:/cc:/bcc:/deliveredto: — joins message_addresses under a fresh
  # alias and constrains both the address type and the email address.
  def visit_address_operator(node)
    address_types = ADDRESS_TYPES[node.name]
    compound = compound_value?(node.value)

    # The sub-query is visited before the alias is allocated so the shared
    # counter advances in the same order as before.
    sub_query = sub_query_for(node.value) if compound

    alias_name = next_address_alias
    @query.add_join(alias_name, "INNER JOIN message_addresses AS #{alias_name} ON m0.id = #{alias_name}.message_id")

    type_sql = address_types.map { "#{alias_name}.address_type = ?" }.join(" OR ")
    address_types.each { |address_type| @query.add_param(address_type) }

    email_sql = if compound
      # Sub-query joins are intentionally not merged here; only its params
      # and (alias-rewritten) conditions are reused.
      sub_query.params.each { |param| @query.add_param(param) }
      rewritten = sub_query.conditions.map do |cond|
        cond.gsub(/\bma\d+\.email_address\b/, "#{alias_name}.email_address")
      end
      "(#{rewritten.join(" ")})"
    else
      value = scalar_value(node)
      value = @current_user_email if value == "me" && @current_user_email
      build_string_match_condition("#{alias_name}.email_address", value)
    end

    @query.add_condition("((#{type_sql}) AND #{email_sql})")
  end

  # subject: — substring match on m0.subject. For compound values the
  # sub-query's conditions are rewritten to subject matches and each of its
  # params is wrapped in "%" wildcards.
  def visit_subject_operator(node)
    if compound_value?(node.value)
      sub_query = sub_query_for(node.value)

      subject_conditions = sub_query.conditions.map do |cond|
        cond.gsub("messages_fts MATCH ?", "m0.subject LIKE ?")
          .gsub("(1 = 1)", "m0.subject LIKE ?")
      end
      sub_query.params.each { |param| @query.add_param("%#{param}%") }

      @query.add_condition("(#{subject_conditions.join(" ")})")
    else
      @query.add_param("%#{scalar_value(node)}%")
      @query.add_condition("m0.subject LIKE ?")
    end
  end

  # after:/newer: and before:/older: — compares m0.internal_date to the date.
  def visit_date_operator(node)
    @query.add_param(parse_date(scalar_value(node)))

    case node.name
    when "after", "newer"
      @query.add_condition("m0.internal_date > ?")
    when "before", "older"
      @query.add_condition("m0.internal_date < ?")
    end
  end

  # older_than:/newer_than: — compares m0.internal_date to
  # datetime('now', <modifier>) where the modifier is a bind parameter.
  def visit_relative_date_operator(node)
    @query.add_param(parse_relative_time(scalar_value(node)))

    case node.name
    when "older_than"
      @query.add_condition("m0.internal_date < datetime('now', ?)")
    when "newer_than"
      @query.add_condition("m0.internal_date > datetime('now', ?)")
    end
  end

  # label: — joins message_labels/labels and matches the label name exactly.
  def visit_label_operator(node)
    value = scalar_value(node)
    label_alias = join_label_tables

    @query.add_param(value)
    @query.add_condition("#{label_alias}.name = ?")
  end

  # category: — exact match on m0.category.
  def visit_category_operator(node)
    @query.add_param(scalar_value(node))
    @query.add_condition("m0.category = ?")
  end

  # has: — boolean column checks for known values, label joins for
  # "userlabels", and a NOT EXISTS sub-select for "nouserlabels".
  # Raises RuntimeError for any other value.
  def visit_has_operator(node)
    value = scalar_value(node)

    case value
    when *HAS_FLAG_VALUES
      @query.add_condition("m0.has_#{value} = 1")
    when *HAS_MARKER_VALUES
      @query.add_condition("m0.has_#{value.tr("-", "_")} = 1")
    when "userlabels"
      label_alias = join_label_tables("_userlabels")
      @query.add_condition("#{label_alias}.is_system_label = 0")
    when "nouserlabels"
      @query.add_condition("NOT EXISTS (SELECT 1 FROM message_labels AS ml " \
        "INNER JOIN labels AS l ON ml.label_id = l.id " \
        "WHERE ml.message_id = m0.id AND l.is_system_label = 0)")
    else
      raise "Unknown has: value: #{value}"
    end
  end

  # list: — matches m0.mailing_list (exact, or prefix/suffix for "@" forms).
  def visit_list_operator(node)
    @query.add_condition(build_string_match_condition("m0.mailing_list", scalar_value(node)))
  end

  # filename: — joins attachments. A value containing "." must equal the
  # filename; otherwise it matches as an extension ("%.value") or as a
  # filename prefix ("value%").
  def visit_filename_operator(node)
    value = scalar_value(node)

    alias_name = @query.get_table_alias("attachments", "a")
    @query.add_join(alias_name, "INNER JOIN attachments AS #{alias_name} ON m0.id = #{alias_name}.message_id")

    if value.include?(".")
      @query.add_param(value)
      @query.add_condition("#{alias_name}.filename = ?")
    else
      @query.add_param("%.#{value}")
      @query.add_param("#{value}%")
      @query.add_condition("(#{alias_name}.filename LIKE ? OR #{alias_name}.filename LIKE ?)")
    end
  end

  # in: — boolean m0.in_<value> column checks.
  # Raises RuntimeError for an unknown value.
  def visit_in_operator(node)
    value = scalar_value(node)

    case value
    when "anywhere"
      # No restriction. An explicit always-true fragment is emitted so that
      # AND/OR/NOT parents receive a valid operand instead of nil (which
      # used to render as e.g. "( AND ...)"). At the top level this yields
      # the same "WHERE 1 = 1" as an empty condition list.
      @query.add_condition("1 = 1")
    when *IN_FLAG_VALUES
      @query.add_condition("m0.in_#{value} = 1")
    else
      raise "Unknown in: value: #{value}"
    end
  end

  # is: — boolean m0.is_<value> column checks.
  # Raises RuntimeError for an unknown value.
  def visit_is_operator(node)
    value = scalar_value(node)

    case value
    when *IS_FLAG_VALUES
      @query.add_condition("m0.is_#{value} = 1")
    else
      raise "Unknown is: value: #{value}"
    end
  end

  # size:/larger:/smaller: — compares m0.size_bytes to the parsed byte
  # count. node.value is handed to parse_size as-is.
  def visit_size_operator(node)
    @query.add_param(parse_size(node.value))

    case node.name
    when "size"
      @query.add_condition("m0.size_bytes = ?")
    when "larger"
      @query.add_condition("m0.size_bytes > ?")
    when "smaller"
      @query.add_condition("m0.size_bytes < ?")
    end
  end

  # rfc822msgid: — exact match on m0.rfc822_message_id.
  def visit_rfc822msgid_operator(node)
    @query.add_param(scalar_value(node))
    @query.add_condition("m0.rfc822_message_id = ?")
  end

  # Free text — substring match against subject or body.
  # Exactly two parameters are bound, one per placeholder; binding the raw
  # value as well would shift every later parameter by one.
  def visit_text(node)
    pattern = "%#{node.value}%"
    @query.add_condition("(m0.subject LIKE ? OR m0.body LIKE ?)")
    @query.add_param(pattern)
    @query.add_param(pattern)
  end

  # AND of all operands.
  def visit_and(node)
    combine_operands(node.operands, " AND ")
  end

  # OR of all operands.
  def visit_or(node)
    combine_operands(node.operands, " OR ")
  end

  # Negation of the child node's condition.
  def visit_not(node)
    sub_query = sub_query_for(node.child)
    merge_joins_and_params(sub_query)

    @query.add_condition("NOT #{single_fragment(sub_query.conditions)}")
  end

  # A group with one child is visited in place; otherwise every child is
  # visited in its own sub-query and the results are ANDed together.
  def visit_group(node)
    if node.children.length == 1
      visit(node.children.first)
    else
      fragments = node.children.map do |child|
        sub_query = sub_query_for(child)
        merge_joins_and_params(sub_query)
        sub_query.conditions.join(" ")
      end

      @query.add_condition("(#{fragments.join(" AND ")})")
    end
  end

  # AROUND is emitted as a condition that never matches.
  def visit_around(node)
    @query.add_condition("(1 = 0)")
  end

  # Binds value and returns the matching SQL fragment for column_name:
  # "@domain" -> suffix LIKE, "local@" -> prefix LIKE, anything else -> "=".
  def build_string_match_condition(column_name, value)
    if value.start_with?("@")
      @query.add_param("%#{value}")
      "#{column_name} LIKE ?"
    elsif value.end_with?("@")
      @query.add_param("#{value}%")
      "#{column_name} LIKE ?"
    else
      @query.add_param(value)
      "#{column_name} = ?"
    end
  end

  # Normalizes "/" date separators to "-".
  def parse_date(value)
    value.tr("/", "-")
  end

  # Converts "<n>d|m|y" into a negative SQLite datetime modifier such as
  # "-7 days". Values in any other format are returned unchanged.
  def parse_relative_time(value)
    match = value.match(/^(\d+)([dmy])$/)
    return value unless match

    "-#{match[1]} #{RELATIVE_TIME_UNITS[match[2]]}"
  end

  # Converts a size into bytes. Integers pass through; "<n>K|M|G"
  # (case-insensitive) uses 1024-based multipliers; anything else is
  # converted with #to_i.
  def parse_size(value)
    return value if value.is_a?(Integer)

    match = /^(\d+)([KMG])$/i.match(value)
    return value.to_i unless match

    number = match[1].to_i
    case match[2].upcase
    when "K" then number * 1024
    when "M" then number * 1024 * 1024
    when "G" then number * 1024 * 1024 * 1024
    end
  end

  # True when an operator value is itself an expression (Or/And/Group).
  def compound_value?(value)
    value.is_a?(AST::Or) || value.is_a?(AST::And) || value.is_a?(AST::Group)
  end

  # The operator's value as a String, unwrapping a value node if needed.
  def scalar_value(node)
    node.value.is_a?(String) ? node.value : node.value.value
  end

  # Visits node with a fresh visitor of the same class that shares this
  # visitor's alias counter, and returns the resulting Query.
  def sub_query_for(node)
    sub_visitor = self.class.new(current_user_email: @current_user_email, alias_counter: @query.alias_counter)
    sub_visitor.visit(node)
    sub_visitor.to_query
  end

  # Copies a sub-query's joins and params (not its conditions) into @query.
  def merge_joins_and_params(sub_query)
    sub_query.joins.each { |key, join_sql| @query.add_join(key, join_sql) }
    sub_query.params.each { |param| @query.add_param(param) }
  end

  # Collapses a condition list into one fragment, parenthesized only when
  # it holds more than one entry.
  def single_fragment(conditions)
    (conditions.length > 1) ? "(#{conditions.join(" ")})" : conditions.first
  end

  # Visits every operand in its own sub-query and joins the fragments.
  def combine_operands(operands, joiner)
    fragments = operands.map do |operand|
      sub_query = sub_query_for(operand)
      merge_joins_and_params(sub_query)
      single_fragment(sub_query.conditions)
    end

    @query.add_condition("(#{fragments.join(joiner)})")
  end

  # Allocates a numbered message_addresses alias ("ma<N>"). The counter is
  # advanced twice per alias, exactly as before, so existing alias
  # numbering is preserved.
  def next_address_alias
    numbered = @query.get_table_alias("message_addresses")
    @query.get_table_alias("message_addresses", "ma#{numbered.gsub(/\D/, "")}")
  end

  # Joins message_labels and labels under the fixed aliases "ml" and "l"
  # and returns the labels alias. key_suffix distinguishes the join key.
  def join_label_tables(key_suffix = "")
    alias_name = @query.get_table_alias("message_labels", "ml")
    label_alias = @query.get_table_alias("labels", "l")

    @query.add_join("#{alias_name}_#{label_alias}#{key_suffix}",
      "INNER JOIN message_labels AS #{alias_name} ON m0.id = #{alias_name}.message_id " \
      "INNER JOIN labels AS #{label_alias} ON #{alias_name}.label_id = #{label_alias}.id")

    label_alias
  end
end
|
464
|
+
|
465
|
+
# SQLiteVisitor variant that renders relative-date operators with
# PostgreSQL's NOW() and interval cast instead of SQLite's datetime().
class PostgresVisitor < SQLiteVisitor
  # older_than:/newer_than: — binds the interval text as a parameter and
  # compares m0.internal_date with NOW() minus that interval.
  def visit_relative_date_operator(node)
    raw_value = node.value
    raw_value = raw_value.value unless raw_value.is_a?(String)
    @query.add_param(parse_relative_time_postgres(raw_value))

    comparator = {"older_than" => "<", "newer_than" => ">"}[node.name]
    @query.add_condition("m0.internal_date #{comparator} (NOW() - ?::interval)") if comparator
  end

  private

  # Converts "<n>d|m|y" into interval text such as "7 days". Values in any
  # other format are returned unchanged.
  def parse_relative_time_postgres(value)
    parsed = value.match(/^(\d+)([dmy])$/)
    return value if parsed.nil?

    count, unit_code = parsed.captures
    unit_name = {"d" => "days", "m" => "months", "y" => "years"}[unit_code]

    "#{count} #{unit_name}"
  end
end
|
496
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module GmailSearchSyntax
|
2
|
+
# A lexical token: its type, its value, and the input position recorded by
# the tokenizer when the token was added.
class Token
  attr_reader :type, :value, :position

  def initialize(type, value, position)
    @type, @value, @position = type, value, position
  end

  # Tokens are equal when type and value match; position is ignored.
  def ==(other)
    return false unless other.is_a?(Token)

    @type == other.type && @value == other.value
  end

  # Compact debugging representation, e.g. #<Token word "hi">.
  def inspect
    format("#<Token %s %s>", @type, @value.inspect)
  end
end
|
19
|
+
|
20
|
+
# Splits a Gmail search string into Token objects.
#
# Token types produced: :lparen, :rparen, :lbrace, :rbrace, :minus, :plus,
# :colon, :quoted_string, :or, :and, :around, :email, :number, :date,
# :relative_time, :word, and a final :eof.
class Tokenizer
  OPERATORS = %w[
    from to cc bcc subject after before older newer older_than newer_than
    label category has list filename in is deliveredto size larger smaller
    rfc822msgid
  ].freeze

  LOGICAL_OPERATORS = %w[OR AND AROUND].freeze

  # input: the raw search string.
  def initialize(input)
    @input = input
    @position = 0
    @tokens = []
  end

  # Scans the whole input and returns the token list, always terminated by
  # an :eof token.
  def tokenize
    while @position < @input.length
      skip_whitespace

      break if @position >= @input.length

      char = current_char

      case char
      when "("
        add_token(:lparen, char)
        advance
      when ")"
        add_token(:rparen, char)
        advance
      when "{"
        add_token(:lbrace, char)
        advance
      when "}"
        add_token(:rbrace, char)
        advance
      when "-"
        next_char = peek_char
        if next_char && !next_char.match?(/\s/)
          # "-" directly followed by a term is a :minus token.
          add_token(:minus, char)
          advance
        else
          # Standalone "-" (before whitespace or at end of input) is read
          # as an ordinary word.
          read_word
        end
      when "+"
        add_token(:plus, char)
        advance
      when '"'
        read_quoted_string
      when ":"
        add_token(:colon, char)
        advance
      else
        read_word
      end
    end

    add_token(:eof, nil)
    @tokens
  end

  private

  def current_char
    @input[@position]
  end

  def peek_char(offset = 1)
    @input[@position + offset]
  end

  def advance
    @position += 1
  end

  def skip_whitespace
    advance while @position < @input.length && @input[@position].match?(/\s/)
  end

  # Appends a token stamped with the current scan position.
  def add_token(type, value)
    @tokens << Token.new(type, value, @position)
  end

  # Reads a double-quoted string. A backslash makes the following character
  # literal; an unterminated string runs to the end of the input.
  def read_quoted_string
    advance # skip the opening quote

    value = +""
    while @position < @input.length && current_char != '"'
      if current_char == "\\"
        advance
        value << current_char if @position < @input.length
      else
        value << current_char
      end
      advance
    end

    advance if @position < @input.length # skip the closing quote

    add_token(:quoted_string, value)
  end

  # Reads characters up to whitespace, a bracket/colon delimiter or a "-",
  # then classifies the text as a logical operator, email, number, date,
  # relative time or plain word.
  def read_word
    value = +""

    while @position < @input.length
      char = current_char
      break if /[\s():{}]/.match?(char)
      # A "-" ends the word only after at least one character has been
      # read. Breaking on a leading "-" consumed nothing, so #tokenize
      # looped forever on input such as "a - b" or a trailing "-".
      break if char == "-" && !value.empty?
      value << char
      advance
    end

    return if value.empty?

    if LOGICAL_OPERATORS.include?(value)
      add_token(value.downcase.to_sym, value)
    elsif value.include?("@")
      add_token(:email, value)
    elsif /^\d+$/.match?(value)
      add_token(:number, value.to_i)
    elsif %r{^\d{4}/\d{2}/\d{2}$}.match?(value) || %r{^\d{2}/\d{2}/\d{4}$}.match?(value)
      add_token(:date, value)
    elsif /^(\d+)([dmy])$/.match?(value)
      add_token(:relative_time, value)
    else
      add_token(:word, value)
    end
  end
end
|
152
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Gmail Search Syntax Parser
|
2
|
+
#
|
3
|
+
# Parses Gmail search queries into an Abstract Syntax Tree (AST).
|
4
|
+
# Based on the official Gmail search operators documentation:
|
5
|
+
# https://support.google.com/mail/answer/7190
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
# ast = GmailSearchSyntax.parse!("from:boss subject:meeting")
|
9
|
+
# # => #<And #<Operator from: "boss"> AND #<Operator subject: "meeting">>
|
10
|
+
|
11
|
+
module GmailSearchSyntax
  require_relative "gmail_search_syntax/version"

  # Constants loaded on first reference, mapped to the feature that defines
  # them. Both SQL visitors live in the same file.
  {
    Tokenizer: "gmail_search_syntax/tokenizer",
    Parser: "gmail_search_syntax/parser",
    AST: "gmail_search_syntax/ast",
    SQLiteVisitor: "gmail_search_syntax/sql_visitor",
    PostgresVisitor: "gmail_search_syntax/sql_visitor"
  }.each { |const_name, feature| autoload const_name, feature }

  class EmptyQueryError < StandardError; end

  class << self
    # Backward compatibility: the first reference to SqlVisitor defines it
    # as an alias of SQLiteVisitor. Any other missing constant falls through
    # to the default behavior.
    def const_missing(name)
      return super unless name == :SqlVisitor

      const_set(:SqlVisitor, SQLiteVisitor)
    end

    # Tokenizes the query string and returns the result of Parser#parse!.
    def parse!(query)
      token_stream = Tokenizer.new(query).tokenize
      Parser.new(token_stream).parse!
    end
  end
end
|