gmail_search_syntax 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +338 -0
- data/README.md +129 -0
- data/Rakefile +11 -0
- data/SCHEMA.md +223 -0
- data/examples/alias_collision_fix.rb +43 -0
- data/examples/demo.rb +28 -0
- data/examples/gmail_message_id_demo.rb +118 -0
- data/examples/postgres_vs_sqlite.rb +55 -0
- data/examples/sql_query.rb +47 -0
- data/lib/GMAIL_SEARCH_OPERATORS.md +58 -0
- data/lib/gmail_search_syntax/ast.rb +100 -0
- data/lib/gmail_search_syntax/parser.rb +224 -0
- data/lib/gmail_search_syntax/sql_visitor.rb +496 -0
- data/lib/gmail_search_syntax/tokenizer.rb +152 -0
- data/lib/gmail_search_syntax/version.rb +3 -0
- data/lib/gmail_search_syntax.rb +34 -0
- data/test/gmail_search_syntax_test.rb +691 -0
- data/test/integration_test.rb +668 -0
- data/test/postgres_visitor_test.rb +156 -0
- data/test/sql_visitor_test.rb +346 -0
- data/test/test_helper.rb +27 -0
- data/test/tokenizer_test.rb +185 -0
- metadata +115 -0
@@ -0,0 +1,668 @@
|
|
1
|
+
require "test_helper"
require "sqlite3"
require "time"
require "yaml"
|
4
|
+
|
5
|
+
class IntegrationTest < Minitest::Test
|
6
|
+
include GmailMessageIdHelper
|
7
|
+
|
8
|
+
# Runs before each test: opens a brand-new in-memory SQLite database and then
# builds the schema and fixture data by invoking the three seeding steps in
# order (schema first, labels second, messages last).
def setup
  @db = SQLite3::Database.new(":memory:")
  %i[create_tables seed_labels seed_messages].each { |step| send(step) }
end
|
14
|
+
|
15
|
+
# Runs after each test: closes the database handle held in @db, doing nothing
# when no handle was ever assigned.
def teardown
  @db.close unless @db.nil?
end
|
18
|
+
|
19
|
+
# Prints +message+ to standard output, but only when the DEBUG environment
# variable is set; otherwise stays silent.
def debug(message)
  return unless ENV["DEBUG"]

  puts message
end
|
22
|
+
|
23
|
+
# Creates the test schema in @db with a single batch of DDL statements:
# the messages, message_addresses, labels, message_labels and attachments
# tables plus their secondary indexes.
def create_tables
  schema = <<-SQL
    CREATE TABLE messages (
      id TEXT PRIMARY KEY,
      rfc822_message_id TEXT,
      subject TEXT,
      body TEXT,
      internal_date DATETIME,
      size_bytes INTEGER,

      is_important INTEGER DEFAULT 0,
      is_starred INTEGER DEFAULT 0,
      is_unread INTEGER DEFAULT 0,
      is_read INTEGER DEFAULT 0,
      is_muted INTEGER DEFAULT 0,

      in_inbox INTEGER DEFAULT 1,
      in_archive INTEGER DEFAULT 0,
      in_snoozed INTEGER DEFAULT 0,
      in_spam INTEGER DEFAULT 0,
      in_trash INTEGER DEFAULT 0,

      has_attachment INTEGER DEFAULT 0,
      has_youtube INTEGER DEFAULT 0,
      has_drive INTEGER DEFAULT 0,
      has_document INTEGER DEFAULT 0,
      has_spreadsheet INTEGER DEFAULT 0,
      has_presentation INTEGER DEFAULT 0,

      has_yellow_star INTEGER DEFAULT 0,
      has_orange_star INTEGER DEFAULT 0,
      has_red_star INTEGER DEFAULT 0,
      has_purple_star INTEGER DEFAULT 0,
      has_blue_star INTEGER DEFAULT 0,
      has_green_star INTEGER DEFAULT 0,
      has_red_bang INTEGER DEFAULT 0,
      has_orange_guillemet INTEGER DEFAULT 0,
      has_yellow_bang INTEGER DEFAULT 0,
      has_green_check INTEGER DEFAULT 0,
      has_blue_info INTEGER DEFAULT 0,
      has_purple_question INTEGER DEFAULT 0,

      category TEXT,
      mailing_list TEXT,

      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    );

    CREATE INDEX idx_messages_internal_date ON messages(internal_date);
    CREATE INDEX idx_messages_size_bytes ON messages(size_bytes);
    CREATE INDEX idx_messages_rfc822_message_id ON messages(rfc822_message_id);
    CREATE INDEX idx_messages_category ON messages(category);
    CREATE INDEX idx_messages_mailing_list ON messages(mailing_list);

    CREATE TABLE message_addresses (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      message_id TEXT NOT NULL,
      address_type TEXT NOT NULL,
      email_address TEXT NOT NULL,
      display_name TEXT,

      FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
    );

    CREATE INDEX idx_message_addresses_message_id ON message_addresses(message_id);
    CREATE INDEX idx_message_addresses_email ON message_addresses(email_address);
    CREATE INDEX idx_message_addresses_type_email ON message_addresses(address_type, email_address);

    CREATE TABLE labels (
      id TEXT PRIMARY KEY,
      name TEXT NOT NULL UNIQUE,
      is_system_label INTEGER DEFAULT 0,
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    );

    CREATE INDEX idx_labels_name ON labels(name);

    CREATE TABLE message_labels (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      message_id TEXT NOT NULL,
      label_id TEXT NOT NULL,

      FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE,
      FOREIGN KEY (label_id) REFERENCES labels(id) ON DELETE CASCADE,
      UNIQUE(message_id, label_id)
    );

    CREATE INDEX idx_message_labels_message_id ON message_labels(message_id);
    CREATE INDEX idx_message_labels_label_id ON message_labels(label_id);
    CREATE INDEX idx_message_labels_both ON message_labels(message_id, label_id);

    CREATE TABLE attachments (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      message_id TEXT NOT NULL,
      filename TEXT NOT NULL,
      content_type TEXT,
      size_bytes INTEGER,

      FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
    );

    CREATE INDEX idx_attachments_message_id ON attachments(message_id);
    CREATE INDEX idx_attachments_filename ON attachments(filename);
  SQL

  @db.execute_batch(schema)
end
|
128
|
+
|
129
|
+
# Inserts the label fixtures into the labels table. The fixtures are kept as
# an inline YAML document; for every entry only id, name and a 0/1 system flag
# (1 when the entry's type is "system") are written to the database.
def seed_labels
  label_fixture = <<~YAML
    ---
    - id: CHAT
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: CHAT
      type: system
    - id: SENT
      name: SENT
      type: system
    - id: INBOX
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: INBOX
      type: system
    - id: IMPORTANT
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: IMPORTANT
      type: system
    - id: TRASH
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: TRASH
      type: system
    - id: DRAFT
      name: DRAFT
      type: system
    - id: SPAM
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: SPAM
      type: system
    - id: CATEGORY_FORUMS
      name: CATEGORY_FORUMS
      type: system
    - id: CATEGORY_UPDATES
      name: CATEGORY_UPDATES
      type: system
    - id: CATEGORY_PERSONAL
      name: CATEGORY_PERSONAL
      type: system
    - id: CATEGORY_PROMOTIONS
      name: CATEGORY_PROMOTIONS
      type: system
    - id: CATEGORY_SOCIAL
      name: CATEGORY_SOCIAL
      type: system
    - id: STARRED
      name: STARRED
      type: system
    - id: UNREAD
      name: UNREAD
      type: system
    - color:
        background_color: "#c9daf8"
        text_color: "#285bac"
      id: Label_10
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: "📥 Next Brief"
      type: user
    - color:
        background_color: "#ffe6c7"
        text_color: "#a46a21"
      id: Label_11
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: "✉️ All Briefs"
      type: user
    - id: Label_12
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora
      type: user
    - id: Label_13
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora/Other
      type: user
    - id: Label_14
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora/Newsletter
      type: user
    - id: Label_15
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora/Action
      type: user
    - id: Label_16
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora/Promotion
      type: user
    - id: Label_17
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora/Every (Every.To)
      type: user
    - id: Label_18
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora/Important Info
      type: user
    - id: Label_19
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Cora/Google Drive
      type: user
    - id: Label_2
      name: Apple Mail To Do
      type: user
    - id: Label_4
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Drafts (gmail)
      type: user
    - id: Label_5
      label_list_visibility: labelShow
      message_list_visibility: hide
      name: Junk (gmail)
      type: user
    - id: Label_6
      name: Notes
      type: user
    - id: Label_8
      name: Sent Messages (gmail)
      type: user
    - id: Label_9
      name: ror
      type: user
  YAML

  insert_sql = "INSERT INTO labels (id, name, is_system_label) VALUES (?, ?, ?)"

  YAML.load(label_fixture).each do |entry|
    system_flag = (entry["type"] == "system") ? 1 : 0
    @db.execute(insert_sql, [entry["id"], entry["name"], system_flag])
  end
end
|
274
|
+
|
275
|
+
# Seeds 100 randomised messages, each with one sender address, one to three
# recipient addresses, up to three user labels and (about 30% of the time)
# one or two attachments.
#
# Fix: the previous version listed 36 columns and 36 placeholders but supplied
# only 35 bind values (one flag value was missing). Every value after the gap
# shifted one column to the left: category was written into
# has_purple_question, mailing_list into category, and the mailing_list
# placeholder was left without a value. The column list, the placeholders and
# the flag values are now all derived from the same arrays so they cannot
# drift apart again.
def seed_messages
  senders = [
    "alice@example.com", "bob@example.com", "charlie@example.com",
    "david@company.com", "eve@startup.io", "frank@business.net",
    "grace@tech.com", "heidi@design.co", "ivan@marketing.com",
    "judy@sales.com"
  ]

  recipients = [
    "me@example.com", "team@example.com", "support@example.com",
    "info@example.com", "admin@example.com"
  ]

  subjects = [
    "Meeting tomorrow", "Quarterly report", "Project update",
    "Quick question", "Follow up", "Important announcement",
    "Weekly newsletter", "Invitation to event", "Budget proposal",
    "Code review needed"
  ]

  categories = ["primary", "social", "promotions", "updates", "forums", nil]

  mailing_lists = [
    "announcements@example.com", "dev-team@company.com",
    "newsletter@startup.io", nil, nil, nil
  ]

  attachment_names = [
    "report.pdf", "presentation.pptx", "spreadsheet.xlsx",
    "document.docx", "image.jpg", "archive.zip"
  ]

  # Every 0/1 flag column of the messages table, in insertion order.
  flag_columns = %w[
    is_important is_starred is_unread is_read is_muted
    in_inbox in_archive in_snoozed in_spam in_trash
    has_attachment has_youtube has_drive has_document has_spreadsheet has_presentation
    has_yellow_star has_orange_star has_red_star has_purple_star has_blue_star has_green_star
    has_red_bang has_orange_guillemet has_yellow_bang has_green_check has_blue_info has_purple_question
  ]
  message_columns =
    %w[id rfc822_message_id subject body internal_date size_bytes] +
    flag_columns +
    %w[category mailing_list]

  # One placeholder per column, generated rather than hand-counted.
  insert_message_sql =
    "INSERT INTO messages (#{message_columns.join(", ")}) " \
    "VALUES (#{Array.new(message_columns.size, "?").join(", ")})"
  insert_address_sql =
    "INSERT INTO message_addresses (message_id, address_type, email_address) VALUES (?, ?, ?)"

  # Only user labels are attached to seeded messages.
  label_ids = @db.execute("SELECT id FROM labels WHERE is_system_label = 0").flatten

  two_years_ago = Time.now - (2 * 365 * 24 * 60 * 60)
  now = Time.now

  100.times do |i|
    internal_date = Time.at(rand(two_years_ago.to_i..now.to_i))

    # Generate Gmail-style message ID based on timestamp
    # https://www.metaspike.com/dates-gmail-message-id-thread-id-timestamps/
    message_id = generate_gmail_message_id(internal_date)
    rfc822_id = "#{rand(1000000)}@example.com"

    subject = subjects.sample + " ##{i}"
    body = "This is the body of message #{i}. " + ("Lorem ipsum dolor sit amet. " * 10)

    size_bytes = rand(1024..10485760)

    category = categories.sample
    mailing_list = mailing_lists.sample

    # Exactly one random 0/1 value per flag column.
    flag_values = flag_columns.map { rand(2) }

    @db.execute(
      insert_message_sql,
      [
        message_id, rfc822_id, subject, body,
        internal_date.strftime("%Y-%m-%d %H:%M:%S"), size_bytes,
        *flag_values,
        category, mailing_list
      ]
    )

    @db.execute(insert_address_sql, [message_id, "from", senders.sample])

    rand(1..3).times do
      recipient = recipients.sample
      address_type = ["to", "cc"].sample
      @db.execute(insert_address_sql, [message_id, address_type, recipient])
    end

    rand(0..3).times do
      label_id = label_ids.sample
      begin
        @db.execute(
          "INSERT INTO message_labels (message_id, label_id) VALUES (?, ?)",
          [message_id, label_id]
        )
      rescue SQLite3::ConstraintException
        # Deliberately ignored: the same label can be sampled twice for one
        # message, which violates UNIQUE(message_id, label_id). Skipping the
        # duplicate is the intended best-effort behaviour.
      end
    end

    next unless rand < 0.3

    rand(1..2).times do
      filename = attachment_names.sample
      @db.execute(
        "INSERT INTO attachments (message_id, filename, content_type, size_bytes) VALUES (?, ?, ?, ?)",
        [message_id, filename, "application/octet-stream", rand(1024..5242880)]
      )
    end
  end
end
|
395
|
+
|
396
|
+
# Sanity-checks the fixture data created in setup: 100 messages, 30 labels
# and at least one address row. Row counts are echoed through debug.
#
# Fix: the address check previously re-asserted message_count, so an empty
# message_addresses table could never fail the test. It now asserts on
# address_count.
def test_database_setup_successful
  message_count = @db.get_first_value("SELECT COUNT(*) FROM messages")
  assert_equal 100, message_count

  label_count = @db.get_first_value("SELECT COUNT(*) FROM labels")
  assert_equal 30, label_count

  address_count = @db.get_first_value("SELECT COUNT(*) FROM message_addresses")
  assert address_count > 0, "Should have seeded at least one message address"

  debug "\nDatabase seeded successfully:"
  debug " Messages: #{message_count}"
  debug " Labels: #{label_count}"
  debug " Addresses: #{address_count}"
  debug " Attachments: #{@db.get_first_value("SELECT COUNT(*) FROM attachments")}"
  debug " Message-Label associations: #{@db.get_first_value("SELECT COUNT(*) FROM message_labels")}"
end
|
413
|
+
|
414
|
+
# Fetches the five newest messages with hand-written SQL and checks that
# exactly five rows come back in descending internal_date order.
def test_query_5_latest_messages
  latest_sql = <<-SQL
    SELECT m.id, m.subject, m.internal_date
    FROM messages m
    ORDER BY m.internal_date DESC
    LIMIT 5
  SQL
  rows = @db.execute(latest_sql)

  assert_equal 5, rows.size

  debug "\n5 Latest messages:"
  rows.each do |id, subject, date|
    debug " #{id}: #{subject} (#{date})"
  end

  # internal_date is the third selected column.
  dates = rows.map { |(_id, _subject, date)| date }
  newest_first = dates.sort.reverse
  assert_equal newest_first, dates, "Messages should be ordered by date DESC"
end
|
432
|
+
|
433
|
+
# Translates "from:alice@example.com" to SQL via SqlVisitor, runs it, and
# verifies that at least one message matches and that every match has
# alice@example.com stored as a 'from' address.
def test_query_with_from_operator
  ast = GmailSearchSyntax.parse!("from:alice@example.com")
  visitor = GmailSearchSyntax::SqlVisitor.new.tap { |v| v.visit(ast) }
  sql, params = visitor.to_query.to_sql

  rows = @db.execute(sql, params)
  assert rows.size.positive?, "Should find messages from alice@example.com"

  rows.each do |row|
    message_id = row.first
    sender_rows = @db.execute(
      "SELECT email_address FROM message_addresses WHERE message_id = ? AND address_type = 'from'",
      [message_id]
    )
    sender_emails = sender_rows.map(&:first)
    assert sender_emails.include?("alice@example.com"),
      "Message #{message_id} should have alice@example.com as sender"
  end

  debug "\nFound #{rows.size} messages from alice@example.com"
end
|
456
|
+
|
457
|
+
# Translates "subject:meeting" to SQL via SqlVisitor, runs it, and verifies
# that at least one message matches and that each match's subject contains
# "meeting" (compared case-insensitively).
def test_query_with_subject_operator
  ast = GmailSearchSyntax.parse!("subject:meeting")
  visitor = GmailSearchSyntax::SqlVisitor.new.tap { |v| v.visit(ast) }
  sql, params = visitor.to_query.to_sql

  rows = @db.execute(sql, params)
  assert rows.size.positive?, "Should find messages with 'meeting' in subject"

  rows.map(&:first).each do |message_id|
    subject = @db.get_first_value("SELECT subject FROM messages WHERE id = ?", [message_id])
    mentions_meeting = subject.downcase.include?("meeting")
    assert mentions_meeting,
      "Message #{message_id} subject '#{subject}' should contain 'meeting'"
  end

  debug "\nFound #{rows.size} messages with 'meeting' in subject"
end
|
477
|
+
|
478
|
+
# Translates "has:attachment" to SQL via SqlVisitor, runs it, and verifies
# that at least one message matches and that each match has has_attachment
# set to 1 in the messages table.
def test_query_with_has_attachment
  ast = GmailSearchSyntax.parse!("has:attachment")
  visitor = GmailSearchSyntax::SqlVisitor.new.tap { |v| v.visit(ast) }
  sql, params = visitor.to_query.to_sql

  rows = @db.execute(sql, params)
  assert rows.size.positive?, "Should find messages with attachments"

  rows.map(&:first).each do |message_id|
    flag = @db.get_first_value(
      "SELECT has_attachment FROM messages WHERE id = ?",
      [message_id]
    )
    assert_equal 1, flag, "Message #{message_id} should have has_attachment = 1"
  end

  debug "\nFound #{rows.size} messages with attachments"
end
|
500
|
+
|
501
|
+
# Translates the two-term query "from:alice@example.com subject:meeting" to
# SQL and verifies every returned message satisfies both terms: alice appears
# among its from/cc/bcc addresses and its subject contains "meeting".
# No minimum row count is asserted, so an empty result passes.
def test_query_with_complex_conditions
  ast = GmailSearchSyntax.parse!("from:alice@example.com subject:meeting")
  visitor = GmailSearchSyntax::SqlVisitor.new.tap { |v| v.visit(ast) }
  sql, params = visitor.to_query.to_sql

  rows = @db.execute(sql, params)

  rows.map(&:first).each do |message_id|
    address_rows = @db.execute(
      "SELECT email_address FROM message_addresses WHERE message_id = ? AND address_type IN ('from', 'cc', 'bcc')",
      [message_id]
    )
    emails = address_rows.map(&:first)
    assert emails.include?("alice@example.com"),
      "Message #{message_id} should have alice@example.com in from/cc/bcc"

    subject = @db.get_first_value("SELECT subject FROM messages WHERE id = ?", [message_id])
    mentions_meeting = subject.downcase.include?("meeting")
    assert mentions_meeting,
      "Message #{message_id} subject '#{subject}' should contain 'meeting'"
  end

  debug "\nFound #{rows.size} messages from alice@example.com with 'meeting' in subject"
end
|
527
|
+
|
528
|
+
# Confirms the "Cora" label fixture exists, then translates "label:Cora" to
# SQL and verifies every returned message is linked to a label named "Cora".
# No minimum row count is asserted, so an empty result passes.
def test_query_with_label
  label_name = "Cora"

  matching_labels = @db.get_first_value("SELECT COUNT(*) FROM labels WHERE name = ?", [label_name])
  assert matching_labels > 0, "Label '#{label_name}' should exist"

  ast = GmailSearchSyntax.parse!("label:Cora")
  visitor = GmailSearchSyntax::SqlVisitor.new.tap { |v| v.visit(ast) }
  sql, params = visitor.to_query.to_sql

  rows = @db.execute(sql, params)

  names_for_message_sql =
    "SELECT l.name FROM message_labels ml INNER JOIN labels l ON ml.label_id = l.id WHERE ml.message_id = ?"

  rows.map(&:first).each do |message_id|
    label_names = @db.execute(names_for_message_sql, [message_id]).map(&:first)
    assert label_names.include?("Cora"),
      "Message #{message_id} should have label 'Cora', has: #{label_names.inspect}"
  end

  debug "\nFound #{rows.size} messages with label 'Cora'"
end
|
558
|
+
|
559
|
+
# Translates "after:<date 365 days ago>" to SQL, runs it, and verifies that
# at least one message matches and that each match's internal_date is later
# than that cutoff date (parsed back from its YYYY/MM/DD form).
def test_query_with_date_range
  seconds_per_year = 365 * 24 * 60 * 60
  one_year_ago = (Time.now - seconds_per_year).strftime("%Y/%m/%d")
  cutoff = Time.parse(one_year_ago)

  ast = GmailSearchSyntax.parse!("after:#{one_year_ago}")
  visitor = GmailSearchSyntax::SqlVisitor.new.tap { |v| v.visit(ast) }
  sql, params = visitor.to_query.to_sql

  rows = @db.execute(sql, params)
  assert rows.size.positive?, "Should find messages from the last year"

  rows.map(&:first).each do |message_id|
    stored_date = @db.get_first_value(
      "SELECT internal_date FROM messages WHERE id = ?",
      [message_id]
    )
    internal_date = Time.parse(stored_date)
    assert internal_date > cutoff,
      "Message #{message_id} date #{internal_date} should be after #{cutoff}"
  end

  debug "\nFound #{rows.size} messages after #{one_year_ago}"
end
|
586
|
+
|
587
|
+
# Translates "larger:1M" to SQL, runs it, and verifies every returned message
# has size_bytes above 1,048,576. No minimum row count is asserted.
def test_query_with_size_filter
  ast = GmailSearchSyntax.parse!("larger:1M")
  visitor = GmailSearchSyntax::SqlVisitor.new.tap { |v| v.visit(ast) }
  sql, params = visitor.to_query.to_sql

  rows = @db.execute(sql, params)

  one_mebibyte = 1_048_576
  rows.map(&:first).each do |message_id|
    size = @db.get_first_value("SELECT size_bytes FROM messages WHERE id = ?", [message_id])
    assert size > one_mebibyte, "Message #{message_id} should be larger than 1M"
  end

  debug "\nFound #{rows.size} messages larger than 1M"
end
|
604
|
+
|
605
|
+
# Checks that seeded message IDs follow Gmail's pattern: 15-16 lowercase hex
# digits whose leading part (everything except the last five digits) is the
# message timestamp in milliseconds.
# Reference: https://www.metaspike.com/dates-gmail-message-id-thread-id-timestamps/
#
# Fix: the format check is anchored with \A and \z instead of ^ and $.
# In Ruby ^/$ match at line boundaries, so a multi-line string containing one
# well-formed line would previously have passed the check.
def test_gmail_message_ids_encode_timestamps
  rows = @db.execute("SELECT id, internal_date FROM messages LIMIT 5")

  rows.each do |message_id, internal_date_str|
    internal_date = Time.parse(internal_date_str)

    # Gmail message IDs should be 15-16 hex digits (whole string, not per line)
    assert message_id.match?(/\A[0-9a-f]{15,16}\z/), "Message ID should be 15-16 hex digits: #{message_id}"

    # Decode the timestamp from the message ID (drop last 5 digits, convert from hex)
    timestamp_hex = message_id[0..-6] # Drop last 5 digits
    decoded_timestamp_ms = timestamp_hex.to_i(16)
    decoded_time = Time.at(decoded_timestamp_ms / 1000.0)

    # The decoded timestamp should be very close to the internal_date
    # Allow a small margin of error due to timing
    time_diff = (decoded_time - internal_date).abs
    assert time_diff < 1, "Decoded timestamp should match internal_date within 1 second. " \
      "Message ID: #{message_id}, Internal: #{internal_date}, Decoded: #{decoded_time}, Diff: #{time_diff}s"

    debug "\nMessage ID #{message_id} decodes to #{decoded_time} (internal_date: #{internal_date})"
  end
end
|
631
|
+
|
632
|
+
# Verifies that passing a seeded Random through the random: keyword makes
# generate_gmail_message_id reproducible: equal seeds give equal IDs, different
# seeds give different IDs, and every ID still decodes to the input time.
def test_gmail_message_id_with_seeded_random
  test_time = Time.parse("2024-06-15 12:00:00 UTC")
  id_for_seed = ->(seed) { generate_gmail_message_id(test_time, random: Random.new(seed)) }

  # Same seed twice - IDs should be identical
  id1 = id_for_seed.call(12345)
  id2 = id_for_seed.call(12345)
  assert_equal id1, id2, "Same seed should produce same message ID"

  # Two different seeds - IDs should differ
  id3 = id_for_seed.call(11111)
  id4 = id_for_seed.call(22222)
  refute_equal id3, id4, "Different seeds should produce different message IDs"

  # All should decode to the same timestamp
  [id1, id2, id3, id4].each do |msg_id|
    # Everything except the last five hex digits is the millisecond timestamp.
    decoded_ms = msg_id[0..-6].to_i(16)
    decoded_time = Time.at(decoded_ms / 1000.0)

    drift = (decoded_time - test_time).abs
    assert drift < 1, "All IDs should decode to the same timestamp"
  end

  debug "\nSeeded ID 1: #{id1}"
  debug "Seeded ID 2: #{id2}"
  debug "Seeded ID 3: #{id3}"
  debug "Seeded ID 4: #{id4}"
end
|
668
|
+
end
|