gmail_search_syntax 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/GMAIL_BEHAVIOR_COMPARISON.md +166 -0
- data/GMAIL_COMPATIBILITY_COMPLETE.md +236 -0
- data/IMPLEMENTATION_NOTES.md +174 -0
- data/README.md +2 -2
- data/examples/escaped_quotes_demo.rb +152 -0
- data/examples/gmail_comparison_demo.rb +82 -0
- data/examples/text_vs_substring_demo.rb +93 -0
- data/lib/gmail_search_syntax/ast.rb +14 -2
- data/lib/gmail_search_syntax/parser.rb +45 -27
- data/lib/gmail_search_syntax/sql_visitor.rb +22 -5
- data/lib/gmail_search_syntax/tokenizer.rb +47 -12
- data/lib/gmail_search_syntax/version.rb +1 -1
- data/test/gmail_search_syntax_test.rb +246 -186
- data/test/sql_visitor_test.rb +44 -1
- data/test/tokenizer_test.rb +204 -118
- metadata +7 -1
data/test/sql_visitor_test.rb
CHANGED
@@ -244,9 +244,21 @@ class SqlVisitorTest < Minitest::Test
   def test_plain_text_search
     sql, params = parse_and_visit("meeting")
 
+    # Text nodes now use word boundary matching
+    assert_includes sql, "m0.subject = ?"
     assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body = ?"
     assert_includes sql, "m0.body LIKE ?"
-    assert_equal ["meeting", "%meeting%", "%meeting%"], params
+    assert_equal ["meeting", "meeting %", "% meeting", "% meeting %", "meeting", "meeting %", "% meeting", "% meeting %"], params
+  end
+
+  def test_quoted_text_search_uses_substring
+    sql, params = parse_and_visit('"meeting"')
+
+    # Quoted strings create Substring nodes which use LIKE %value%
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body LIKE ?"
+    assert_equal ["%meeting%", "%meeting%"], params
   end
 
   def test_complex_query
@@ -343,4 +355,35 @@ class SqlVisitorTest < Minitest::Test
     join_count = sql.scan("INNER JOIN message_addresses").length
     assert_equal 2, join_count
   end
+
+  def test_quoted_string_with_escaped_quotes
+    sql, params = parse_and_visit('"She said \\"hello\\" to me"')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body LIKE ?"
+    assert_equal ['%She said "hello" to me%', '%She said "hello" to me%'], params
+  end
+
+  def test_subject_with_escaped_quotes
+    sql, params = parse_and_visit('subject:"Meeting: \\"Q1 Review\\""')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_equal ['%Meeting: "Q1 Review"%'], params
+  end
+
+  def test_unquoted_token_with_escaped_quote
+    sql, params = parse_and_visit('meeting\\"room')
+
+    # Unquoted tokens use word boundary matching
+    assert_includes sql, "m0.subject = ?"
+    assert_includes sql, "m0.body = ?"
+    assert_equal ['meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %', 'meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %'], params
+  end
+
+  def test_operator_with_unquoted_escaped_quote
+    sql, params = parse_and_visit('subject:test\\"value')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_equal ['%test"value%'], params
+  end
 end
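The new parameter lists above encode the word-boundary approximation in portable SQL: an unquoted term is matched with one equality check plus three space-padded LIKE patterns, while a quoted term falls back to a plain substring LIKE. A minimal sketch of those pattern sets (hypothetical helper names for illustration, not code from the gem):

# Hypothetical illustration of the pattern sets asserted in the tests above;
# the gem's actual SqlVisitor may build them differently.
def word_boundary_patterns(term)
  [
    term,          # column = ?      -- the field is exactly the word
    "#{term} %",   # column LIKE ?   -- word at the start of the field
    "% #{term}",   # column LIKE ?   -- word at the end of the field
    "% #{term} %"  # column LIKE ?   -- word in the middle of the field
  ]
end

def substring_pattern(term)
  "%#{term}%"      # quoted strings match anywhere, even inside other words
end

word_boundary_patterns("meeting") # => ["meeting", "meeting %", "% meeting", "% meeting %"]
substring_pattern("meeting")      # => "%meeting%"

The eight parameters asserted in test_plain_text_search are this four-pattern set applied once to m0.subject and once to m0.body.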
data/test/tokenizer_test.rb
CHANGED
@@ -5,181 +5,267 @@ class TokenizerTest < Minitest::Test
     GmailSearchSyntax::Tokenizer.new(input).tokenize
   end
 
+  def assert_token_stream(expected_tokens, actual_tokens)
+    assert expected_tokens.length > 0
+    assert_equal expected_tokens.length, actual_tokens.length, "Expected #{expected_tokens.length} tokens, got #{actual_tokens.length}"
+
+    expected_tokens.each_with_index do |expected_token, index|
+      actual_token = actual_tokens[index]
+      expected_token.each do |property, expected_value|
+        actual_value = actual_token.public_send(property)
+        assert_equal expected_value, actual_value, "Token #{index} #{actual_token}: expected #{property} to be #{expected_value.inspect}, got #{actual_value.inspect}"
+      end
+    end
+  end
+
   def test_tokenize_simple_from
     tokens = tokenize("from:amy@example.com")
-
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_quoted_string
     tokens = tokenize('"hello world"')
-
-
-
+    expected = [
+      {type: :quoted_string, value: "hello world"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_operators
     tokens = tokenize("from:amy@example.com OR to:bob@example.com")
-
-
-
-
-
-
-
-
-
-
-
-    assert_equal :email, tokens[6].type
-    assert_equal "bob@example.com", tokens[6].value
-    assert_equal :eof, tokens[7].type
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :or},
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "bob@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_parentheses
     tokens = tokenize("subject:(meeting call)")
-
-
-
-
-
-
-
-
-
-
-    assert_equal :rparen, tokens[5].type
-    assert_equal :eof, tokens[6].type
+    expected = [
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :lparen},
+      {type: :word, value: "meeting"},
+      {type: :word, value: "call"},
+      {type: :rparen},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_braces
     tokens = tokenize("{from:a from:b}")
-
-
-
-
-
-
-
-
-
-
-
-
-    assert_equal "b", tokens[6].value
-    assert_equal :rbrace, tokens[7].type
-    assert_equal :eof, tokens[8].type
+    expected = [
+      {type: :lbrace},
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :word, value: "a"},
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :word, value: "b"},
+      {type: :rbrace},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_negation
     tokens = tokenize("dinner -movie")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "dinner"},
+      {type: :minus},
+      {type: :word, value: "movie"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_around
     tokens = tokenize("holiday AROUND 10 vacation")
-
-
-
-
-
-
-
-
-    assert_equal "vacation", tokens[3].value
-    assert_equal :eof, tokens[4].type
+    expected = [
+      {type: :word, value: "holiday"},
+      {type: :around},
+      {type: :number, value: 10},
+      {type: :word, value: "vacation"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
  end
 
   def test_tokenize_date
     tokens = tokenize("after:2004/04/16")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "after"},
+      {type: :colon},
+      {type: :date, value: "2004/04/16"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_relative_time
     tokens = tokenize("older_than:1y")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "older_than"},
+      {type: :colon},
+      {type: :relative_time, value: "1y"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_number
     tokens = tokenize("size:1000000")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "size"},
+      {type: :colon},
+      {type: :number, value: 1000000},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_and_operator
     tokens = tokenize("from:amy@example.com AND to:bob@example.com")
-
-
-
-
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :and, value: "AND"},
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "bob@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_plus
     tokens = tokenize("+unicorn")
-
-
-
-
+    expected = [
+      {type: :plus},
+      {type: :word, value: "unicorn"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_complex_query
     tokens = tokenize('from:boss@example.com subject:"urgent meeting" has:attachment')
-
-
-
-
-
-
-
-
-
-
-
-
-
-    assert_equal "has", tokens[6].value
-    assert_equal :colon, tokens[7].type
-    assert_equal :word, tokens[8].type
-    assert_equal "attachment", tokens[8].value
-    assert_equal :eof, tokens[9].type
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "boss@example.com"},
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :quoted_string, value: "urgent meeting"},
+      {type: :word, value: "has"},
+      {type: :colon},
+      {type: :word, value: "attachment"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_email_with_plus
     tokens = tokenize("to:user+tag@example.com")
-
-
+    expected = [
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "user+tag@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_multiple_words
     tokens = tokenize("project report meeting")
-
-
-
-
-
+    expected = [
+      {type: :word, value: "project"},
+      {type: :word, value: "report"},
+      {type: :word, value: "meeting"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_escaped_quote
+    tokens = tokenize('"She said \\"hello\\" to me"')
+    expected = [
+      {type: :quoted_string, value: 'She said "hello" to me'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_escaped_backslash
+    tokens = tokenize('"path\\\\to\\\\file"')
+    expected = [
+      {type: :quoted_string, value: 'path\\to\\file'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_multiple_escapes
+    tokens = tokenize('"test \\"nested\\" and \\\\ slash"')
+    expected = [
+      {type: :quoted_string, value: 'test "nested" and \\ slash'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_word_with_escaped_quote
+    tokens = tokenize('meeting\\"room')
+    expected = [
+      {type: :word, value: 'meeting"room'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_word_with_escaped_backslash
+    tokens = tokenize('path\\\\to')
+    expected = [
+      {type: :word, value: 'path\\to'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_multiple_words_with_escapes
+    tokens = tokenize('meeting\\"room another\\\\word')
+    expected = [
+      {type: :word, value: 'meeting"room'},
+      {type: :word, value: 'another\\word'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_operator_value_with_escaped_quote
+    tokens = tokenize('subject:test\\"value')
+    expected = [
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :word, value: 'test"value'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 end
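Taken together, the escape tests pin down the tokenizer's contract: a backslash before a double quote or another backslash is consumed, and the escaped character is kept literally, both inside quoted strings and in bare words. The tests drive it through GmailSearchSyntax::Tokenizer.new(input).tokenize, which returns tokens responding to type and value. A minimal sketch of the unescaping rule as a standalone function (hypothetical; the gem's Tokenizer applies it while scanning rather than as a post-pass):

# Hypothetical unescape step matching the expectations asserted above.
def unescape(raw)
  out = +""
  i = 0
  while i < raw.length
    if raw[i] == "\\" && (raw[i + 1] == '"' || raw[i + 1] == "\\")
      out << raw[i + 1]  # drop the backslash, keep the escaped character
      i += 2
    else
      out << raw[i]
      i += 1
    end
  end
  out
end

unescape('She said \\"hello\\" to me')  # => 'She said "hello" to me'
unescape('path\\\\to\\\\file')          # => 'path\to\file'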
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: gmail_search_syntax
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.2
 platform: ruby
 authors:
 - me@julik.nl
@@ -70,14 +70,20 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ARCHITECTURE.md
+- GMAIL_BEHAVIOR_COMPARISON.md
+- GMAIL_COMPATIBILITY_COMPLETE.md
+- IMPLEMENTATION_NOTES.md
 - README.md
 - Rakefile
 - SCHEMA.md
 - examples/alias_collision_fix.rb
 - examples/demo.rb
+- examples/escaped_quotes_demo.rb
+- examples/gmail_comparison_demo.rb
 - examples/gmail_message_id_demo.rb
 - examples/postgres_vs_sqlite.rb
 - examples/sql_query.rb
+- examples/text_vs_substring_demo.rb
 - lib/GMAIL_SEARCH_OPERATORS.md
 - lib/gmail_search_syntax.rb
 - lib/gmail_search_syntax/ast.rb