gmail_search_syntax 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -244,9 +244,21 @@ class SqlVisitorTest < Minitest::Test
   def test_plain_text_search
     sql, params = parse_and_visit("meeting")
 
+    # Text nodes now use word boundary matching
+    assert_includes sql, "m0.subject = ?"
     assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body = ?"
     assert_includes sql, "m0.body LIKE ?"
-    assert_equal ["meeting", "%meeting%", "%meeting%"], params
+    assert_equal ["meeting", "meeting %", "% meeting", "% meeting %", "meeting", "meeting %", "% meeting", "% meeting %"], params
+  end
+
+  def test_quoted_text_search_uses_substring
+    sql, params = parse_and_visit('"meeting"')
+
+    # Quoted strings create Substring nodes which use LIKE %value%
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body LIKE ?"
+    assert_equal ["%meeting%", "%meeting%"], params
   end
 
   def test_complex_query
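
Note on the hunk above: the rewritten params assertion implies that each bare text term now expands into four patterns per column (one exact match plus three LIKE patterns that simulate word boundaries with surrounding spaces), applied to both subject and body. A minimal sketch of that expansion, assuming a hypothetical helper name word_boundary_patterns rather than the gem's actual API:

    # Hypothetical helper illustrating the four word-boundary patterns the
    # test expects per column; not the gem's actual implementation.
    def word_boundary_patterns(term)
      [
        term,          # exact match: the term is the entire field (column = ?)
        "#{term} %",   # term at the start of the field (column LIKE ?)
        "% #{term}",   # term at the end of the field
        "% #{term} %"  # term in the middle, spaces on both sides
      ]
    end

    word_boundary_patterns("meeting")
    # => ["meeting", "meeting %", "% meeting", "% meeting %"]

Applied first to subject and then to body, this yields exactly the eight params asserted in test_plain_text_search.
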
@@ -343,4 +355,35 @@ class SqlVisitorTest < Minitest::Test
     join_count = sql.scan("INNER JOIN message_addresses").length
     assert_equal 2, join_count
   end
+
+  def test_quoted_string_with_escaped_quotes
+    sql, params = parse_and_visit('"She said \\"hello\\" to me"')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body LIKE ?"
+    assert_equal ['%She said "hello" to me%', '%She said "hello" to me%'], params
+  end
+
+  def test_subject_with_escaped_quotes
+    sql, params = parse_and_visit('subject:"Meeting: \\"Q1 Review\\""')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_equal ['%Meeting: "Q1 Review"%'], params
+  end
+
+  def test_unquoted_token_with_escaped_quote
+    sql, params = parse_and_visit('meeting\\"room')
+
+    # Unquoted tokens use word boundary matching
+    assert_includes sql, "m0.subject = ?"
+    assert_includes sql, "m0.body = ?"
+    assert_equal ['meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %', 'meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %'], params
+  end
+
+  def test_operator_with_unquoted_escaped_quote
+    sql, params = parse_and_visit('subject:test\\"value')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_equal ['%test"value%'], params
+  end
 end
@@ -5,181 +5,267 @@ class TokenizerTest < Minitest::Test
     GmailSearchSyntax::Tokenizer.new(input).tokenize
   end
 
+  def assert_token_stream(expected_tokens, actual_tokens)
+    assert expected_tokens.length > 0
+    assert_equal expected_tokens.length, actual_tokens.length, "Expected #{expected_tokens.length} tokens, got #{actual_tokens.length}"
+
+    expected_tokens.each_with_index do |expected_token, index|
+      actual_token = actual_tokens[index]
+      expected_token.each do |property, expected_value|
+        actual_value = actual_token.public_send(property)
+        assert_equal expected_value, actual_value, "Token #{index} #{actual_token}: expected #{property} to be #{expected_value.inspect}, got #{actual_value.inspect}"
+      end
+    end
+  end
+
   def test_tokenize_simple_from
     tokens = tokenize("from:amy@example.com")
-    assert_equal 4, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "from", tokens[0].value
-    assert_equal :colon, tokens[1].type
-    assert_equal :email, tokens[2].type
-    assert_equal "amy@example.com", tokens[2].value
-    assert_equal :eof, tokens[3].type
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_quoted_string
     tokens = tokenize('"hello world"')
-    assert_equal 2, tokens.length
-    assert_equal :quoted_string, tokens[0].type
-    assert_equal "hello world", tokens[0].value
+    expected = [
+      {type: :quoted_string, value: "hello world"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_operators
     tokens = tokenize("from:amy@example.com OR to:bob@example.com")
-
-    assert_equal 8, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "from", tokens[0].value
-    assert_equal :colon, tokens[1].type
-    assert_equal :email, tokens[2].type
-    assert_equal "amy@example.com", tokens[2].value
-    assert_equal :or, tokens[3].type
-    assert_equal :word, tokens[4].type
-    assert_equal "to", tokens[4].value
-    assert_equal :colon, tokens[5].type
-    assert_equal :email, tokens[6].type
-    assert_equal "bob@example.com", tokens[6].value
-    assert_equal :eof, tokens[7].type
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :or},
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "bob@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_parentheses
     tokens = tokenize("subject:(meeting call)")
-
-    assert_equal 7, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "subject", tokens[0].value
-    assert_equal :colon, tokens[1].type
-    assert_equal :lparen, tokens[2].type
-    assert_equal :word, tokens[3].type
-    assert_equal "meeting", tokens[3].value
-    assert_equal :word, tokens[4].type
-    assert_equal "call", tokens[4].value
-    assert_equal :rparen, tokens[5].type
-    assert_equal :eof, tokens[6].type
+    expected = [
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :lparen},
+      {type: :word, value: "meeting"},
+      {type: :word, value: "call"},
+      {type: :rparen},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_braces
     tokens = tokenize("{from:a from:b}")
-
-    assert_equal 9, tokens.length
-    assert_equal :lbrace, tokens[0].type
-    assert_equal :word, tokens[1].type
-    assert_equal "from", tokens[1].value
-    assert_equal :colon, tokens[2].type
-    assert_equal :word, tokens[3].type
-    assert_equal "a", tokens[3].value
-    assert_equal :word, tokens[4].type
-    assert_equal "from", tokens[4].value
-    assert_equal :colon, tokens[5].type
-    assert_equal :word, tokens[6].type
-    assert_equal "b", tokens[6].value
-    assert_equal :rbrace, tokens[7].type
-    assert_equal :eof, tokens[8].type
+    expected = [
+      {type: :lbrace},
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :word, value: "a"},
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :word, value: "b"},
+      {type: :rbrace},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_negation
     tokens = tokenize("dinner -movie")
-    assert_equal 4, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "dinner", tokens[0].value
-    assert_equal :minus, tokens[1].type
-    assert_equal :word, tokens[2].type
-    assert_equal "movie", tokens[2].value
+    expected = [
+      {type: :word, value: "dinner"},
+      {type: :minus},
+      {type: :word, value: "movie"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_around
     tokens = tokenize("holiday AROUND 10 vacation")
-
-    assert_equal 5, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "holiday", tokens[0].value
-    assert_equal :around, tokens[1].type
-    assert_equal :number, tokens[2].type
-    assert_equal 10, tokens[2].value
-    assert_equal :word, tokens[3].type
-    assert_equal "vacation", tokens[3].value
-    assert_equal :eof, tokens[4].type
+    expected = [
+      {type: :word, value: "holiday"},
+      {type: :around},
+      {type: :number, value: 10},
+      {type: :word, value: "vacation"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
  end
 
   def test_tokenize_date
     tokens = tokenize("after:2004/04/16")
-    assert_equal 4, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "after", tokens[0].value
-    assert_equal :colon, tokens[1].type
-    assert_equal :date, tokens[2].type
-    assert_equal "2004/04/16", tokens[2].value
+    expected = [
+      {type: :word, value: "after"},
+      {type: :colon},
+      {type: :date, value: "2004/04/16"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_relative_time
     tokens = tokenize("older_than:1y")
-    assert_equal 4, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "older_than", tokens[0].value
-    assert_equal :colon, tokens[1].type
-    assert_equal :relative_time, tokens[2].type
-    assert_equal "1y", tokens[2].value
+    expected = [
+      {type: :word, value: "older_than"},
+      {type: :colon},
+      {type: :relative_time, value: "1y"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_number
     tokens = tokenize("size:1000000")
-    assert_equal 4, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "size", tokens[0].value
-    assert_equal :colon, tokens[1].type
-    assert_equal :number, tokens[2].type
-    assert_equal 1000000, tokens[2].value
+    expected = [
+      {type: :word, value: "size"},
+      {type: :colon},
+      {type: :number, value: 1000000},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_and_operator
     tokens = tokenize("from:amy@example.com AND to:bob@example.com")
-
-    and_token = tokens.find { |t| t.type == :and }
-    refute_nil and_token
-    assert_equal "AND", and_token.value
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :and, value: "AND"},
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "bob@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_plus
     tokens = tokenize("+unicorn")
-    assert_equal 3, tokens.length
-    assert_equal :plus, tokens[0].type
-    assert_equal :word, tokens[1].type
-    assert_equal "unicorn", tokens[1].value
+    expected = [
+      {type: :plus},
+      {type: :word, value: "unicorn"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_complex_query
     tokens = tokenize('from:boss@example.com subject:"urgent meeting" has:attachment')
-
-    assert_equal 10, tokens.length
-    assert_equal :word, tokens[0].type
-    assert_equal "from", tokens[0].value
-    assert_equal :colon, tokens[1].type
-    assert_equal :email, tokens[2].type
-    assert_equal "boss@example.com", tokens[2].value
-    assert_equal :word, tokens[3].type
-    assert_equal "subject", tokens[3].value
-    assert_equal :colon, tokens[4].type
-    assert_equal :quoted_string, tokens[5].type
-    assert_equal "urgent meeting", tokens[5].value
-    assert_equal :word, tokens[6].type
-    assert_equal "has", tokens[6].value
-    assert_equal :colon, tokens[7].type
-    assert_equal :word, tokens[8].type
-    assert_equal "attachment", tokens[8].value
-    assert_equal :eof, tokens[9].type
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "boss@example.com"},
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :quoted_string, value: "urgent meeting"},
+      {type: :word, value: "has"},
+      {type: :colon},
+      {type: :word, value: "attachment"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_email_with_plus
     tokens = tokenize("to:user+tag@example.com")
-    email_token = tokens.find { |t| t.type == :email }
-    assert_equal "user+tag@example.com", email_token.value
+    expected = [
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "user+tag@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_multiple_words
     tokens = tokenize("project report meeting")
-    word_tokens = tokens.select { |t| t.type == :word }
-    assert_equal 3, word_tokens.length
-    assert_equal "project", word_tokens[0].value
-    assert_equal "report", word_tokens[1].value
-    assert_equal "meeting", word_tokens[2].value
+    expected = [
+      {type: :word, value: "project"},
+      {type: :word, value: "report"},
+      {type: :word, value: "meeting"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_escaped_quote
+    tokens = tokenize('"She said \\"hello\\" to me"')
+    expected = [
+      {type: :quoted_string, value: 'She said "hello" to me'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_escaped_backslash
+    tokens = tokenize('"path\\\\to\\\\file"')
+    expected = [
+      {type: :quoted_string, value: 'path\\to\\file'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_multiple_escapes
+    tokens = tokenize('"test \\"nested\\" and \\\\ slash"')
+    expected = [
+      {type: :quoted_string, value: 'test "nested" and \\ slash'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_word_with_escaped_quote
+    tokens = tokenize('meeting\\"room')
+    expected = [
+      {type: :word, value: 'meeting"room'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_word_with_escaped_backslash
+    tokens = tokenize('path\\\\to')
+    expected = [
+      {type: :word, value: 'path\\to'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_multiple_words_with_escapes
+    tokens = tokenize('meeting\\"room another\\\\word')
+    expected = [
+      {type: :word, value: 'meeting"room'},
+      {type: :word, value: 'another\\word'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_operator_value_with_escaped_quote
+    tokens = tokenize('subject:test\\"value')
+    expected = [
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :word, value: 'test"value'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 end
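
The escape-handling tests above all exercise the same unescaping rule: in the raw query, a backslash makes the following quote or backslash literal, both inside quoted strings and in bare words. A minimal sketch of that rule, using a hypothetical unescape helper; the gem's actual tokenizer likely applies it character by character while scanning:

    # Hypothetical illustration of the escape rule the tests describe:
    # \" becomes " and \\ becomes \; everything else passes through.
    def unescape(raw)
      raw.gsub(/\\(["\\])/, '\1')
    end

    unescape('She said \"hello\" to me')  # => 'She said "hello" to me'
    unescape('path\\\\to\\\\file')        # => path\to\file (single backslashes)

Note that in a test like tokenize('path\\\\to'), Ruby's single-quote escaping halves the backslashes once, so the tokenizer receives two literal backslashes and collapses them to one.
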
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: gmail_search_syntax
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.2
 platform: ruby
 authors:
 - me@julik.nl
@@ -70,14 +70,20 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ARCHITECTURE.md
+- GMAIL_BEHAVIOR_COMPARISON.md
+- GMAIL_COMPATIBILITY_COMPLETE.md
+- IMPLEMENTATION_NOTES.md
 - README.md
 - Rakefile
 - SCHEMA.md
 - examples/alias_collision_fix.rb
 - examples/demo.rb
+- examples/escaped_quotes_demo.rb
+- examples/gmail_comparison_demo.rb
 - examples/gmail_message_id_demo.rb
 - examples/postgres_vs_sqlite.rb
 - examples/sql_query.rb
+- examples/text_vs_substring_demo.rb
 - lib/GMAIL_SEARCH_OPERATORS.md
 - lib/gmail_search_syntax.rb
 - lib/gmail_search_syntax/ast.rb