gmail_search_syntax 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/GMAIL_BEHAVIOR_COMPARISON.md +166 -0
- data/GMAIL_COMPATIBILITY_COMPLETE.md +236 -0
- data/examples/gmail_comparison_demo.rb +82 -0
- data/lib/gmail_search_syntax/parser.rb +42 -24
- data/lib/gmail_search_syntax/tokenizer.rb +3 -3
- data/lib/gmail_search_syntax/version.rb +1 -1
- data/test/gmail_search_syntax_test.rb +185 -171
- data/test/tokenizer_test.rb +176 -144
- metadata +4 -1
data/test/tokenizer_test.rb
CHANGED
@@ -5,235 +5,267 @@ class TokenizerTest < Minitest::Test
|
|
5
5
|
GmailSearchSyntax::Tokenizer.new(input).tokenize
|
6
6
|
end
|
7
7
|
|
8
|
+
def assert_token_stream(expected_tokens, actual_tokens)
|
9
|
+
assert expected_tokens.length > 0
|
10
|
+
assert_equal expected_tokens.length, actual_tokens.length, "Expected #{expected_tokens.length} tokens, got #{actual_tokens.length}"
|
11
|
+
|
12
|
+
expected_tokens.each_with_index do |expected_token, index|
|
13
|
+
actual_token = actual_tokens[index]
|
14
|
+
expected_token.each do |property, expected_value|
|
15
|
+
actual_value = actual_token.public_send(property)
|
16
|
+
assert_equal expected_value, actual_value, "Token #{index} #{actual_token}: expected #{property} to be #{expected_value.inspect}, got #{actual_value.inspect}"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
8
21
|
def test_tokenize_simple_from
|
9
22
|
tokens = tokenize("from:amy@example.com")
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
23
|
+
expected = [
|
24
|
+
{type: :word, value: "from"},
|
25
|
+
{type: :colon},
|
26
|
+
{type: :email, value: "amy@example.com"},
|
27
|
+
{type: :eof}
|
28
|
+
]
|
29
|
+
assert_token_stream(expected, tokens)
|
17
30
|
end
|
18
31
|
|
19
32
|
def test_tokenize_quoted_string
|
20
33
|
tokens = tokenize('"hello world"')
|
21
|
-
|
22
|
-
|
23
|
-
|
34
|
+
expected = [
|
35
|
+
{type: :quoted_string, value: "hello world"},
|
36
|
+
{type: :eof}
|
37
|
+
]
|
38
|
+
assert_token_stream(expected, tokens)
|
24
39
|
end
|
25
40
|
|
26
41
|
def test_tokenize_operators
|
27
42
|
tokens = tokenize("from:amy@example.com OR to:bob@example.com")
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
assert_equal :email, tokens[6].type
|
40
|
-
assert_equal "bob@example.com", tokens[6].value
|
41
|
-
assert_equal :eof, tokens[7].type
|
43
|
+
expected = [
|
44
|
+
{type: :word, value: "from"},
|
45
|
+
{type: :colon},
|
46
|
+
{type: :email, value: "amy@example.com"},
|
47
|
+
{type: :or},
|
48
|
+
{type: :word, value: "to"},
|
49
|
+
{type: :colon},
|
50
|
+
{type: :email, value: "bob@example.com"},
|
51
|
+
{type: :eof}
|
52
|
+
]
|
53
|
+
assert_token_stream(expected, tokens)
|
42
54
|
end
|
43
55
|
|
44
56
|
def test_tokenize_parentheses
|
45
57
|
tokens = tokenize("subject:(meeting call)")
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
assert_equal :rparen, tokens[5].type
|
57
|
-
assert_equal :eof, tokens[6].type
|
58
|
+
expected = [
|
59
|
+
{type: :word, value: "subject"},
|
60
|
+
{type: :colon},
|
61
|
+
{type: :lparen},
|
62
|
+
{type: :word, value: "meeting"},
|
63
|
+
{type: :word, value: "call"},
|
64
|
+
{type: :rparen},
|
65
|
+
{type: :eof}
|
66
|
+
]
|
67
|
+
assert_token_stream(expected, tokens)
|
58
68
|
end
|
59
69
|
|
60
70
|
def test_tokenize_braces
|
61
71
|
tokens = tokenize("{from:a from:b}")
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
assert_equal "b", tokens[6].value
|
75
|
-
assert_equal :rbrace, tokens[7].type
|
76
|
-
assert_equal :eof, tokens[8].type
|
72
|
+
expected = [
|
73
|
+
{type: :lbrace},
|
74
|
+
{type: :word, value: "from"},
|
75
|
+
{type: :colon},
|
76
|
+
{type: :word, value: "a"},
|
77
|
+
{type: :word, value: "from"},
|
78
|
+
{type: :colon},
|
79
|
+
{type: :word, value: "b"},
|
80
|
+
{type: :rbrace},
|
81
|
+
{type: :eof}
|
82
|
+
]
|
83
|
+
assert_token_stream(expected, tokens)
|
77
84
|
end
|
78
85
|
|
79
86
|
def test_tokenize_negation
|
80
87
|
tokens = tokenize("dinner -movie")
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
88
|
+
expected = [
|
89
|
+
{type: :word, value: "dinner"},
|
90
|
+
{type: :minus},
|
91
|
+
{type: :word, value: "movie"},
|
92
|
+
{type: :eof}
|
93
|
+
]
|
94
|
+
assert_token_stream(expected, tokens)
|
87
95
|
end
|
88
96
|
|
89
97
|
def test_tokenize_around
|
90
98
|
tokens = tokenize("holiday AROUND 10 vacation")
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
assert_equal "vacation", tokens[3].value
|
100
|
-
assert_equal :eof, tokens[4].type
|
99
|
+
expected = [
|
100
|
+
{type: :word, value: "holiday"},
|
101
|
+
{type: :around},
|
102
|
+
{type: :number, value: 10},
|
103
|
+
{type: :word, value: "vacation"},
|
104
|
+
{type: :eof}
|
105
|
+
]
|
106
|
+
assert_token_stream(expected, tokens)
|
101
107
|
end
|
102
108
|
|
103
109
|
def test_tokenize_date
|
104
110
|
tokens = tokenize("after:2004/04/16")
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
+
expected = [
|
112
|
+
{type: :word, value: "after"},
|
113
|
+
{type: :colon},
|
114
|
+
{type: :date, value: "2004/04/16"},
|
115
|
+
{type: :eof}
|
116
|
+
]
|
117
|
+
assert_token_stream(expected, tokens)
|
111
118
|
end
|
112
119
|
|
113
120
|
def test_tokenize_relative_time
|
114
121
|
tokens = tokenize("older_than:1y")
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
122
|
+
expected = [
|
123
|
+
{type: :word, value: "older_than"},
|
124
|
+
{type: :colon},
|
125
|
+
{type: :relative_time, value: "1y"},
|
126
|
+
{type: :eof}
|
127
|
+
]
|
128
|
+
assert_token_stream(expected, tokens)
|
121
129
|
end
|
122
130
|
|
123
131
|
def test_tokenize_number
|
124
132
|
tokens = tokenize("size:1000000")
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
133
|
+
expected = [
|
134
|
+
{type: :word, value: "size"},
|
135
|
+
{type: :colon},
|
136
|
+
{type: :number, value: 1000000},
|
137
|
+
{type: :eof}
|
138
|
+
]
|
139
|
+
assert_token_stream(expected, tokens)
|
131
140
|
end
|
132
141
|
|
133
142
|
def test_tokenize_and_operator
|
134
143
|
tokens = tokenize("from:amy@example.com AND to:bob@example.com")
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
144
|
+
expected = [
|
145
|
+
{type: :word, value: "from"},
|
146
|
+
{type: :colon},
|
147
|
+
{type: :email, value: "amy@example.com"},
|
148
|
+
{type: :and, value: "AND"},
|
149
|
+
{type: :word, value: "to"},
|
150
|
+
{type: :colon},
|
151
|
+
{type: :email, value: "bob@example.com"},
|
152
|
+
{type: :eof}
|
153
|
+
]
|
154
|
+
assert_token_stream(expected, tokens)
|
139
155
|
end
|
140
156
|
|
141
157
|
def test_tokenize_plus
|
142
158
|
tokens = tokenize("+unicorn")
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
159
|
+
expected = [
|
160
|
+
{type: :plus},
|
161
|
+
{type: :word, value: "unicorn"},
|
162
|
+
{type: :eof}
|
163
|
+
]
|
164
|
+
assert_token_stream(expected, tokens)
|
147
165
|
end
|
148
166
|
|
149
167
|
def test_tokenize_complex_query
|
150
168
|
tokens = tokenize('from:boss@example.com subject:"urgent meeting" has:attachment')
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
assert_equal "has", tokens[6].value
|
165
|
-
assert_equal :colon, tokens[7].type
|
166
|
-
assert_equal :word, tokens[8].type
|
167
|
-
assert_equal "attachment", tokens[8].value
|
168
|
-
assert_equal :eof, tokens[9].type
|
169
|
+
expected = [
|
170
|
+
{type: :word, value: "from"},
|
171
|
+
{type: :colon},
|
172
|
+
{type: :email, value: "boss@example.com"},
|
173
|
+
{type: :word, value: "subject"},
|
174
|
+
{type: :colon},
|
175
|
+
{type: :quoted_string, value: "urgent meeting"},
|
176
|
+
{type: :word, value: "has"},
|
177
|
+
{type: :colon},
|
178
|
+
{type: :word, value: "attachment"},
|
179
|
+
{type: :eof}
|
180
|
+
]
|
181
|
+
assert_token_stream(expected, tokens)
|
169
182
|
end
|
170
183
|
|
171
184
|
def test_tokenize_email_with_plus
|
172
185
|
tokens = tokenize("to:user+tag@example.com")
|
173
|
-
|
174
|
-
|
186
|
+
expected = [
|
187
|
+
{type: :word, value: "to"},
|
188
|
+
{type: :colon},
|
189
|
+
{type: :email, value: "user+tag@example.com"},
|
190
|
+
{type: :eof}
|
191
|
+
]
|
192
|
+
assert_token_stream(expected, tokens)
|
175
193
|
end
|
176
194
|
|
177
195
|
def test_tokenize_multiple_words
|
178
196
|
tokens = tokenize("project report meeting")
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
197
|
+
expected = [
|
198
|
+
{type: :word, value: "project"},
|
199
|
+
{type: :word, value: "report"},
|
200
|
+
{type: :word, value: "meeting"},
|
201
|
+
{type: :eof}
|
202
|
+
]
|
203
|
+
assert_token_stream(expected, tokens)
|
184
204
|
end
|
185
205
|
|
186
206
|
def test_tokenize_quoted_string_with_escaped_quote
|
187
207
|
tokens = tokenize('"She said \\"hello\\" to me"')
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
208
|
+
expected = [
|
209
|
+
{type: :quoted_string, value: 'She said "hello" to me'},
|
210
|
+
{type: :eof}
|
211
|
+
]
|
212
|
+
assert_token_stream(expected, tokens)
|
192
213
|
end
|
193
214
|
|
194
215
|
def test_tokenize_quoted_string_with_escaped_backslash
|
195
216
|
tokens = tokenize('"path\\\\to\\\\file"')
|
196
|
-
|
197
|
-
|
198
|
-
|
217
|
+
expected = [
|
218
|
+
{type: :quoted_string, value: 'path\\to\\file'},
|
219
|
+
{type: :eof}
|
220
|
+
]
|
221
|
+
assert_token_stream(expected, tokens)
|
199
222
|
end
|
200
223
|
|
201
224
|
def test_tokenize_quoted_string_with_multiple_escapes
|
202
225
|
tokens = tokenize('"test \\"nested\\" and \\\\ slash"')
|
203
|
-
|
204
|
-
|
205
|
-
|
226
|
+
expected = [
|
227
|
+
{type: :quoted_string, value: 'test "nested" and \\ slash'},
|
228
|
+
{type: :eof}
|
229
|
+
]
|
230
|
+
assert_token_stream(expected, tokens)
|
206
231
|
end
|
207
232
|
|
208
233
|
def test_tokenize_word_with_escaped_quote
|
209
234
|
tokens = tokenize('meeting\\"room')
|
210
|
-
|
211
|
-
|
212
|
-
|
235
|
+
expected = [
|
236
|
+
{type: :word, value: 'meeting"room'},
|
237
|
+
{type: :eof}
|
238
|
+
]
|
239
|
+
assert_token_stream(expected, tokens)
|
213
240
|
end
|
214
241
|
|
215
242
|
def test_tokenize_word_with_escaped_backslash
|
216
243
|
tokens = tokenize('path\\\\to')
|
217
|
-
|
218
|
-
|
219
|
-
|
244
|
+
expected = [
|
245
|
+
{type: :word, value: 'path\\to'},
|
246
|
+
{type: :eof}
|
247
|
+
]
|
248
|
+
assert_token_stream(expected, tokens)
|
220
249
|
end
|
221
250
|
|
222
251
|
def test_tokenize_multiple_words_with_escapes
|
223
252
|
tokens = tokenize('meeting\\"room another\\\\word')
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
253
|
+
expected = [
|
254
|
+
{type: :word, value: 'meeting"room'},
|
255
|
+
{type: :word, value: 'another\\word'},
|
256
|
+
{type: :eof}
|
257
|
+
]
|
258
|
+
assert_token_stream(expected, tokens)
|
228
259
|
end
|
229
260
|
|
230
261
|
def test_tokenize_operator_value_with_escaped_quote
|
231
262
|
tokens = tokenize('subject:test\\"value')
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
263
|
+
expected = [
|
264
|
+
{type: :word, value: "subject"},
|
265
|
+
{type: :colon},
|
266
|
+
{type: :word, value: 'test"value'},
|
267
|
+
{type: :eof}
|
268
|
+
]
|
269
|
+
assert_token_stream(expected, tokens)
|
238
270
|
end
|
239
271
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gmail_search_syntax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- me@julik.nl
|
@@ -70,6 +70,8 @@ extensions: []
|
|
70
70
|
extra_rdoc_files: []
|
71
71
|
files:
|
72
72
|
- ARCHITECTURE.md
|
73
|
+
- GMAIL_BEHAVIOR_COMPARISON.md
|
74
|
+
- GMAIL_COMPATIBILITY_COMPLETE.md
|
73
75
|
- IMPLEMENTATION_NOTES.md
|
74
76
|
- README.md
|
75
77
|
- Rakefile
|
@@ -77,6 +79,7 @@ files:
|
|
77
79
|
- examples/alias_collision_fix.rb
|
78
80
|
- examples/demo.rb
|
79
81
|
- examples/escaped_quotes_demo.rb
|
82
|
+
- examples/gmail_comparison_demo.rb
|
80
83
|
- examples/gmail_message_id_demo.rb
|
81
84
|
- examples/postgres_vs_sqlite.rb
|
82
85
|
- examples/sql_query.rb
|