gmail_search_syntax 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/IMPLEMENTATION_NOTES.md +174 -0
- data/README.md +2 -2
- data/examples/escaped_quotes_demo.rb +152 -0
- data/examples/text_vs_substring_demo.rb +93 -0
- data/lib/gmail_search_syntax/ast.rb +14 -2
- data/lib/gmail_search_syntax/parser.rb +3 -3
- data/lib/gmail_search_syntax/sql_visitor.rb +22 -5
- data/lib/gmail_search_syntax/tokenizer.rb +44 -9
- data/lib/gmail_search_syntax/version.rb +1 -1
- data/test/gmail_search_syntax_test.rb +76 -30
- data/test/sql_visitor_test.rb +44 -1
- data/test/tokenizer_test.rb +54 -0
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60c2512ef5571bfb496971d202b28148946bdc60355f96d2048151cfb12956d4
|
4
|
+
data.tar.gz: 6aac7ff084afb11ff543fc87c6d78dd8efd702ae69ca5a9c6a63348239743134
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2ff3554682a4a877d289aa11970bc77c2cde13bc532e485b693e52fe408482372079ab8119e10ace3234097410128b4ea48dddb24a317cc2894f32ed0f34f7d
|
7
|
+
data.tar.gz: 64dd03a936bcfae224b8ee54ee9d462901daaf7fcc9edecc86e760494ec11fb9c0154d5f7c8203b01772d8e423851d2df79ac03707ffaa50d70bcb8eef57f53a
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# Implementation Notes: StringToken vs Substring Nodes
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
This implementation distinguishes between **word boundary matching** (unquoted text) and **substring matching** (quoted text) in the Gmail search syntax parser.
|
6
|
+
|
7
|
+
## Changes Made
|
8
|
+
|
9
|
+
### 1. Renamed and New AST Nodes
|
10
|
+
|
11
|
+
- **Renamed** `Text` to `StringToken` for clarity - represents unquoted text tokens
|
12
|
+
- **Added** `Substring` node to the AST (`lib/gmail_search_syntax/ast.rb`) that represents quoted strings.
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
class Substring < Node
|
16
|
+
attr_reader :value
|
17
|
+
|
18
|
+
def initialize(value)
|
19
|
+
@value = value
|
20
|
+
end
|
21
|
+
|
22
|
+
def inspect
|
23
|
+
"#<Substring #{@value.inspect}>"
|
24
|
+
end
|
25
|
+
end
|
26
|
+
```
|
27
|
+
|
28
|
+
### 2. Parser Updates
|
29
|
+
|
30
|
+
Modified the parser (`lib/gmail_search_syntax/parser.rb`) to create:
|
31
|
+
- `StringToken` nodes for unquoted text
|
32
|
+
- `Substring` nodes for quoted strings (`:quoted_string` tokens)
|
33
|
+
|
34
|
+
### 3. SQL Visitor Updates
|
35
|
+
|
36
|
+
Updated the SQL visitor (`lib/gmail_search_syntax/sql_visitor.rb`) with two different behaviors:
|
37
|
+
|
38
|
+
#### StringToken Node (Word Boundary Matching)
|
39
|
+
```ruby
|
40
|
+
def visit_string_token(node)
|
41
|
+
# Matches complete words only
|
42
|
+
# Uses: = exact, LIKE "value %", LIKE "% value", LIKE "% value %"
|
43
|
+
end
|
44
|
+
```
|
45
|
+
|
46
|
+
SQL Pattern:
|
47
|
+
```sql
|
48
|
+
(m0.subject = ? OR m0.subject LIKE ? OR m0.subject LIKE ? OR m0.subject LIKE ?)
|
49
|
+
OR
|
50
|
+
(m0.body = ? OR m0.body LIKE ? OR m0.body LIKE ? OR m0.body LIKE ?)
|
51
|
+
```
|
52
|
+
|
53
|
+
Parameters: `["meeting", "meeting %", "% meeting", "% meeting %", ...]`
|
54
|
+
|
55
|
+
**Matches:** "meeting", "meeting tomorrow", "the meeting", "just a meeting here"
|
56
|
+
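**Does NOT match:** "meetings", "premeeting", "meetingroom" (nor "meeting." or "meeting," — only spaces and the start/end of the field count as word boundaries)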
|
57
|
+
|
58
|
+
#### Substring Node (Partial Matching)
|
59
|
+
```ruby
|
60
|
+
def visit_substring(node)
|
61
|
+
# Matches anywhere in the text
|
62
|
+
# Uses: LIKE "%value%"
|
63
|
+
end
|
64
|
+
```
|
65
|
+
|
66
|
+
SQL Pattern:
|
67
|
+
```sql
|
68
|
+
(m0.subject LIKE ? OR m0.body LIKE ?)
|
69
|
+
```
|
70
|
+
|
71
|
+
Parameters: `["%meeting%", "%meeting%"]`
|
72
|
+
|
73
|
+
**Matches:** "meeting", "meetings", "premeeting", "meetingroom"
|
74
|
+
|
75
|
+
## Examples
|
76
|
+
|
77
|
+
### Unquoted (Word Boundary)
|
78
|
+
```ruby
|
79
|
+
GmailSearchSyntax.parse!("meeting")
|
80
|
+
# => #<StringToken "meeting">
|
81
|
+
# SQL: ... WHERE m0.subject = ? OR m0.subject LIKE ? OR ...
|
82
|
+
```
|
83
|
+
|
84
|
+
### Quoted (Substring)
|
85
|
+
```ruby
|
86
|
+
GmailSearchSyntax.parse!('"meeting"')
|
87
|
+
# => #<Substring "meeting">
|
88
|
+
# SQL: ... WHERE m0.subject LIKE ? OR m0.body LIKE ?
|
89
|
+
```
|
90
|
+
|
91
|
+
### Combined
|
92
|
+
```ruby
|
93
|
+
GmailSearchSyntax.parse!('urgent "q1 report"')
|
94
|
+
# => #<And #<StringToken "urgent"> AND #<Substring "q1 report">>
|
95
|
+
```
|
96
|
+
|
97
|
+
## Rationale
|
98
|
+
|
99
|
+
This implementation provides:
|
100
|
+
|
101
|
+
1. **More precise searching** - Unquoted text matches complete words/tokens, avoiding false positives from partial matches
|
102
|
+
2. **Flexible substring search** - Quoted text still allows finding substrings when needed
|
103
|
+
3. **Gmail-like behavior** - Aligns with user expectations from Gmail's search syntax
|
104
|
+
4. **SQL efficiency** - Word boundary matching is more specific than substring matching
|
105
|
+
|
106
|
+
## Escape Sequences
|
107
|
+
|
108
|
+
Escape sequences are supported in **both quoted and unquoted tokens**, i.e. in quoted strings (`Substring` nodes) as well as in unquoted tokens (`StringToken` nodes):
|
109
|
+
|
110
|
+
### Supported Escapes
|
111
|
+
|
112
|
+
- `\"` - Literal double quote
|
113
|
+
- `\\` - Literal backslash
|
114
|
+
- Other escape sequences (e.g., `\n`, `\t`) are preserved as-is (backslash + character)
|
115
|
+
|
116
|
+
### Examples
|
117
|
+
|
118
|
+
**Quoted Strings (Substring nodes):**
|
119
|
+
```ruby
|
120
|
+
# Escaped quotes in quoted string
|
121
|
+
'"She said \\"hello\\" to me"'
|
122
|
+
# => #<Substring 'She said "hello" to me'>
|
123
|
+
|
124
|
+
# Escaped backslashes in quoted string
|
125
|
+
'"path\\\\to\\\\file"'
|
126
|
+
# => #<Substring 'path\\to\\file'>
|
127
|
+
|
128
|
+
# In operator values with quoted strings
|
129
|
+
'subject:"Meeting: \\"Q1 Review\\""'
|
130
|
+
# => #<Operator subject: 'Meeting: "Q1 Review"'>
|
131
|
+
```
|
132
|
+
|
133
|
+
**Unquoted Tokens (StringToken nodes):**
|
134
|
+
```ruby
|
135
|
+
# Escaped quotes in unquoted token
|
136
|
+
'meeting\\"room'
|
137
|
+
# => #<StringToken 'meeting"room'>
|
138
|
+
|
139
|
+
# Escaped backslashes in unquoted token
|
140
|
+
'path\\\\to\\\\file'
|
141
|
+
# => #<StringToken 'path\\to\\file'>
|
142
|
+
|
143
|
+
# In operator values with unquoted tokens
|
144
|
+
'subject:test\\"value'
|
145
|
+
# => #<Operator subject: 'test"value'>
|
146
|
+
```
|
147
|
+
|
148
|
+
This allows you to include literal quotes and backslashes in any token, whether quoted or unquoted.
|
149
|
+
|
150
|
+
## Testing
|
151
|
+
|
152
|
+
All tests pass with comprehensive coverage:
|
153
|
+
- Basic functionality tests
|
154
|
+
- Escape sequence tests in tokenizer
|
155
|
+
- Integration tests for parsing with escaped quotes
|
156
|
+
- SQL generation tests with escaped quotes
|
157
|
+
|
158
|
+
New tests added:
|
159
|
+
- `test_quoted_text_search_uses_substring` in `test/sql_visitor_test.rb`
|
160
|
+
- `test_tokenize_quoted_string_with_escaped_quote` in `test/tokenizer_test.rb`
|
161
|
+
- `test_tokenize_quoted_string_with_escaped_backslash` in `test/tokenizer_test.rb`
|
162
|
+
- `test_tokenize_word_with_escaped_quote` in `test/tokenizer_test.rb`
|
163
|
+
- `test_tokenize_word_with_escaped_backslash` in `test/tokenizer_test.rb`
|
164
|
+
- `test_quoted_string_with_escaped_quotes` in `test/gmail_search_syntax_test.rb`
|
165
|
+
- `test_unquoted_text_with_escaped_quote` in `test/gmail_search_syntax_test.rb`
|
166
|
+
- `test_unquoted_text_with_escaped_backslash` in `test/gmail_search_syntax_test.rb`
|
167
|
+
- `test_subject_with_escaped_quotes` in `test/sql_visitor_test.rb`
|
168
|
+
- `test_unquoted_token_with_escaped_quote` in `test/sql_visitor_test.rb`
|
169
|
+
- `test_operator_with_unquoted_escaped_quote` in `test/sql_visitor_test.rb`
|
170
|
+
|
171
|
+
Run demos:
|
172
|
+
- `bundle exec ruby examples/text_vs_substring_demo.rb`
|
173
|
+
- `bundle exec ruby examples/escaped_quotes_demo.rb`
|
174
|
+
|
data/README.md
CHANGED
@@ -39,11 +39,11 @@ GmailSearchSyntax.parse!("from:amy OR from:bob")
|
|
39
39
|
|
40
40
|
# Negation
|
41
41
|
GmailSearchSyntax.parse!("dinner -movie")
|
42
|
-
# => #<And #<
|
42
|
+
# => #<And #<StringToken "dinner"> AND #<Not #<StringToken "movie">>>
|
43
43
|
|
44
44
|
# Proximity search
|
45
45
|
GmailSearchSyntax.parse!("holiday AROUND 10 vacation")
|
46
|
-
# => #<Around #<
|
46
|
+
# => #<Around #<StringToken "holiday"> AROUND 10 #<StringToken "vacation">>
|
47
47
|
|
48
48
|
# Complex query with OR inside operator values
|
49
49
|
GmailSearchSyntax.parse!("from:{alice@ bob@} subject:urgent")
|
@@ -0,0 +1,152 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative "../lib/gmail_search_syntax"
|
4
|
+
|
5
|
+
puts "=" * 80
|
6
|
+
puts "Escaped Quotes Demo"
|
7
|
+
puts "=" * 80
|
8
|
+
puts
|
9
|
+
|
10
|
+
# Example 1: Escaped quotes in a substring search
|
11
|
+
puts "1. Substring with escaped quotes"
|
12
|
+
puts "-" * 40
|
13
|
+
query1 = '"She said \\"hello\\" to me"'
|
14
|
+
puts "Input: #{query1}"
|
15
|
+
ast1 = GmailSearchSyntax.parse!(query1)
|
16
|
+
puts "AST: #{ast1.inspect}"
|
17
|
+
puts "Value: #{ast1.value.inspect}"
|
18
|
+
|
19
|
+
visitor1 = GmailSearchSyntax::SQLiteVisitor.new
|
20
|
+
visitor1.visit(ast1)
|
21
|
+
sql1, params1 = visitor1.to_query.to_sql
|
22
|
+
|
23
|
+
puts "\nSQL:\n#{sql1}"
|
24
|
+
puts "\nParams: #{params1.inspect}"
|
25
|
+
puts
|
26
|
+
|
27
|
+
# Example 2: Escaped quotes in subject operator
|
28
|
+
puts "2. Subject with escaped quotes"
|
29
|
+
puts "-" * 40
|
30
|
+
query2 = 'subject:"Meeting: \\"Q1 Review\\""'
|
31
|
+
puts "Input: #{query2}"
|
32
|
+
ast2 = GmailSearchSyntax.parse!(query2)
|
33
|
+
puts "AST: #{ast2.inspect}"
|
34
|
+
puts "Operator: #{ast2.name}"
|
35
|
+
puts "Value: #{ast2.value.inspect}"
|
36
|
+
|
37
|
+
visitor2 = GmailSearchSyntax::SQLiteVisitor.new
|
38
|
+
visitor2.visit(ast2)
|
39
|
+
sql2, params2 = visitor2.to_query.to_sql
|
40
|
+
|
41
|
+
puts "\nSQL:\n#{sql2}"
|
42
|
+
puts "\nParams: #{params2.inspect}"
|
43
|
+
puts
|
44
|
+
|
45
|
+
# Example 3: Escaped backslashes
|
46
|
+
puts "3. Escaped backslashes"
|
47
|
+
puts "-" * 40
|
48
|
+
query3 = '"path\\\\to\\\\file"'
|
49
|
+
puts "Input: #{query3}"
|
50
|
+
ast3 = GmailSearchSyntax.parse!(query3)
|
51
|
+
puts "AST: #{ast3.inspect}"
|
52
|
+
puts "Value: #{ast3.value.inspect}"
|
53
|
+
puts
|
54
|
+
|
55
|
+
# Example 4: Mixed escapes
|
56
|
+
puts "4. Mixed escapes (quotes and backslashes)"
|
57
|
+
puts "-" * 40
|
58
|
+
query4 = '"He said: \\"C:\\\\Users\\\\file.txt\\""'
|
59
|
+
puts "Input: #{query4}"
|
60
|
+
ast4 = GmailSearchSyntax.parse!(query4)
|
61
|
+
puts "AST: #{ast4.inspect}"
|
62
|
+
puts "Value: #{ast4.value.inspect}"
|
63
|
+
|
64
|
+
visitor4 = GmailSearchSyntax::SQLiteVisitor.new
|
65
|
+
visitor4.visit(ast4)
|
66
|
+
sql4, params4 = visitor4.to_query.to_sql
|
67
|
+
|
68
|
+
puts "\nSQL:\n#{sql4}"
|
69
|
+
puts "\nParams: #{params4.inspect}"
|
70
|
+
puts
|
71
|
+
|
72
|
+
# Example 5: Complex query with escaped quotes
|
73
|
+
puts "5. Complex query with escaped quotes"
|
74
|
+
puts "-" * 40
|
75
|
+
query5 = 'from:boss subject:"\\"Important\\" Meeting" has:attachment'
|
76
|
+
puts "Input: #{query5}"
|
77
|
+
ast5 = GmailSearchSyntax.parse!(query5)
|
78
|
+
puts "AST: #{ast5.inspect}"
|
79
|
+
|
80
|
+
visitor5 = GmailSearchSyntax::SQLiteVisitor.new
|
81
|
+
visitor5.visit(ast5)
|
82
|
+
sql5, params5 = visitor5.to_query.to_sql
|
83
|
+
|
84
|
+
puts "\nSQL:\n#{sql5}"
|
85
|
+
puts "\nParams: #{params5.inspect}"
|
86
|
+
puts
|
87
|
+
|
88
|
+
# Example 6: Escaped quotes in unquoted tokens
|
89
|
+
puts "6. Unquoted token with escaped quote"
|
90
|
+
puts "-" * 40
|
91
|
+
query6 = 'meeting\\"room'
|
92
|
+
puts "Input: #{query6}"
|
93
|
+
ast6 = GmailSearchSyntax.parse!(query6)
|
94
|
+
puts "AST: #{ast6.inspect}"
|
95
|
+
puts "Value: #{ast6.value.inspect}"
|
96
|
+
|
97
|
+
visitor6 = GmailSearchSyntax::SQLiteVisitor.new
|
98
|
+
visitor6.visit(ast6)
|
99
|
+
sql6, params6 = visitor6.to_query.to_sql
|
100
|
+
|
101
|
+
puts "\nSQL:\n#{sql6}"
|
102
|
+
puts "\nParams: #{params6.inspect}"
|
103
|
+
puts "\nNote: Unquoted tokens use word boundary matching (not substring)"
|
104
|
+
puts
|
105
|
+
|
106
|
+
# Example 7: Escaped quotes in operator with unquoted value
|
107
|
+
puts "7. Operator with unquoted escaped quote"
|
108
|
+
puts "-" * 40
|
109
|
+
query7 = 'subject:test\\"value'
|
110
|
+
puts "Input: #{query7}"
|
111
|
+
ast7 = GmailSearchSyntax.parse!(query7)
|
112
|
+
puts "AST: #{ast7.inspect}"
|
113
|
+
puts "Operator: #{ast7.name}"
|
114
|
+
puts "Value: #{ast7.value.inspect}"
|
115
|
+
|
116
|
+
visitor7 = GmailSearchSyntax::SQLiteVisitor.new
|
117
|
+
visitor7.visit(ast7)
|
118
|
+
sql7, params7 = visitor7.to_query.to_sql
|
119
|
+
|
120
|
+
puts "\nSQL:\n#{sql7}"
|
121
|
+
puts "\nParams: #{params7.inspect}"
|
122
|
+
puts
|
123
|
+
|
124
|
+
# Example 8: Escaped backslash in unquoted token
|
125
|
+
puts "8. Unquoted token with escaped backslash"
|
126
|
+
puts "-" * 40
|
127
|
+
query8 = 'path\\\\to\\\\file'
|
128
|
+
puts "Input: #{query8}"
|
129
|
+
ast8 = GmailSearchSyntax.parse!(query8)
|
130
|
+
puts "AST: #{ast8.inspect}"
|
131
|
+
puts "Value: #{ast8.value.inspect}"
|
132
|
+
puts
|
133
|
+
|
134
|
+
puts "=" * 80
|
135
|
+
puts "Summary"
|
136
|
+
puts "=" * 80
|
137
|
+
puts "Escape sequences work in BOTH quoted and unquoted tokens:"
|
138
|
+
puts
|
139
|
+
puts "Quoted strings (Substring nodes):"
|
140
|
+
puts " - Use substring matching (LIKE %value%)"
|
141
|
+
puts " - \"She said \\\"hello\\\"\" → 'She said \"hello\"'"
|
142
|
+
puts
|
143
|
+
puts "Unquoted tokens (StringToken nodes):"
|
144
|
+
puts " - Use word boundary matching (= or LIKE with boundaries)"
|
145
|
+
puts " - meeting\\\"room → 'meeting\"room'"
|
146
|
+
puts " - path\\\\to\\\\file → 'path\\to\\file'"
|
147
|
+
puts
|
148
|
+
puts "Supported escapes:"
|
149
|
+
puts " \\\" → literal double quote"
|
150
|
+
puts " \\\\ → literal backslash"
|
151
|
+
puts " Other (\\n, \\t, etc.) → preserved as-is"
|
152
|
+
puts "=" * 80
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative "../lib/gmail_search_syntax"
|
4
|
+
|
5
|
+
puts "=" * 80
|
6
|
+
puts "StringToken vs Substring Demo"
|
7
|
+
puts "=" * 80
|
8
|
+
puts
|
9
|
+
|
10
|
+
# Example 1: Unquoted text (StringToken node) - word boundary matching
|
11
|
+
puts "1. Unquoted text: meeting"
|
12
|
+
puts "-" * 40
|
13
|
+
query1 = "meeting"
|
14
|
+
ast1 = GmailSearchSyntax.parse!(query1)
|
15
|
+
puts "AST: #{ast1.inspect}"
|
16
|
+
puts "Node type: #{ast1.class.name}"
|
17
|
+
|
18
|
+
visitor1 = GmailSearchSyntax::SQLiteVisitor.new
|
19
|
+
visitor1.visit(ast1)
|
20
|
+
sql1, params1 = visitor1.to_query.to_sql
|
21
|
+
|
22
|
+
puts "\nSQL:\n#{sql1}"
|
23
|
+
puts "\nParams: #{params1.inspect}"
|
24
|
+
puts "\nExplanation:"
|
25
|
+
puts " - Matches 'meeting' as a complete word"
|
26
|
+
puts " - Will match: 'meeting tomorrow', 'the meeting', 'just a meeting here'"
|
27
|
+
puts " - Will NOT match: 'meetings', 'premeeting', 'meetingroom'"
|
28
|
+
puts
|
29
|
+
|
30
|
+
# Example 2: Quoted text (Substring node) - substring matching
|
31
|
+
puts "2. Quoted text: \"meeting\""
|
32
|
+
puts "-" * 40
|
33
|
+
query2 = '"meeting"'
|
34
|
+
ast2 = GmailSearchSyntax.parse!(query2)
|
35
|
+
puts "AST: #{ast2.inspect}"
|
36
|
+
puts "Node type: #{ast2.class.name}"
|
37
|
+
|
38
|
+
visitor2 = GmailSearchSyntax::SQLiteVisitor.new
|
39
|
+
visitor2.visit(ast2)
|
40
|
+
sql2, params2 = visitor2.to_query.to_sql
|
41
|
+
|
42
|
+
puts "\nSQL:\n#{sql2}"
|
43
|
+
puts "\nParams: #{params2.inspect}"
|
44
|
+
puts "\nExplanation:"
|
45
|
+
puts " - Matches 'meeting' as a substring anywhere"
|
46
|
+
puts " - Will match: 'meeting', 'meetings', 'premeeting', 'meetingroom'"
|
47
|
+
puts " - This is useful for partial matching"
|
48
|
+
puts
|
49
|
+
|
50
|
+
# Example 3: Multi-word quoted phrase
|
51
|
+
puts "3. Quoted phrase: \"quarterly review\""
|
52
|
+
puts "-" * 40
|
53
|
+
query3 = '"quarterly review"'
|
54
|
+
ast3 = GmailSearchSyntax.parse!(query3)
|
55
|
+
puts "AST: #{ast3.inspect}"
|
56
|
+
puts "Node type: #{ast3.class.name}"
|
57
|
+
|
58
|
+
visitor3 = GmailSearchSyntax::SQLiteVisitor.new
|
59
|
+
visitor3.visit(ast3)
|
60
|
+
sql3, params3 = visitor3.to_query.to_sql
|
61
|
+
|
62
|
+
puts "\nSQL:\n#{sql3}"
|
63
|
+
puts "\nParams: #{params3.inspect}"
|
64
|
+
puts "\nExplanation:"
|
65
|
+
puts " - Matches 'quarterly review' as a substring"
|
66
|
+
puts " - Will match: 'quarterly review meeting', 'the quarterly review is done'"
|
67
|
+
puts
|
68
|
+
|
69
|
+
# Example 4: Combined usage
|
70
|
+
puts "4. Combined: urgent \"q1 report\""
|
71
|
+
puts "-" * 40
|
72
|
+
query4 = 'urgent "q1 report"'
|
73
|
+
ast4 = GmailSearchSyntax.parse!(query4)
|
74
|
+
puts "AST: #{ast4.inspect}"
|
75
|
+
|
76
|
+
visitor4 = GmailSearchSyntax::SQLiteVisitor.new
|
77
|
+
visitor4.visit(ast4)
|
78
|
+
sql4, params4 = visitor4.to_query.to_sql
|
79
|
+
|
80
|
+
puts "\nSQL:\n#{sql4}"
|
81
|
+
puts "\nParams: #{params4.inspect}"
|
82
|
+
puts "\nExplanation:"
|
83
|
+
puts " - 'urgent' uses word boundary matching (complete word)"
|
84
|
+
puts " - '\"q1 report\"' uses substring matching (partial match)"
|
85
|
+
puts " - Both conditions must be satisfied (AND)"
|
86
|
+
puts
|
87
|
+
|
88
|
+
puts "=" * 80
|
89
|
+
puts "Summary"
|
90
|
+
puts "=" * 80
|
91
|
+
puts "StringToken node (unquoted): Word boundary matching - finds complete words"
|
92
|
+
puts "Substring node (quoted): Substring matching - finds partial matches"
|
93
|
+
puts "=" * 80
|
@@ -23,7 +23,7 @@ module GmailSearchSyntax
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
class
|
26
|
+
class StringToken < Node
|
27
27
|
attr_reader :value
|
28
28
|
|
29
29
|
def initialize(value)
|
@@ -31,7 +31,19 @@ module GmailSearchSyntax
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def inspect
|
34
|
-
"#<
|
34
|
+
"#<StringToken #{@value.inspect}>"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class Substring < Node
|
39
|
+
attr_reader :value
|
40
|
+
|
41
|
+
def initialize(value)
|
42
|
+
@value = value
|
43
|
+
end
|
44
|
+
|
45
|
+
def inspect
|
46
|
+
"#<Substring #{@value.inspect}>"
|
35
47
|
end
|
36
48
|
end
|
37
49
|
|
@@ -129,11 +129,11 @@ module GmailSearchSyntax
|
|
129
129
|
when :quoted_string
|
130
130
|
value = current_token.value
|
131
131
|
advance
|
132
|
-
AST::
|
132
|
+
AST::Substring.new(value)
|
133
133
|
when :email, :number, :date, :relative_time
|
134
134
|
value = current_token.value
|
135
135
|
advance
|
136
|
-
AST::
|
136
|
+
AST::StringToken.new(value)
|
137
137
|
else
|
138
138
|
advance
|
139
139
|
nil
|
@@ -183,7 +183,7 @@ module GmailSearchSyntax
|
|
183
183
|
end
|
184
184
|
|
185
185
|
advance
|
186
|
-
AST::
|
186
|
+
AST::StringToken.new(word)
|
187
187
|
end
|
188
188
|
|
189
189
|
def parse_operator_value
|
@@ -46,8 +46,10 @@ module GmailSearchSyntax
|
|
46
46
|
case node
|
47
47
|
when AST::Operator
|
48
48
|
visit_operator(node)
|
49
|
-
when AST::
|
50
|
-
|
49
|
+
when AST::StringToken
|
50
|
+
visit_string_token(node)
|
51
|
+
when AST::Substring
|
52
|
+
visit_substring(node)
|
51
53
|
when AST::And
|
52
54
|
visit_and(node)
|
53
55
|
when AST::Or
|
@@ -325,11 +327,26 @@ module GmailSearchSyntax
|
|
325
327
|
@query.add_condition("m0.rfc822_message_id = ?")
|
326
328
|
end
|
327
329
|
|
328
|
-
def
|
329
|
-
|
330
|
-
|
330
|
+
def visit_string_token(node)
|
331
|
+
# Word boundary matching - the value should appear as a complete word/token
|
332
|
+
# We use LIKE with word boundaries: spaces, start/end of string
|
333
|
+
value = node.value
|
334
|
+
@query.add_param(value)
|
335
|
+
@query.add_param("#{value} %")
|
336
|
+
@query.add_param("% #{value}")
|
337
|
+
@query.add_param("% #{value} %")
|
338
|
+
@query.add_condition("((m0.subject = ? OR m0.subject LIKE ? OR m0.subject LIKE ? OR m0.subject LIKE ?) OR (m0.body = ? OR m0.body LIKE ? OR m0.body LIKE ? OR m0.body LIKE ?))")
|
339
|
+
@query.add_param(value)
|
340
|
+
@query.add_param("#{value} %")
|
341
|
+
@query.add_param("% #{value}")
|
342
|
+
@query.add_param("% #{value} %")
|
343
|
+
end
|
344
|
+
|
345
|
+
def visit_substring(node)
|
346
|
+
# Substring matching - the value can appear anywhere in the text
|
331
347
|
@query.add_param("%#{node.value}%")
|
332
348
|
@query.add_param("%#{node.value}%")
|
349
|
+
@query.add_condition("(m0.subject LIKE ? OR m0.body LIKE ?)")
|
333
350
|
end
|
334
351
|
|
335
352
|
def visit_and(node)
|
@@ -103,20 +103,37 @@ module GmailSearchSyntax
|
|
103
103
|
end
|
104
104
|
|
105
105
|
def read_quoted_string
|
106
|
-
advance
|
106
|
+
advance # Skip opening quote
|
107
107
|
|
108
108
|
value = ""
|
109
|
-
while @position < @input.length
|
110
|
-
|
109
|
+
while @position < @input.length
|
110
|
+
char = current_char
|
111
|
+
|
112
|
+
if char == "\\"
|
113
|
+
# Handle escape sequences
|
111
114
|
advance
|
112
|
-
|
115
|
+
if @position < @input.length
|
116
|
+
next_char = current_char
|
117
|
+
value += case next_char
|
118
|
+
when '"', "\\"
|
119
|
+
# Escaped quote or backslash - add the literal character
|
120
|
+
next_char
|
121
|
+
else
|
122
|
+
# Other escapes - keep the backslash and the character
|
123
|
+
"\\" + next_char
|
124
|
+
end
|
125
|
+
advance
|
126
|
+
end
|
127
|
+
elsif char == '"'
|
128
|
+
# Unescaped quote - end of string
|
129
|
+
break
|
113
130
|
else
|
114
|
-
value +=
|
131
|
+
value += char
|
132
|
+
advance
|
115
133
|
end
|
116
|
-
advance
|
117
134
|
end
|
118
135
|
|
119
|
-
advance if @position < @input.length
|
136
|
+
advance if @position < @input.length && current_char == '"' # Skip closing quote
|
120
137
|
|
121
138
|
add_token(:quoted_string, value)
|
122
139
|
end
|
@@ -128,8 +145,26 @@ module GmailSearchSyntax
|
|
128
145
|
char = current_char
|
129
146
|
break if /[\s():{}]/.match?(char)
|
130
147
|
break if char == "-"
|
131
|
-
|
132
|
-
|
148
|
+
|
149
|
+
if char == "\\"
|
150
|
+
# Handle escape sequences in unquoted tokens
|
151
|
+
advance
|
152
|
+
if @position < @input.length
|
153
|
+
next_char = current_char
|
154
|
+
value += case next_char
|
155
|
+
when '"', "\\"
|
156
|
+
# Escaped quote or backslash - add the literal character
|
157
|
+
next_char
|
158
|
+
else
|
159
|
+
# Other escapes - keep the backslash and the character
|
160
|
+
"\\" + next_char
|
161
|
+
end
|
162
|
+
advance
|
163
|
+
end
|
164
|
+
else
|
165
|
+
value += char
|
166
|
+
advance
|
167
|
+
end
|
133
168
|
end
|
134
169
|
|
135
170
|
return if value.empty?
|
@@ -4,7 +4,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
4
4
|
include GmailSearchSyntax::AST
|
5
5
|
|
6
6
|
def test_version
|
7
|
-
|
7
|
+
assert GmailSearchSyntax::VERSION
|
8
8
|
end
|
9
9
|
|
10
10
|
def test_simple_from_operator
|
@@ -129,11 +129,11 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
129
129
|
assert_instance_of And, ast
|
130
130
|
|
131
131
|
assert_equal 2, ast.operands.length
|
132
|
-
assert_instance_of
|
132
|
+
assert_instance_of StringToken, ast.operands[0]
|
133
133
|
assert_equal "dinner", ast.operands[0].value
|
134
134
|
|
135
135
|
assert_instance_of Not, ast.operands[1]
|
136
|
-
assert_instance_of
|
136
|
+
assert_instance_of StringToken, ast.operands[1].child
|
137
137
|
assert_equal "movie", ast.operands[1].child.value
|
138
138
|
end
|
139
139
|
|
@@ -141,17 +141,17 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
141
141
|
ast = GmailSearchSyntax.parse!("holiday AROUND 10 vacation")
|
142
142
|
assert_instance_of Around, ast
|
143
143
|
|
144
|
-
assert_instance_of
|
144
|
+
assert_instance_of StringToken, ast.left
|
145
145
|
assert_equal "holiday", ast.left.value
|
146
146
|
assert_equal 10, ast.distance
|
147
147
|
|
148
|
-
assert_instance_of
|
148
|
+
assert_instance_of StringToken, ast.right
|
149
149
|
assert_equal "vacation", ast.right.value
|
150
150
|
end
|
151
151
|
|
152
152
|
def test_around_with_quoted_string
|
153
153
|
ast = GmailSearchSyntax.parse!('"secret AROUND 25 birthday"')
|
154
|
-
assert_instance_of
|
154
|
+
assert_instance_of Substring, ast
|
155
155
|
assert_equal "secret AROUND 25 birthday", ast.value
|
156
156
|
end
|
157
157
|
|
@@ -192,7 +192,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
192
192
|
|
193
193
|
def test_quoted_exact_phrase
|
194
194
|
ast = GmailSearchSyntax.parse!('"dinner and movie tonight"')
|
195
|
-
assert_instance_of
|
195
|
+
assert_instance_of Substring, ast
|
196
196
|
assert_equal "dinner and movie tonight", ast.value
|
197
197
|
end
|
198
198
|
|
@@ -203,9 +203,9 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
203
203
|
|
204
204
|
assert_instance_of And, ast.value
|
205
205
|
assert_equal 2, ast.value.operands.length
|
206
|
-
assert_instance_of
|
206
|
+
assert_instance_of StringToken, ast.value.operands[0]
|
207
207
|
assert_equal "dinner", ast.value.operands[0].value
|
208
|
-
assert_instance_of
|
208
|
+
assert_instance_of StringToken, ast.value.operands[1]
|
209
209
|
assert_equal "movie", ast.value.operands[1].value
|
210
210
|
end
|
211
211
|
|
@@ -218,7 +218,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
218
218
|
assert_equal "in", ast.operands[0].name
|
219
219
|
assert_equal "anywhere", ast.operands[0].value
|
220
220
|
|
221
|
-
assert_instance_of
|
221
|
+
assert_instance_of StringToken, ast.operands[1]
|
222
222
|
assert_equal "movie", ast.operands[1].value
|
223
223
|
end
|
224
224
|
|
@@ -327,7 +327,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
327
327
|
|
328
328
|
def test_plain_text_search
|
329
329
|
ast = GmailSearchSyntax.parse!("meeting")
|
330
|
-
assert_instance_of
|
330
|
+
assert_instance_of StringToken, ast
|
331
331
|
assert_equal "meeting", ast.value
|
332
332
|
end
|
333
333
|
|
@@ -336,10 +336,10 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
336
336
|
assert_instance_of And, ast
|
337
337
|
|
338
338
|
assert_equal 2, ast.operands.length
|
339
|
-
assert_instance_of
|
339
|
+
assert_instance_of StringToken, ast.operands[0]
|
340
340
|
assert_equal "project", ast.operands[0].value
|
341
341
|
|
342
|
-
assert_instance_of
|
342
|
+
assert_instance_of StringToken, ast.operands[1]
|
343
343
|
assert_equal "report", ast.operands[1].value
|
344
344
|
end
|
345
345
|
|
@@ -417,7 +417,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
417
417
|
|
418
418
|
def test_quoted_string_with_operators_inside
|
419
419
|
ast = GmailSearchSyntax.parse!('"from:amy to:bob"')
|
420
|
-
assert_instance_of
|
420
|
+
assert_instance_of Substring, ast
|
421
421
|
assert_equal "from:amy to:bob", ast.value
|
422
422
|
end
|
423
423
|
|
@@ -460,7 +460,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
460
460
|
|
461
461
|
def test_parentheses_with_single_term
|
462
462
|
ast = GmailSearchSyntax.parse!("(meeting)")
|
463
|
-
assert_instance_of
|
463
|
+
assert_instance_of StringToken, ast
|
464
464
|
assert_equal "meeting", ast.value
|
465
465
|
end
|
466
466
|
|
@@ -471,11 +471,11 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
471
471
|
|
472
472
|
assert_instance_of And, ast.value
|
473
473
|
assert_equal 3, ast.value.operands.length
|
474
|
-
assert_instance_of
|
474
|
+
assert_instance_of StringToken, ast.value.operands[0]
|
475
475
|
assert_equal "project", ast.value.operands[0].value
|
476
|
-
assert_instance_of
|
476
|
+
assert_instance_of StringToken, ast.value.operands[1]
|
477
477
|
assert_equal "status", ast.value.operands[1].value
|
478
|
-
assert_instance_of
|
478
|
+
assert_instance_of StringToken, ast.value.operands[2]
|
479
479
|
assert_equal "update", ast.value.operands[2].value
|
480
480
|
end
|
481
481
|
|
@@ -484,10 +484,10 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
484
484
|
assert_instance_of And, ast
|
485
485
|
|
486
486
|
assert_equal 2, ast.operands.length
|
487
|
-
assert_instance_of
|
487
|
+
assert_instance_of StringToken, ast.operands[0]
|
488
488
|
assert_equal "meeting", ast.operands[0].value
|
489
489
|
|
490
|
-
assert_instance_of
|
490
|
+
assert_instance_of StringToken, ast.operands[1]
|
491
491
|
assert_equal "project", ast.operands[1].value
|
492
492
|
end
|
493
493
|
|
@@ -505,9 +505,9 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
505
505
|
|
506
506
|
assert_instance_of Or, ast.value
|
507
507
|
assert_equal 2, ast.value.operands.length
|
508
|
-
assert_instance_of
|
508
|
+
assert_instance_of StringToken, ast.value.operands[0]
|
509
509
|
assert_equal "mischa@", ast.value.operands[0].value
|
510
|
-
assert_instance_of
|
510
|
+
assert_instance_of StringToken, ast.value.operands[1]
|
511
511
|
assert_equal "julik@", ast.value.operands[1].value
|
512
512
|
end
|
513
513
|
|
@@ -518,9 +518,9 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
518
518
|
|
519
519
|
assert_instance_of Or, ast.value
|
520
520
|
assert_equal 2, ast.value.operands.length
|
521
|
-
assert_instance_of
|
521
|
+
assert_instance_of StringToken, ast.value.operands[0]
|
522
522
|
assert_equal "amy@example.com", ast.value.operands[0].value
|
523
|
-
assert_instance_of
|
523
|
+
assert_instance_of StringToken, ast.value.operands[1]
|
524
524
|
assert_equal "bob@example.com", ast.value.operands[1].value
|
525
525
|
end
|
526
526
|
|
@@ -543,9 +543,9 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
543
543
|
|
544
544
|
assert_instance_of And, ast.value
|
545
545
|
assert_equal 2, ast.value.operands.length
|
546
|
-
assert_instance_of
|
546
|
+
assert_instance_of StringToken, ast.value.operands[0]
|
547
547
|
assert_equal "urgent", ast.value.operands[0].value
|
548
|
-
assert_instance_of
|
548
|
+
assert_instance_of StringToken, ast.value.operands[1]
|
549
549
|
assert_equal "meeting", ast.value.operands[1].value
|
550
550
|
end
|
551
551
|
|
@@ -570,7 +570,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
570
570
|
|
571
571
|
assert_instance_of And, ast.value
|
572
572
|
assert_equal 2, ast.value.operands.length
|
573
|
-
assert_instance_of
|
573
|
+
assert_instance_of StringToken, ast.value.operands[0]
|
574
574
|
assert_equal "meeting", ast.value.operands[0].value
|
575
575
|
assert_instance_of Not, ast.value.operands[1]
|
576
576
|
assert_equal "cancelled", ast.value.operands[1].child.value
|
@@ -604,7 +604,7 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
604
604
|
assert_equal 2, ast.value.operands[0].operands.length
|
605
605
|
assert_equal "urgent", ast.value.operands[0].operands[0].value
|
606
606
|
assert_equal "important", ast.value.operands[0].operands[1].value
|
607
|
-
assert_instance_of
|
607
|
+
assert_instance_of StringToken, ast.value.operands[1]
|
608
608
|
assert_equal "meeting", ast.value.operands[1].value
|
609
609
|
end
|
610
610
|
|
@@ -615,9 +615,9 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
615
615
|
|
616
616
|
assert_instance_of Or, ast.value
|
617
617
|
assert_equal 2, ast.value.operands.length
|
618
|
-
assert_instance_of
|
618
|
+
assert_instance_of StringToken, ast.value.operands[0]
|
619
619
|
assert_equal "mischa@", ast.value.operands[0].value
|
620
|
-
assert_instance_of
|
620
|
+
assert_instance_of StringToken, ast.value.operands[1]
|
621
621
|
assert_equal "marc@", ast.value.operands[1].value
|
622
622
|
end
|
623
623
|
|
@@ -688,4 +688,50 @@ class GmailSearchSyntaxTest < Minitest::Test
|
|
688
688
|
assert_equal "subject", ast.operands[1].name
|
689
689
|
assert_instance_of And, ast.operands[1].value
|
690
690
|
end
|
691
|
+
|
692
|
+
def test_quoted_string_with_escaped_quotes
|
693
|
+
ast = GmailSearchSyntax.parse!('"She said \\"hello\\" to me"')
|
694
|
+
assert_instance_of Substring, ast
|
695
|
+
assert_equal 'She said "hello" to me', ast.value
|
696
|
+
end
|
697
|
+
|
698
|
+
def test_quoted_string_with_escaped_backslash
|
699
|
+
ast = GmailSearchSyntax.parse!('"path\\\\to\\\\file"')
|
700
|
+
assert_instance_of Substring, ast
|
701
|
+
assert_equal 'path\\to\\file', ast.value
|
702
|
+
end
|
703
|
+
|
704
|
+
def test_subject_with_escaped_quotes
|
705
|
+
ast = GmailSearchSyntax.parse!('subject:"Meeting: \\"Q1 Review\\""')
|
706
|
+
assert_instance_of Operator, ast
|
707
|
+
assert_equal "subject", ast.name
|
708
|
+
assert_equal 'Meeting: "Q1 Review"', ast.value
|
709
|
+
end
|
710
|
+
|
711
|
+
def test_unquoted_text_with_escaped_quote
|
712
|
+
ast = GmailSearchSyntax.parse!('meeting\\"room')
|
713
|
+
assert_instance_of StringToken, ast
|
714
|
+
assert_equal 'meeting"room', ast.value
|
715
|
+
end
|
716
|
+
|
717
|
+
def test_unquoted_text_with_escaped_backslash
|
718
|
+
ast = GmailSearchSyntax.parse!('path\\\\to\\\\file')
|
719
|
+
assert_instance_of StringToken, ast
|
720
|
+
assert_equal 'path\\to\\file', ast.value
|
721
|
+
end
|
722
|
+
|
723
|
+
def test_operator_with_unquoted_escaped_quote
|
724
|
+
ast = GmailSearchSyntax.parse!('subject:test\\"value')
|
725
|
+
assert_instance_of Operator, ast
|
726
|
+
assert_equal "subject", ast.name
|
727
|
+
assert_equal 'test"value', ast.value
|
728
|
+
end
|
729
|
+
|
730
|
+
def test_multiple_tokens_with_escapes
|
731
|
+
ast = GmailSearchSyntax.parse!('meeting\\"room project\\\\plan')
|
732
|
+
assert_instance_of And, ast
|
733
|
+
assert_equal 2, ast.operands.length
|
734
|
+
assert_equal 'meeting"room', ast.operands[0].value
|
735
|
+
assert_equal 'project\\plan', ast.operands[1].value
|
736
|
+
end
|
691
737
|
end
|
data/test/sql_visitor_test.rb
CHANGED
@@ -244,9 +244,21 @@ class SqlVisitorTest < Minitest::Test
|
|
244
244
|
def test_plain_text_search
|
245
245
|
sql, params = parse_and_visit("meeting")
|
246
246
|
|
247
|
+
# Text nodes now use word boundary matching
|
248
|
+
assert_includes sql, "m0.subject = ?"
|
247
249
|
assert_includes sql, "m0.subject LIKE ?"
|
250
|
+
assert_includes sql, "m0.body = ?"
|
248
251
|
assert_includes sql, "m0.body LIKE ?"
|
249
|
-
assert_equal ["meeting", "%meeting%", "%meeting%"], params
|
252
|
+
assert_equal ["meeting", "meeting %", "% meeting", "% meeting %", "meeting", "meeting %", "% meeting", "% meeting %"], params
|
253
|
+
end
|
254
|
+
|
255
|
+
def test_quoted_text_search_uses_substring
|
256
|
+
sql, params = parse_and_visit('"meeting"')
|
257
|
+
|
258
|
+
# Quoted strings create Substring nodes which use LIKE %value%
|
259
|
+
assert_includes sql, "m0.subject LIKE ?"
|
260
|
+
assert_includes sql, "m0.body LIKE ?"
|
261
|
+
assert_equal ["%meeting%", "%meeting%"], params
|
250
262
|
end
|
251
263
|
|
252
264
|
def test_complex_query
|
@@ -343,4 +355,35 @@ class SqlVisitorTest < Minitest::Test
|
|
343
355
|
join_count = sql.scan("INNER JOIN message_addresses").length
|
344
356
|
assert_equal 2, join_count
|
345
357
|
end
|
358
|
+
|
359
|
+
def test_quoted_string_with_escaped_quotes
|
360
|
+
sql, params = parse_and_visit('"She said \\"hello\\" to me"')
|
361
|
+
|
362
|
+
assert_includes sql, "m0.subject LIKE ?"
|
363
|
+
assert_includes sql, "m0.body LIKE ?"
|
364
|
+
assert_equal ['%She said "hello" to me%', '%She said "hello" to me%'], params
|
365
|
+
end
|
366
|
+
|
367
|
+
def test_subject_with_escaped_quotes
|
368
|
+
sql, params = parse_and_visit('subject:"Meeting: \\"Q1 Review\\""')
|
369
|
+
|
370
|
+
assert_includes sql, "m0.subject LIKE ?"
|
371
|
+
assert_equal ['%Meeting: "Q1 Review"%'], params
|
372
|
+
end
|
373
|
+
|
374
|
+
def test_unquoted_token_with_escaped_quote
|
375
|
+
sql, params = parse_and_visit('meeting\\"room')
|
376
|
+
|
377
|
+
# Unquoted tokens use word boundary matching
|
378
|
+
assert_includes sql, "m0.subject = ?"
|
379
|
+
assert_includes sql, "m0.body = ?"
|
380
|
+
assert_equal ['meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %', 'meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %'], params
|
381
|
+
end
|
382
|
+
|
383
|
+
def test_operator_with_unquoted_escaped_quote
|
384
|
+
sql, params = parse_and_visit('subject:test\\"value')
|
385
|
+
|
386
|
+
assert_includes sql, "m0.subject LIKE ?"
|
387
|
+
assert_equal ['%test"value%'], params
|
388
|
+
end
|
346
389
|
end
|
data/test/tokenizer_test.rb
CHANGED
@@ -182,4 +182,58 @@ class TokenizerTest < Minitest::Test
|
|
182
182
|
assert_equal "report", word_tokens[1].value
|
183
183
|
assert_equal "meeting", word_tokens[2].value
|
184
184
|
end
|
185
|
+
|
186
|
+
def test_tokenize_quoted_string_with_escaped_quote
|
187
|
+
tokens = tokenize('"She said \\"hello\\" to me"')
|
188
|
+
assert_equal 2, tokens.length
|
189
|
+
assert_equal :quoted_string, tokens[0].type
|
190
|
+
assert_equal 'She said "hello" to me', tokens[0].value
|
191
|
+
assert_equal :eof, tokens[1].type
|
192
|
+
end
|
193
|
+
|
194
|
+
def test_tokenize_quoted_string_with_escaped_backslash
|
195
|
+
tokens = tokenize('"path\\\\to\\\\file"')
|
196
|
+
assert_equal 2, tokens.length
|
197
|
+
assert_equal :quoted_string, tokens[0].type
|
198
|
+
assert_equal 'path\\to\\file', tokens[0].value
|
199
|
+
end
|
200
|
+
|
201
|
+
def test_tokenize_quoted_string_with_multiple_escapes
|
202
|
+
tokens = tokenize('"test \\"nested\\" and \\\\ slash"')
|
203
|
+
assert_equal 2, tokens.length
|
204
|
+
assert_equal :quoted_string, tokens[0].type
|
205
|
+
assert_equal 'test "nested" and \\ slash', tokens[0].value
|
206
|
+
end
|
207
|
+
|
208
|
+
def test_tokenize_word_with_escaped_quote
|
209
|
+
tokens = tokenize('meeting\\"room')
|
210
|
+
assert_equal 2, tokens.length
|
211
|
+
assert_equal :word, tokens[0].type
|
212
|
+
assert_equal 'meeting"room', tokens[0].value
|
213
|
+
end
|
214
|
+
|
215
|
+
def test_tokenize_word_with_escaped_backslash
|
216
|
+
tokens = tokenize('path\\\\to')
|
217
|
+
assert_equal 2, tokens.length
|
218
|
+
assert_equal :word, tokens[0].type
|
219
|
+
assert_equal 'path\\to', tokens[0].value
|
220
|
+
end
|
221
|
+
|
222
|
+
def test_tokenize_multiple_words_with_escapes
|
223
|
+
tokens = tokenize('meeting\\"room another\\\\word')
|
224
|
+
word_tokens = tokens.select { |t| t.type == :word }
|
225
|
+
assert_equal 2, word_tokens.length
|
226
|
+
assert_equal 'meeting"room', word_tokens[0].value
|
227
|
+
assert_equal 'another\\word', word_tokens[1].value
|
228
|
+
end
|
229
|
+
|
230
|
+
def test_tokenize_operator_value_with_escaped_quote
|
231
|
+
tokens = tokenize('subject:test\\"value')
|
232
|
+
assert_equal 4, tokens.length
|
233
|
+
assert_equal :word, tokens[0].type
|
234
|
+
assert_equal "subject", tokens[0].value
|
235
|
+
assert_equal :colon, tokens[1].type
|
236
|
+
assert_equal :word, tokens[2].type
|
237
|
+
assert_equal 'test"value', tokens[2].value
|
238
|
+
end
|
185
239
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gmail_search_syntax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- me@julik.nl
|
@@ -70,14 +70,17 @@ extensions: []
|
|
70
70
|
extra_rdoc_files: []
|
71
71
|
files:
|
72
72
|
- ARCHITECTURE.md
|
73
|
+
- IMPLEMENTATION_NOTES.md
|
73
74
|
- README.md
|
74
75
|
- Rakefile
|
75
76
|
- SCHEMA.md
|
76
77
|
- examples/alias_collision_fix.rb
|
77
78
|
- examples/demo.rb
|
79
|
+
- examples/escaped_quotes_demo.rb
|
78
80
|
- examples/gmail_message_id_demo.rb
|
79
81
|
- examples/postgres_vs_sqlite.rb
|
80
82
|
- examples/sql_query.rb
|
83
|
+
- examples/text_vs_substring_demo.rb
|
81
84
|
- lib/GMAIL_SEARCH_OPERATORS.md
|
82
85
|
- lib/gmail_search_syntax.rb
|
83
86
|
- lib/gmail_search_syntax/ast.rb
|