gmail_search_syntax 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +338 -0
- data/README.md +129 -0
- data/Rakefile +11 -0
- data/SCHEMA.md +223 -0
- data/examples/alias_collision_fix.rb +43 -0
- data/examples/demo.rb +28 -0
- data/examples/gmail_message_id_demo.rb +118 -0
- data/examples/postgres_vs_sqlite.rb +55 -0
- data/examples/sql_query.rb +47 -0
- data/lib/GMAIL_SEARCH_OPERATORS.md +58 -0
- data/lib/gmail_search_syntax/ast.rb +100 -0
- data/lib/gmail_search_syntax/parser.rb +224 -0
- data/lib/gmail_search_syntax/sql_visitor.rb +496 -0
- data/lib/gmail_search_syntax/tokenizer.rb +152 -0
- data/lib/gmail_search_syntax/version.rb +3 -0
- data/lib/gmail_search_syntax.rb +34 -0
- data/test/gmail_search_syntax_test.rb +691 -0
- data/test/integration_test.rb +668 -0
- data/test/postgres_visitor_test.rb +156 -0
- data/test/sql_visitor_test.rb +346 -0
- data/test/test_helper.rb +27 -0
- data/test/tokenizer_test.rb +185 -0
- metadata +115 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path("../lib", __dir__)
|
2
|
+
require "gmail_search_syntax"
|
3
|
+
|
4
|
+
# Walk-through of the table-alias uniqueness fix: prints an explanation, then
# generates SQL for an OR query and reports how often each alias shows up.
rule = "=" * 80

puts [
  rule,
  "Table Alias Uniqueness Fix",
  rule,
  "",
  "When using operators with subqueries (OR, AND, NOT, etc.), each subquery",
  "needs its own visitor. Previously, each sub-visitor had its own alias",
  "counter starting at 0, causing alias collisions like:",
  "",
  " WRONG: ... ma1 ... ma1 ... (same alias twice!)",
  "",
  "Now, all sub-visitors share a single counter ((1..).each enumerator), ensuring:",
  "",
  " RIGHT: ... ma1 ... ma3 ... (unique aliases)",
  "",
  rule
]

query = "from:alice@example.com OR from:bob@example.com"
puts "", "Query: #{query}", "-" * 80

tree = GmailSearchSyntax.parse!(query)
sqlite = GmailSearchSyntax::SQLiteVisitor.new
sqlite.visit(tree)
# to_sql returns the SQL text first; the remaining elements are not needed here.
sql, = sqlite.to_query.to_sql

puts "", "Generated SQL:", sql, ""

# Count whole-word occurrences of the two aliases this demo reports on.
alias_counts = %w[ma1 ma3].to_h { |name| [name, sql.scan(/\b#{name}\b/).size] }

puts "Alias usage:"
alias_counts.each { |name, count| puts " #{name}: appears #{count} times" }
puts "", "✓ No alias collision! Each JOIN has a unique alias.", "", rule
|
data/examples/demo.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require_relative "../lib/gmail_search_syntax"
|
2
|
+
|
3
|
+
# Parses a handful of representative Gmail queries and prints each AST.
puts "Gmail Search Syntax Parser - Demo", "=" * 50, ""

sample_queries = [
  "from:amy@example.com",
  "subject:meeting has:attachment",
  "from:boss OR from:manager",
  "{from:amy from:bob from:charlie}",
  "dinner -movie",
  "holiday AROUND 10 vacation",
  'from:manager subject:"quarterly review" after:2024/01/01',
  "is:unread label:important -label:spam",
  "(from:team OR from:boss) subject:urgent",
  "from:(mischa@ OR julik@) subject:meeting",
  "to:(alice@ OR bob@ OR charlie@)",
  "from:{mischa@ marc@}",
  "from:{alice@ bob@} to:{charlie@ david@}"
]

sample_queries.each do |text|
  # Print the query before parsing so it is visible even if parsing raises.
  puts "Query: #{text}"
  tree = GmailSearchSyntax.parse!(text)
  puts "AST: #{tree.inspect}", ""
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
# This example demonstrates how Gmail message IDs encode timestamps
|
5
|
+
# Based on: https://www.metaspike.com/dates-gmail-message-id-thread-id-timestamps/
|
6
|
+
#
|
7
|
+
# Gmail Message IDs are hexadecimal values where the first part (all but last 5 digits)
|
8
|
+
# encodes the timestamp in milliseconds since epoch.
|
9
|
+
|
10
|
+
require "time"
|
11
|
+
|
12
|
+
# Builds a Gmail-style hexadecimal message ID for the given moment.
#
# The result is the millisecond Unix timestamp rendered in hex, followed by
# five hex digits taken from +random+.
#
# @param time [Time, Numeric] moment to encode; must respond to #to_r
# @param random [Random] source of the trailing digits; pass a seeded
#   instance to get reproducible IDs
# @return [String] timestamp hex digits followed by 5 random hex digits
def generate_gmail_message_id(time, random: Random.new)
  # Exact rational arithmetic: going through Float (time.to_f * 1000) can land
  # a hair below the true millisecond and truncate to the previous one.
  timestamp_ms = (time.to_r * 1000).floor

  # 3 random bytes unpack to 6 hex digits; only the first 5 are kept.
  random_hex = random.bytes(3).unpack1("H*")[0, 5]

  timestamp_ms.to_s(16) + random_hex
end
|
23
|
+
|
24
|
+
# Recovers the timestamp encoded in a Gmail-style hexadecimal message ID.
#
# Everything except the last five hex digits is read as a millisecond Unix
# timestamp.
#
# @param message_id [String] hexadecimal message ID
# @return [Time] the encoded moment, exact to the millisecond
# @raise [ArgumentError] if the part before the last five characters is empty
#   or contains non-hexadecimal characters
def decode_gmail_message_id(message_id)
  # Drop the five trailing random digits; what remains is the timestamp.
  timestamp_hex = message_id[0..-6]

  # String#to_i(16) would silently turn garbage into 0 (the Unix epoch).
  unless timestamp_hex&.match?(/\A\h+\z/)
    raise ArgumentError, "invalid Gmail message ID: #{message_id.inspect}"
  end

  # Split into whole seconds and milliseconds so no Float rounding creeps in.
  seconds, milliseconds = timestamp_hex.to_i(16).divmod(1000)
  Time.at(seconds, milliseconds, :millisecond)
end
|
34
|
+
|
35
|
+
# Demo driver: exercises generate_gmail_message_id / decode_gmail_message_id
# and prints the results of four small experiments.
puts "Gmail Message ID Generation Demo", "=" * 50, ""

# Example 1: round-trip the current time through an ID.
puts "Example 1: Current timestamp"
now = Time.now
now_id = generate_gmail_message_id(now)
now_decoded = decode_gmail_message_id(now_id)

puts "Original time: #{now}",
  "Message ID: #{now_id}",
  "Decoded time: #{now_decoded}",
  "Match: #{(now - now_decoded).abs < 0.001}",
  ""

# Example 2: decode IDs whose dates are quoted in the article.
puts "Example 2: Known Gmail message IDs from the article"
known_ids = {
  "172ed79b0337c14f" => "Thursday, June 25, 2020 9:54:34.675 PM (UTC)",
  "ffff3432161af8b" => "Wednesday, November 3, 2004 4:11:11.254 PM (UTC)"
}

known_ids.each_pair do |id, expected|
  decoded_at = decode_gmail_message_id(id)
  puts "Message ID: #{id}",
    "Expected: #{expected}",
    "Decoded: #{decoded_at.utc}",
    ""
end

# Example 3: round-trip a few fixed dates.
puts "Example 3: Generate IDs for specific dates"
fixed_dates = [
  "2020-01-01 00:00:00 UTC",
  "2023-06-15 12:30:45 UTC",
  "2024-12-25 18:45:00 UTC"
].map { |stamp| Time.parse(stamp) }

fixed_dates.each do |moment|
  id = generate_gmail_message_id(moment)
  decoded_at = decode_gmail_message_id(id)
  puts "Original: #{moment}",
    "Message ID: #{id}",
    "Decoded: #{decoded_at}",
    "Accurate: #{(moment - decoded_at).abs < 0.001}",
    ""
end

# Example 4: a seeded Random makes the random suffix reproducible.
puts "Example 4: Reproducible IDs with seeded Random"
fixed_time = Time.parse("2024-06-15 12:00:00 UTC")

# Two fresh generators with the same seed must yield the same ID.
first_id, second_id = Array.new(2) do
  generate_gmail_message_id(fixed_time, random: Random.new(12345))
end

puts "Time: #{fixed_time}",
  "ID (seed 1): #{first_id}",
  "ID (seed 2): #{second_id}",
  "Reproducible: #{first_id == second_id}",
  ""

# Different seeds are used to show differing suffixes.
id_a, id_b = [11111, 22222].map do |seed|
  generate_gmail_message_id(fixed_time, random: Random.new(seed))
end

puts "ID (seed 11111): #{id_a}",
  "ID (seed 22222): #{id_b}",
  "Different: #{id_a != id_b}",
  ""

puts [
  "=" * 50,
  "Key Points:",
  "- Message IDs are 15-16 hexadecimal digits",
  "- First (n-5) digits encode timestamp in milliseconds",
  "- Last 5 digits are random for uniqueness",
  "- Overflow happened on Nov 3, 2004 (10 → 11 digits for timestamp)",
  "- Optional random: parameter allows reproducible testing",
  "- Uses bytes for ~10x faster random generation"
]
|
@@ -0,0 +1,55 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path("../lib", __dir__)
|
2
|
+
require "gmail_search_syntax"
|
3
|
+
|
4
|
+
# This example demonstrates the difference between SQLite and PostgreSQL
|
5
|
+
# SQL generation, particularly for relative date operators
|
6
|
+
|
7
|
+
# Prints the SQL and bound parameters each backend visitor produces for a few
# relative-date queries, followed by a summary of the differences.
rule = "=" * 80

relative_date_queries = [
  "older_than:7d",
  "newer_than:3m",
  "from:alice@example.com older_than:1y",
  "subject:meeting newer_than:2d"
]

# Renders one backend section. The visitor is built lazily (after the section
# header is printed) so output order matches a straight-line script.
show_backend = lambda do |label, ast, visitor_factory|
  puts "", "--- #{label} ---"
  visitor = visitor_factory.call
  visitor.visit(ast)
  sql, params = visitor.to_query.to_sql

  puts "SQL:", sql, "", "Parameters:"
  params.each.with_index(1) do |param, position|
    puts " #{position}. #{param.inspect}"
  end
end

relative_date_queries.each do |text|
  puts "", rule, "Query: #{text}", rule

  ast = GmailSearchSyntax.parse!(text)

  show_backend.call("SQLite", ast, lambda {
    GmailSearchSyntax::SQLiteVisitor.new(current_user_email: "me@example.com")
  })
  show_backend.call("PostgreSQL", ast, lambda {
    GmailSearchSyntax::PostgresVisitor.new(current_user_email: "me@example.com")
  })
end

puts [
  "",
  rule,
  "Key Differences:",
  rule,
  "- SQLite uses: datetime('now', '-7 days')",
  "- PostgreSQL uses: NOW() - '7 days'::interval",
  "- The relative time parameter format differs:",
  " * SQLite: '-7 days' (negative number)",
  " * PostgreSQL: '7 days' (positive number with cast)"
]
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require_relative "../lib/gmail_search_syntax"
|
2
|
+
|
3
|
+
# Prints the SQL and bound parameters generated for a set of sample queries,
# then shows the SQL produced for an AROUND query.
rule = "=" * 80

# Prints a blank line followed by a title framed by two rules.
banner = ->(title) { puts "", rule, title, rule }

sample_queries = [
  "from:amy@example.com",
  "from:@example.com",
  "from:amy AND to:bob",
  "subject:meeting has:attachment",
  "label:important is:unread",
  "after:2024/01/01 -from:spam@example.com",
  'from:(amy@example.com OR bob@example.com) subject:"urgent meeting"',
  "larger:10M filename:pdf",
  "category:primary is:starred"
]

sample_queries.each do |text|
  banner.call("Query: #{text}")

  tree = GmailSearchSyntax.parse!(text)
  sql_visitor = GmailSearchSyntax::SqlVisitor.new(current_user_email: "me@example.com")
  sql_visitor.visit(tree)

  statement, bindings = sql_visitor.to_query.to_sql

  puts "", "SQL:", statement, "", "Parameters:"
  bindings.each.with_index(1) do |binding_value, position|
    puts " #{position}. #{binding_value.inspect}"
  end
end

banner.call("AROUND operator (generates no-op condition):")

around_tree = GmailSearchSyntax.parse!("holiday AROUND 10 vacation")
around_visitor = GmailSearchSyntax::SqlVisitor.new
around_visitor.visit(around_tree)
# Only the SQL text is shown; the parameter list is reported as "(none)".
around_sql, = around_visitor.to_query.to_sql

puts "", "SQL:", around_sql, "", "Parameters:", " (none)"
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Gmail Search Operators Reference
|
2
|
+
|
3
|
+
Source: [Gmail Help - Refine searches in Gmail](https://support.google.com/mail/answer/7190?hl=en&co=GENIE.Platform%3DDesktop)
|
4
|
+
|
5
|
+
You can use words or symbols called search operators to filter your Gmail search results. You can also combine operators to filter your results even more.
|
6
|
+
|
7
|
+
## How to Use Search Operators
|
8
|
+
|
9
|
+
1. On your computer, go to Gmail.
|
10
|
+
2. At the top, click the search box.
|
11
|
+
3. Enter a search operator.
|
12
|
+
|
13
|
+
### Tips
|
14
|
+
|
15
|
+
- After you search, you can use the results to set up a filter for these messages.
|
16
|
+
- When using numbers as part of your query, a space or a dash (-) will separate a number while a dot (.) will be a decimal. For example, 01.2047-100 is considered 2 numbers: 01.2047 and 100.
|
17
|
+
|
18
|
+
## Search Operators
|
19
|
+
|
20
|
+
| Search Operator | Description | Example |
|
21
|
+
|---|---|---|
|
22
|
+
| `from:` | Find emails sent from a specific person. | `from:me`<br>`from:amy@example.com` |
|
23
|
+
| `to:` | Find emails sent to a specific person. | `to:me`<br>`to:john@example.com` |
|
24
|
+
| `cc:`<br>`bcc:` | Find emails that include specific people in the "Cc" or "Bcc" fields. | `cc:john@example.com`<br>`bcc:david@example.com` |
|
25
|
+
| `subject:` | Find emails by a word or phrase in the subject line. | `subject:dinner`<br>`subject:anniversary party` |
|
26
|
+
| `after:`<br>`before:`<br>`older:`<br>`newer:` | Search for emails received during a certain time period. | `after:2004/04/16`<br>`after:04/16/2004`<br>`before:2004/04/18`<br>`before:04/18/2004` |
|
27
|
+
| `older_than:`<br>`newer_than:` | Search for emails older or newer than a time period. Use d (day), m (month), or y (year). | `older_than:1y`<br>`newer_than:2d` |
|
28
|
+
| `OR` or `{ }` | Find emails that match one or more of your search criteria. | `from:amy OR from:david`<br>`{from:amy from:david}` |
|
29
|
+
| `AND` | Find emails that match all of your search criteria. | `from:amy AND to:david` |
|
30
|
+
| `-` | Exclude emails from your search criteria. | `dinner -movie` |
|
31
|
+
| `AROUND` | Find emails with words near each other. Use the number to say how many words apart the words can be. Add quotes to find messages in which the word you put first stays first. | `holiday AROUND 10 vacation`<br>`"secret AROUND 25 birthday"` |
|
32
|
+
| `label:` | Find emails under one of your labels. | `label:friends`<br>`label:important` |
|
33
|
+
| `category:` | If you use inbox categories, find emails under one of the categories. | `category:primary`<br>`category:social`<br>`category:promotions`<br>`category:updates`<br>`category:forums`<br>`category:reservations`<br>`category:purchases` |
|
34
|
+
| `has:` | Find emails that include:<br>- Attachments<br>- Inline images<br>- YouTube videos<br>- Drive files<br>- Google Docs<br>- Google Sheets<br>- Google Slides | `has:attachment`<br>`has:youtube`<br>`has:drive`<br>`has:document`<br>`has:spreadsheet`<br>`has:presentation` |
|
35
|
+
| `list:` | Find emails from a mailing list. | `list:info@example.com` |
|
36
|
+
| `filename:` | Find emails that have attachments with a certain name or file type. | `filename:pdf`<br>`filename:homework.txt` |
|
37
|
+
| `" "` | Search for emails with an exact word or phrase. | `"dinner and movie tonight"` |
|
38
|
+
| `( )` | Group multiple search terms together. | `subject:(dinner movie)` |
|
39
|
+
| `in:anywhere` | Find emails across Gmail. This includes emails in Spam and Trash. | `in:anywhere movie` |
|
40
|
+
| `in:archive` | Search for archived messages. | `in:archive payment reminder` |
|
41
|
+
| `in:snoozed` | Find emails that you snoozed. | `in:snoozed birthday reminder` |
|
42
|
+
| `is:muted` | Find emails that you muted. | `is:muted subject:team celebration` |
|
43
|
+
| `is:` | Search for emails by their status:<br>- Important<br>- Starred<br>- Unread<br>- Read | `is:important`<br>`is:starred`<br>`is:unread`<br>`is:read` |
|
44
|
+
| `has:yellow-star`<br>`has:orange-star`<br>`has:red-star`<br>`has:purple-star`<br>`has:blue-star`<br>`has:green-star`<br>`has:red-bang`<br>`has:orange-guillemet`<br>`has:yellow-bang`<br>`has:green-check`<br>`has:blue-info`<br>`has:purple-question` | If you set up different star options, you can search for emails under a star option. | `has:yellow-star OR has:purple-question` |
|
45
|
+
| `deliveredto:` | Find emails delivered to a specific email address. | `deliveredto:username@example.com` |
|
46
|
+
| `size:`<br>`larger:`<br>`smaller:` | Find emails by their size. | `size:1000000`<br>`larger:10M` |
|
47
|
+
| `+` | Find emails that match a word exactly. | `+unicorn` |
|
48
|
+
| `rfc822msgid:` | Find emails with a specific message-id header. | `rfc822msgid:200503292@example.com` |
|
49
|
+
| `has:userlabels`<br>`has:nouserlabels` | Find emails that have or don't have a label. Labels are only added to a message, and not an entire conversation. | `has:userlabels`<br>`has:nouserlabels` |
|
50
|
+
| `label:encryptedmail` | Find emails sent with Client-side encryption. | `label:encryptedmail` |
|
51
|
+
|
52
|
+
## Additional Notes
|
53
|
+
|
54
|
+
- You can combine operators to create complex searches
|
55
|
+
- Operators work with both implicit AND (space-separated terms) and explicit AND
|
56
|
+
- Use parentheses `()` to group terms, and braces `{}` to match any one of the enclosed terms (equivalent to joining them with `OR`)
|
57
|
+
- Operator values can contain expressions, e.g., `from:(alice@ OR bob@)` or `from:{alice@ bob@}`
|
58
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module GmailSearchSyntax
  # Node classes making up the abstract syntax tree of a parsed query.
  module AST
    # Base class for all nodes. Provides value-based equality: two nodes are
    # equal when they have the same class and equal instance variables.
    class Node
      # @param other [Object]
      # @return [Boolean] true when +other+ has the same class and attributes
      def ==(other)
        self.class == other.class && attributes == other.attributes
      end

      # Strict counterpart of #== used by Hash, Set and Array#uniq.
      #
      # @param other [Object]
      # @return [Boolean]
      def eql?(other)
        self.class == other.class && attributes.eql?(other.attributes)
      end

      # Hash code consistent with #eql?, so equal nodes land in the same
      # bucket when used as Hash keys or Set members.
      #
      # @return [Integer]
      def hash
        [self.class, attributes].hash
      end

      # @return [Array] the values of all instance variables, in the order
      #   they were assigned
      def attributes
        instance_variables.map { |var| instance_variable_get(var) }
      end
    end

    # A +name:value+ search operator. The value is whatever the parser
    # produced for it (a scalar token value or a nested node).
    class Operator < Node
      attr_reader :name, :value

      def initialize(name, value)
        @name = name
        @value = value
      end

      def inspect
        "#<Operator #{@name}: #{@value.inspect}>"
      end
    end

    # A bare term.
    class Text < Node
      attr_reader :value

      def initialize(value)
        @value = value
      end

      def inspect
        "#<Text #{@value.inspect}>"
      end
    end

    # Conjunction of its operands.
    class And < Node
      attr_reader :operands

      def initialize(operands)
        @operands = operands
      end

      def inspect
        "#<And #{@operands.map(&:inspect).join(" AND ")}>"
      end
    end

    # Disjunction of its operands.
    class Or < Node
      attr_reader :operands

      def initialize(operands)
        @operands = operands
      end

      def inspect
        "#<Or #{@operands.map(&:inspect).join(" OR ")}>"
      end
    end

    # Negation of a single child node.
    class Not < Node
      attr_reader :child

      def initialize(child)
        @child = child
      end

      def inspect
        "#<Not #{@child.inspect}>"
      end
    end

    # A list of child nodes kept together as one unit.
    class Group < Node
      attr_reader :children

      def initialize(children = [])
        @children = children
      end

      def inspect
        "#<Group #{@children.inspect}>"
      end
    end

    # Proximity match: +left+ and +right+ joined by AROUND with a distance.
    class Around < Node
      attr_reader :left, :distance, :right

      def initialize(left, distance, right)
        @left = left
        @distance = distance
        @right = right
      end

      def inspect
        "#<Around #{@left.inspect} AROUND #{@distance} #{@right.inspect}>"
      end
    end
  end
end
|
@@ -0,0 +1,224 @@
|
|
1
|
+
module GmailSearchSyntax
  # Recursive-descent parser that turns a token list into an AST.
  #
  # Tokens must respond to #type and #value. Binding strength, loosest to
  # tightest: OR, AND (explicit keyword or plain juxtaposition), AROUND,
  # unary -/+, primary (word, operator, quoted string, parens, braces).
  class Parser
    OPERATORS = %w[
      from to cc bcc subject after before older newer older_than newer_than
      label category has list filename in is deliveredto size larger smaller
      rfc822msgid
    ].freeze

    # Token types whose value is taken verbatim as an operator value.
    VALUE_TOKEN_TYPES = %i[word email quoted_string number date relative_time].freeze

    # Token types that end an AND sequence and hand control back to the caller.
    AND_TERMINATORS = %i[or rparen rbrace].freeze

    # Distance used when AROUND is not followed by a number.
    DEFAULT_AROUND_DISTANCE = 5

    # @param tokens [Array] tokens responding to #type and #value
    def initialize(tokens)
      @tokens = tokens
      @position = 0
    end

    # Parses the whole token list.
    #
    # @return [AST::Node] the single top-level node, or an AST::And wrapping
    #   all of them when a stray closing bracket split the input into several
    #   top-level expressions
    # @raise [GmailSearchSyntax::EmptyQueryError] if nothing could be parsed
    def parse!
      children = []

      until eof?
        node = parse_expression
        children << node if node
      end

      raise GmailSearchSyntax::EmptyQueryError, "Query cannot be empty" if children.empty?

      # Keep every parsed expression instead of silently discarding all but
      # the first one.
      (children.length == 1) ? children.first : AST::And.new(children)
    end

    private

    def current_token
      @tokens[@position]
    end

    def peek_token(offset = 1)
      @tokens[@position + offset]
    end

    def advance
      @position += 1
    end

    # True when the tokens are exhausted or the current token is :eof.
    def eof?
      current_token.nil? || current_token.type == :eof
    end

    def parse_expression
      parse_or_expression
    end

    # Parses AND-expressions separated by OR tokens. Missing operands (e.g. a
    # trailing "OR") are dropped rather than stored as nil.
    def parse_or_expression
      operands = [parse_and_expression]

      while current_token&.type == :or
        advance
        operands << parse_and_expression
      end

      operands.compact!
      return nil if operands.empty?

      (operands.length == 1) ? operands.first : AST::Or.new(operands)
    end

    # Parses a run of terms joined by explicit AND tokens and/or plain
    # juxtaposition, in any mix, until OR, a closing bracket or end of input.
    def parse_and_expression
      operands = []

      # Always attempt one operand first: parse_primary_expression consumes an
      # unexpected token, which guarantees callers that loop on us progress.
      first = parse_around_expression
      operands << first if first

      until and_sequence_finished?
        if current_token.type == :and
          # An explicit AND only separates operands; it carries no value.
          advance
          next
        end

        operand = parse_around_expression
        operands << operand if operand
      end

      return nil if operands.empty?

      (operands.length == 1) ? operands.first : AST::And.new(operands)
    end

    def and_sequence_finished?
      eof? || AND_TERMINATORS.include?(current_token.type)
    end

    # Parses "left AROUND [number] right". When either side is missing, the
    # side that is present (if any) is returned on its own.
    def parse_around_expression
      left = parse_unary_expression
      return left unless current_token&.type == :around

      advance
      distance = DEFAULT_AROUND_DISTANCE

      if current_token&.type == :number
        distance = current_token.value
        advance
      end

      right = parse_unary_expression
      return left || right unless left && right

      AST::Around.new(left, distance, right)
    end

    # Handles a leading "-" (negation) or "+" (consumed, no node of its own).
    def parse_unary_expression
      case current_token&.type
      when :minus
        advance
        child = parse_primary_expression
        # A dangling "-" negates nothing; do not build Not(nil).
        child && AST::Not.new(child)
      when :plus
        advance
        parse_primary_expression
      else
        parse_primary_expression
      end
    end

    def parse_primary_expression
      return nil if eof?

      case current_token.type
      when :lparen
        parse_parentheses
      when :lbrace
        parse_braces
      when :word
        parse_operator_or_text
      when :quoted_string, :email, :number, :date, :relative_time
        value = current_token.value
        advance
        AST::Text.new(value)
      else
        # Unexpected token here: skip it so parsing can continue.
        advance
        nil
      end
    end

    # Parses "( ... )". A single inner expression is returned as-is; anything
    # else (none, or several) is wrapped in an AST::Group.
    def parse_parentheses
      advance

      children = []
      while !eof? && current_token.type != :rparen
        node = parse_expression
        children << node if node
      end

      advance if current_token&.type == :rparen

      (children.length == 1) ? children.first : AST::Group.new(children)
    end

    # Parses "{ ... }". A single inner term is returned as-is; anything else
    # is wrapped in an AST::Or.
    def parse_braces
      advance

      children = []
      while !eof? && current_token.type != :rbrace
        node = parse_unary_expression
        children << node if node
      end

      advance if current_token&.type == :rbrace

      (children.length == 1) ? children.first : AST::Or.new(children)
    end

    # A word is an operator only when it is a known operator name (compared
    # case-insensitively) immediately followed by a colon; otherwise it is
    # plain text.
    def parse_operator_or_text
      word = current_token.value

      if OPERATORS.include?(word.downcase) && peek_token&.type == :colon
        operator_name = word.downcase
        advance # operator name
        advance # colon

        value = parse_operator_value
        return AST::Operator.new(operator_name, value)
      end

      advance
      AST::Text.new(word)
    end

    # Returns the operator's value: a raw token value, a nested expression
    # for "(...)" / "{...}", or nil when no usable value follows (in which
    # case no token is consumed).
    def parse_operator_value
      return nil if eof?

      case current_token.type
      when *VALUE_TOKEN_TYPES
        value = current_token.value
        advance
        value
      when :lparen
        parse_parentheses
      when :lbrace
        parse_braces
      end
    end
  end
end
|