gmail_search_syntax 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/GMAIL_BEHAVIOR_COMPARISON.md +166 -0
- data/GMAIL_COMPATIBILITY_COMPLETE.md +236 -0
- data/IMPLEMENTATION_NOTES.md +174 -0
- data/README.md +2 -2
- data/examples/escaped_quotes_demo.rb +152 -0
- data/examples/gmail_comparison_demo.rb +82 -0
- data/examples/text_vs_substring_demo.rb +93 -0
- data/lib/gmail_search_syntax/ast.rb +14 -2
- data/lib/gmail_search_syntax/parser.rb +45 -27
- data/lib/gmail_search_syntax/sql_visitor.rb +22 -5
- data/lib/gmail_search_syntax/tokenizer.rb +47 -12
- data/lib/gmail_search_syntax/version.rb +1 -1
- data/test/gmail_search_syntax_test.rb +246 -186
- data/test/sql_visitor_test.rb +44 -1
- data/test/tokenizer_test.rb +204 -118
- metadata +7 -1
data/test/sql_visitor_test.rb
CHANGED
@@ -244,9 +244,21 @@ class SqlVisitorTest < Minitest::Test
   def test_plain_text_search
     sql, params = parse_and_visit("meeting")
 
+    # Text nodes now use word boundary matching
+    assert_includes sql, "m0.subject = ?"
     assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body = ?"
     assert_includes sql, "m0.body LIKE ?"
-    assert_equal ["meeting", "%meeting%", "%meeting%"], params
+    assert_equal ["meeting", "meeting %", "% meeting", "% meeting %", "meeting", "meeting %", "% meeting", "% meeting %"], params
+  end
+
+  def test_quoted_text_search_uses_substring
+    sql, params = parse_and_visit('"meeting"')
+
+    # Quoted strings create Substring nodes which use LIKE %value%
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body LIKE ?"
+    assert_equal ["%meeting%", "%meeting%"], params
   end
 
   def test_complex_query
@@ -343,4 +355,35 @@ class SqlVisitorTest < Minitest::Test
     join_count = sql.scan("INNER JOIN message_addresses").length
     assert_equal 2, join_count
   end
+
+  def test_quoted_string_with_escaped_quotes
+    sql, params = parse_and_visit('"She said \\"hello\\" to me"')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_includes sql, "m0.body LIKE ?"
+    assert_equal ['%She said "hello" to me%', '%She said "hello" to me%'], params
+  end
+
+  def test_subject_with_escaped_quotes
+    sql, params = parse_and_visit('subject:"Meeting: \\"Q1 Review\\""')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_equal ['%Meeting: "Q1 Review"%'], params
+  end
+
+  def test_unquoted_token_with_escaped_quote
+    sql, params = parse_and_visit('meeting\\"room')
+
+    # Unquoted tokens use word boundary matching
+    assert_includes sql, "m0.subject = ?"
+    assert_includes sql, "m0.body = ?"
+    assert_equal ['meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %', 'meeting"room', 'meeting"room %', '% meeting"room', '% meeting"room %'], params
+  end
+
+  def test_operator_with_unquoted_escaped_quote
+    sql, params = parse_and_visit('subject:test\\"value')
+
+    assert_includes sql, "m0.subject LIKE ?"
+    assert_equal ['%test"value%'], params
+  end
 end
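The new parameter lists above encode the word-boundary approximation in portable SQL: an unquoted term is matched with one equality check plus three space-padded LIKE patterns, while a quoted term falls back to a plain substring LIKE. A minimal sketch of those pattern sets (hypothetical helper names for illustration, not code from the gem):

# Hypothetical illustration of the pattern sets asserted in the tests above;
# the gem's actual SqlVisitor may build them differently.
def word_boundary_patterns(term)
  [
    term,          # column = ?      -- the field is exactly the word
    "#{term} %",   # column LIKE ?   -- word at the start of the field
    "% #{term}",   # column LIKE ?   -- word at the end of the field
    "% #{term} %"  # column LIKE ?   -- word in the middle of the field
  ]
end

def substring_pattern(term)
  "%#{term}%"      # quoted strings match anywhere, even inside other words
end

word_boundary_patterns("meeting") # => ["meeting", "meeting %", "% meeting", "% meeting %"]
substring_pattern("meeting")      # => "%meeting%"

The eight parameters asserted in test_plain_text_search are this four-pattern set applied once to m0.subject and once to m0.body.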
data/test/tokenizer_test.rb
CHANGED
@@ -5,181 +5,267 @@ class TokenizerTest < Minitest::Test
     GmailSearchSyntax::Tokenizer.new(input).tokenize
   end
 
+  def assert_token_stream(expected_tokens, actual_tokens)
+    assert expected_tokens.length > 0
+    assert_equal expected_tokens.length, actual_tokens.length, "Expected #{expected_tokens.length} tokens, got #{actual_tokens.length}"
+
+    expected_tokens.each_with_index do |expected_token, index|
+      actual_token = actual_tokens[index]
+      expected_token.each do |property, expected_value|
+        actual_value = actual_token.public_send(property)
+        assert_equal expected_value, actual_value, "Token #{index} #{actual_token}: expected #{property} to be #{expected_value.inspect}, got #{actual_value.inspect}"
+      end
+    end
+  end
+
   def test_tokenize_simple_from
     tokens = tokenize("from:amy@example.com")
-
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_quoted_string
     tokens = tokenize('"hello world"')
-
-
-
+    expected = [
+      {type: :quoted_string, value: "hello world"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_operators
     tokens = tokenize("from:amy@example.com OR to:bob@example.com")
-
-
-
-
-
-
-
-
-
-
-
-    assert_equal :email, tokens[6].type
-    assert_equal "bob@example.com", tokens[6].value
-    assert_equal :eof, tokens[7].type
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :or},
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "bob@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_parentheses
     tokens = tokenize("subject:(meeting call)")
-
-
-
-
-
-
-
-
-
-
-    assert_equal :rparen, tokens[5].type
-    assert_equal :eof, tokens[6].type
+    expected = [
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :lparen},
+      {type: :word, value: "meeting"},
+      {type: :word, value: "call"},
+      {type: :rparen},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_braces
     tokens = tokenize("{from:a from:b}")
-
-
-
-
-
-
-
-
-
-
-
-
-    assert_equal "b", tokens[6].value
-    assert_equal :rbrace, tokens[7].type
-    assert_equal :eof, tokens[8].type
+    expected = [
+      {type: :lbrace},
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :word, value: "a"},
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :word, value: "b"},
+      {type: :rbrace},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_negation
     tokens = tokenize("dinner -movie")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "dinner"},
+      {type: :minus},
+      {type: :word, value: "movie"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_around
     tokens = tokenize("holiday AROUND 10 vacation")
-
-
-
-
-
-
-
-
-    assert_equal "vacation", tokens[3].value
-    assert_equal :eof, tokens[4].type
+    expected = [
+      {type: :word, value: "holiday"},
+      {type: :around},
+      {type: :number, value: 10},
+      {type: :word, value: "vacation"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
  end
 
   def test_tokenize_date
     tokens = tokenize("after:2004/04/16")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "after"},
+      {type: :colon},
+      {type: :date, value: "2004/04/16"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_relative_time
     tokens = tokenize("older_than:1y")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "older_than"},
+      {type: :colon},
+      {type: :relative_time, value: "1y"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_number
     tokens = tokenize("size:1000000")
-
-
-
-
-
-
+    expected = [
+      {type: :word, value: "size"},
+      {type: :colon},
+      {type: :number, value: 1000000},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_and_operator
     tokens = tokenize("from:amy@example.com AND to:bob@example.com")
-
-
-
-
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "amy@example.com"},
+      {type: :and, value: "AND"},
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "bob@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_plus
     tokens = tokenize("+unicorn")
-
-
-
-
+    expected = [
+      {type: :plus},
+      {type: :word, value: "unicorn"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_complex_query
     tokens = tokenize('from:boss@example.com subject:"urgent meeting" has:attachment')
-
-
-
-
-
-
-
-
-
-
-
-
-
-    assert_equal "has", tokens[6].value
-    assert_equal :colon, tokens[7].type
-    assert_equal :word, tokens[8].type
-    assert_equal "attachment", tokens[8].value
-    assert_equal :eof, tokens[9].type
+    expected = [
+      {type: :word, value: "from"},
+      {type: :colon},
+      {type: :email, value: "boss@example.com"},
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :quoted_string, value: "urgent meeting"},
+      {type: :word, value: "has"},
+      {type: :colon},
+      {type: :word, value: "attachment"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_email_with_plus
     tokens = tokenize("to:user+tag@example.com")
-
-
+    expected = [
+      {type: :word, value: "to"},
+      {type: :colon},
+      {type: :email, value: "user+tag@example.com"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 
   def test_tokenize_multiple_words
     tokens = tokenize("project report meeting")
-
-
-
-
-
+    expected = [
+      {type: :word, value: "project"},
+      {type: :word, value: "report"},
+      {type: :word, value: "meeting"},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_escaped_quote
+    tokens = tokenize('"She said \\"hello\\" to me"')
+    expected = [
+      {type: :quoted_string, value: 'She said "hello" to me'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_escaped_backslash
+    tokens = tokenize('"path\\\\to\\\\file"')
+    expected = [
+      {type: :quoted_string, value: 'path\\to\\file'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_quoted_string_with_multiple_escapes
+    tokens = tokenize('"test \\"nested\\" and \\\\ slash"')
+    expected = [
+      {type: :quoted_string, value: 'test "nested" and \\ slash'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_word_with_escaped_quote
+    tokens = tokenize('meeting\\"room')
+    expected = [
+      {type: :word, value: 'meeting"room'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_word_with_escaped_backslash
+    tokens = tokenize('path\\\\to')
+    expected = [
+      {type: :word, value: 'path\\to'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_multiple_words_with_escapes
+    tokens = tokenize('meeting\\"room another\\\\word')
+    expected = [
+      {type: :word, value: 'meeting"room'},
+      {type: :word, value: 'another\\word'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
+  end
+
+  def test_tokenize_operator_value_with_escaped_quote
+    tokens = tokenize('subject:test\\"value')
+    expected = [
+      {type: :word, value: "subject"},
+      {type: :colon},
+      {type: :word, value: 'test"value'},
+      {type: :eof}
+    ]
+    assert_token_stream(expected, tokens)
   end
 end
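Taken together, the escape tests pin down the tokenizer's contract: a backslash before a double quote or another backslash is consumed, and the escaped character is kept literally, both inside quoted strings and in bare words. The tests drive it through GmailSearchSyntax::Tokenizer.new(input).tokenize, which returns tokens responding to type and value. A minimal sketch of the unescaping rule as a standalone function (hypothetical; the gem's Tokenizer applies it while scanning rather than as a post-pass):

# Hypothetical unescape step matching the expectations asserted above.
def unescape(raw)
  out = +""
  i = 0
  while i < raw.length
    if raw[i] == "\\" && (raw[i + 1] == '"' || raw[i + 1] == "\\")
      out << raw[i + 1]  # drop the backslash, keep the escaped character
      i += 2
    else
      out << raw[i]
      i += 1
    end
  end
  out
end

unescape('She said \\"hello\\" to me')  # => 'She said "hello" to me'
unescape('path\\\\to\\\\file')          # => 'path\to\file'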
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: gmail_search_syntax
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.2
 platform: ruby
 authors:
 - me@julik.nl
@@ -70,14 +70,20 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ARCHITECTURE.md
+- GMAIL_BEHAVIOR_COMPARISON.md
+- GMAIL_COMPATIBILITY_COMPLETE.md
+- IMPLEMENTATION_NOTES.md
 - README.md
 - Rakefile
 - SCHEMA.md
 - examples/alias_collision_fix.rb
 - examples/demo.rb
+- examples/escaped_quotes_demo.rb
+- examples/gmail_comparison_demo.rb
 - examples/gmail_message_id_demo.rb
 - examples/postgres_vs_sqlite.rb
 - examples/sql_query.rb
+- examples/text_vs_substring_demo.rb
 - lib/GMAIL_SEARCH_OPERATORS.md
 - lib/gmail_search_syntax.rb
 - lib/gmail_search_syntax/ast.rb