pg_fulltext 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +11 -7
  3. data/lib/pg_fulltext/query.rb +23 -43
  4. metadata +15 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6e4fa6010e29f5c15b3381805189d6b01e917324260dd2184aa9b15d2c33366f
4
- data.tar.gz: 578ccbf2ffe79bb48c37971a33e86566693f10130c555f218bc0b86fd2190c08
3
+ metadata.gz: 44e2749ca060cca87ad4d046c44544150a6ec7d68a4e0f834a2f51284c86107d
4
+ data.tar.gz: fe18a7c9bbe15f6914aa0ae659f3728330bbace4dbae5a68850f80636234090f
5
5
  SHA512:
6
- metadata.gz: 8b394cce3e4e6691c346a28252582eec471fff714dc09a95c2627a0c2e1dc411c11969ad5de86c5b2d26d5e156c8d05cc0bbdf9c367b40cee71d3b2eef6e7299
7
- data.tar.gz: 17e35ccf3e285df319a30382220e3dd5faf1495d46b80d9a5b0fd6d9e8cc29ad98d98c0fc7e09da530462d5ec42fb3d5b2c455c1ce804be5a4297fa206df8403
6
+ metadata.gz: c239868d0fd3cf6823a5ffc95820fe2388c23365ce18f1da1441a6e78a55d74ce2b9fb94147b2ff4b836c38ddca15a41a845d958adaa292ab09af9fe34e4d29b
7
+ data.tar.gz: edda1972f580fe94582c8256cc17cb701cab36673e415e665e908146070d3f6d35a38075477e21cb8198da1c4512a346e30811d0de3344e162b95951cfbaf1a3
data/README.md CHANGED
@@ -41,11 +41,12 @@ configured per the following options:
41
41
 
42
42
  | Option | Default | Description |
43
43
  | ---------------- | :------ | :----------- |
44
- | `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here |
45
- | `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated |
46
- | `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false` |
47
- | `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. `reorder` will call clear, effectively clearing the existing order and applying `rank` |
48
- | `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true` |
44
+ | `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here. |
45
+ | `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated. |
46
+ | `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false`. |
47
+ | `prefix` | `true` | Default search will match partial words as well as whole words. Set this to `false` if only whole words should be matched. |
48
+ | `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. `reorder` will call clear, effectively clearing the existing order and applying `rank`. |
49
+ | `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true`. |
49
50
  | `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
50
51
 
51
52
  ## Standalone Configuration
@@ -56,10 +57,13 @@ string for you to use as you wish.
56
57
  Something like this should do the trick:
57
58
 
58
59
  ```ruby
60
+ db = PG.connect(dbname: 'mydb')
59
61
  search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
60
- sql_query = <<~SQL
62
+ tsv_query = db.escape_string(PgFulltext::Query.to_tsquery_string(search_string))
63
+ sql = <<~SQL
61
64
  SELECT *
62
65
  FROM my_model
63
- WHERE tsv @@ to_tsvector('portuguese', #{PgFulltext::Query.to_tsquery_string(query)})
66
+ WHERE tsv @@ to_tsquery('portuguese', '#{tsv_query}')
64
67
  SQL
68
+ db.exec(sql)
65
69
  ```
@@ -1,52 +1,34 @@
1
+ require 'rltk/lexer'
2
+
1
3
  module PgFulltext
2
4
  module Query
3
- def self.to_tsquery_string(query, prefix: true, operator: '&')
4
-
5
- # Normalize search string to a more parseable equivalent
6
- query = normalize_query(query)
7
-
8
- # Collect terms
9
- terms = []
5
+ class Lexer < RLTK::Lexer
6
+ rule(/!+/)
7
+ rule(/"+/)
10
8
 
11
- # Phrase mode
12
- if query.count('"') > 0 && query.count('"') % 2 == 0
13
- phrase_terms = []
14
- negate_phrase = false
9
+ rule(/(!?)[\p{L}!]+/) { |v| [:WORD, v] }
10
+ rule(/"[\p{L}\s!]+"/) { |v| [:PHRASE, v[1..-2]] }
11
+ rule(/!"[\p{L}\s!]+"/) { |v| [:NOT_PHRASE, v[2..-2]] }
15
12
 
16
- query_parts = query.split(' ')
17
- query_parts.each do |term|
13
+ rule(/\s+/)
14
+ rule(/[^\p{L}^\s^"]+/)
15
+ end
18
16
 
19
- # Skip if completely comprised of non-unicode word characters
20
- next if term.gsub(/[^\s\p{L}]/, '') == ''
17
+ def self.to_tsquery_string(query, prefix: true, operator: '&')
18
+ query = normalize_query(query)
21
19
 
22
- if term.start_with?('!"') && term.end_with?('"')
23
- terms << format_term("!#{term[2..-2]}", prefix: prefix)
24
- elsif term.start_with?('"') && term.end_with?('"')
25
- terms << format_term(term[1..-2], prefix: prefix)
26
- elsif term.start_with?('!"') && !term.end_with?('"')
27
- phrase_terms << format_term(term[2..-1], prefix: prefix)
28
- negate_phrase = true
29
- elsif term.start_with?('"') && !term.end_with?('"')
30
- phrase_terms << format_term(term[1..-1], prefix: prefix)
31
- elsif phrase_terms.length > 0
32
- if term.end_with?('"')
33
- phrase_terms << format_term(term[0..-2], prefix: prefix)
34
- terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms).join(' <-> ')})"
35
- phrase_terms = []
36
- negate_phrase = false
37
- else
38
- phrase_terms << format_term(term, prefix: prefix)
39
- end
40
- else
41
- terms << format_term(term, prefix: prefix)
42
- end
20
+ terms = []
21
+ Lexer.lex(query).each do |token|
22
+ if token.type == :WORD
23
+ terms << format_term(token.value, prefix: prefix)
24
+ elsif %i[PHRASE NOT_PHRASE].include?(token.type)
25
+ phrase_terms = Lexer.lex(token.value).map do |phrase_term|
26
+ phrase_term.value.nil? ? nil : format_term(phrase_term.value, prefix: prefix)
27
+ end.compact
28
+ terms << "#{'!' if token.type == :NOT_PHRASE}(#{phrase_terms.join(' <-> ')})"
43
29
  end
44
- else
45
- query.gsub! /["]/, ''
46
- terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) })
47
30
  end
48
31
 
49
- # Join terms with operator
50
32
  terms.join(" #{operator} ")
51
33
  end
52
34
 
@@ -55,9 +37,7 @@ module PgFulltext
55
37
  def self.normalize_query(query)
56
38
  query
57
39
  .gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
58
- .gsub(/[^\s\p{L}"!]/, '') # Remove all non-unicode, quote ("), and bangs (!)
59
- .gsub(/"+/, '"') # Replace Repeat quotes with single double-quote
60
- .gsub(/!+/, '!') # Replace Repeat bangs with single bang
40
+ .gsub(/[^\s\p{L}"!]/, '') # Remove everything except Unicode letters, whitespace, quotes ("), and bangs (!)
61
41
  .gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
62
42
  .strip # Strip space from beginning and end of line
63
43
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Robertson
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rltk
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 3.0.1
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement