pg_fulltext 0.2.3 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f4f614bb0fa282ac0bb258122139efd64f81ac8f04eb3dd4fd2c0e80c9f9d5a
4
- data.tar.gz: c4292d5984bf2e3774b7f73948173ea2b4f022d2d29989b3a57222b3b516b02a
3
+ metadata.gz: 176bed19822e89a050d214f8e7b077fa1082b7c5d46bad5686d4c122cdcf9947
4
+ data.tar.gz: a70d4baec8fde810854725dfadb101a7942a5e62e2d69de4861f8560cdec44ec
5
5
  SHA512:
6
- metadata.gz: 1890385e76798f005d56c8005aa32b23badbd9ad770822b61962f37016cbe9997ebe9dc090d0948654c29a0178b939c6c472ed80b4c4fed671e0015781444ed5
7
- data.tar.gz: af5b0b8d46c2a5ee73ee5de50cdc5c6b9f8e36bcb616830207db36cd32072a6a2fe7b173baf35da5fb9f938b80da14cda44cc9d70f966e6e3704a8b230ab0a2e
6
+ metadata.gz: 9c7652680618afe308366943bff2d55c87a54dbad8b5707758a0670942facf35c5f67100aebe23eb716e2ab02443494b4740f54862dc682ec1b8c42c178b44c0
7
+ data.tar.gz: 9ab936b7086175869500406f63778b31dd96ae734d28b13af1913f1c3ee80e91ca8e299eabb1d9e699e96c34cc2df067564978dec8cdc7a04743d54c7f4bc8ee
data/README.md CHANGED
@@ -33,7 +33,7 @@ end
33
33
  You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
34
34
 
35
35
  ```ruby
36
- MyModel.search('foo bar "include this phrase" !butnotthis !"and and also not this"')
36
+ MyModel.search('foo bar "include this phrase" -butnotthis -"and and also not this phrase"')
37
37
  ```
38
38
 
39
39
  The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
@@ -46,24 +46,4 @@ configured per the following options:
46
46
  | `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false`. |
47
47
  | `prefix` | `true` | Default search will match partial words as well as whole words. Set this to `false` if only whole words should be matched. |
48
48
  | `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. `reorder` will call clear, effectively clearing the existing order and applying `rank`. |
49
- | `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true`. |
50
49
  | `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
51
-
52
- ## Standalone Configuration
53
-
54
- There's not much, here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `tsvector`-compatible
55
- string for you to use as you wish.
56
-
57
- Something like this should do the trick:
58
-
59
- ```ruby
60
- db = PG.connect(dbname: 'mydb')
61
- search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
62
- tsv_query = db.escape_string(PgFulltext::Query.to_tsquery_string(search_string))
63
- sql = <<~SQL
64
- SELECT *
65
- FROM my_model
66
- WHERE tsv @@ to_tsquery('portuguese', '#{tsv_query}')
67
- SQL
68
- db.exec(sql)
69
- ```
@@ -23,7 +23,6 @@ module PgFulltext
23
23
  order: true,
24
24
  prefix: true,
25
25
  reorder: false,
26
- any_word: false,
27
26
  ignore_accents: false
28
27
  )
29
28
  serial = SecureRandom.hex(4)
@@ -36,7 +35,6 @@ module PgFulltext
36
35
  query,
37
36
  tsvector_column: tsvector_column,
38
37
  search_type: search_type,
39
- any_word: any_word,
40
38
  prefix: prefix,
41
39
  ignore_accents: ignore_accents,
42
40
  )
@@ -58,16 +56,15 @@ module PgFulltext
58
56
  query,
59
57
  tsvector_column: :tsv,
60
58
  search_type: nil,
61
- any_word: false,
62
59
  prefix: true,
63
60
  ignore_accents: false
64
61
  )
65
- tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&', prefix: prefix))
62
+ tsquery_string_quoted = connection.quote(query)
66
63
  tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
67
64
  column_quoted = connection.quote_column_name(tsvector_column)
68
65
  fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
69
- tsquery = "to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
70
-
66
+ tsquery = "websearch_to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
67
+ tsquery = "regexp_replace(#{tsquery}::text, '''([a-z0-9\\-_@.]+)''', '''\\1'':*', 'g')::tsquery" if prefix
71
68
  relation
72
69
  .select(:id, "ts_rank_cd(#{fqc_quoted}, #{tsquery}) AS rank")
73
70
  .where("#{fqc_quoted} @@ #{tsquery}")
data/lib/pg_fulltext.rb CHANGED
@@ -1,4 +1,3 @@
1
1
  module PgFulltext; end
2
2
 
3
3
  require 'pg_fulltext/active_record'
4
- require 'pg_fulltext/query'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Robertson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-22 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '5.0'
41
- - !ruby/object:Gem::Dependency
42
- name: rltk
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: 3.0.1
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: 3.0.1
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: rspec
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -90,7 +76,6 @@ files:
90
76
  - Rakefile
91
77
  - lib/pg_fulltext.rb
92
78
  - lib/pg_fulltext/active_record.rb
93
- - lib/pg_fulltext/query.rb
94
79
  homepage: https://github.com/arcreative/pg_fulltext
95
80
  licenses:
96
81
  - MIT
@@ -112,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
97
  - !ruby/object:Gem::Version
113
98
  version: '0'
114
99
  requirements: []
115
- rubygems_version: 3.1.6
100
+ rubygems_version: 3.2.33
116
101
  signing_key:
117
102
  specification_version: 4
118
103
  summary: PostgreSQL fulltext search
@@ -1,58 +0,0 @@
1
- require 'rltk/lexer'
2
-
3
- module PgFulltext
4
- module Query
5
- class Lexer < RLTK::Lexer
6
- rule(/!+/)
7
- rule(/"+/)
8
-
9
- rule(/(!?)[0-9\p{L}!]+/) { |v| [:WORD, v] }
10
- rule(/"[\p{L}\s!]+"/) { |v| [:PHRASE, v[1..-2]] }
11
- rule(/!"[\p{L}\s!]+"/) { |v| [:NOT_PHRASE, v[2..-2]] }
12
-
13
- rule(/\s+/)
14
- rule(/[^\p{L}\s"]+/)
15
- end
16
-
17
- def self.to_tsquery_string(query, prefix: true, operator: '&')
18
- query = normalize_query(query)
19
-
20
- terms = []
21
- Lexer.lex(query).each do |token|
22
- if token.type == :WORD
23
- terms << format_term(token.value, prefix: prefix)
24
- elsif %i[PHRASE NOT_PHRASE].include?(token.type)
25
- phrase_terms = Lexer.lex(token.value).map do |phrase_term|
26
- phrase_term.value.nil? ? nil : format_term(phrase_term.value, prefix: prefix)
27
- end.compact
28
- terms << "#{'!' if token.type == :NOT_PHRASE}(#{phrase_terms.join(' <-> ')})"
29
- end
30
- end
31
-
32
- terms.join(" #{operator} ")
33
- end
34
-
35
- private
36
-
37
- def self.normalize_query(query)
38
- query
39
- .gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
40
- .gsub(/[^\s\p{L}0-9"!]/, '') # Remove all non-unicode, whitespace, numbers, quotes ("), and bangs (!)
41
- .gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
42
- .strip # Strip space from beginning and end of line
43
- end
44
-
45
- def self.format_term(term, prefix: true)
46
- # Remove any ! that's not at the beginning of the term, as it will break the query
47
- term.gsub!(/(?<!^)!/, '')
48
-
49
- # Add the prefix if prefix is set
50
- "#{term}#{':*' if prefix}"
51
- end
52
-
53
- def self.reject_falsy(terms)
54
- false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
55
- terms.reject { |v| false_values.include?(v) }
56
- end
57
- end
58
- end