pg_fulltext 0.2.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -21
- data/lib/pg_fulltext/active_record.rb +3 -6
- data/lib/pg_fulltext.rb +0 -1
- metadata +3 -4
- data/lib/pg_fulltext/query.rb +0 -58
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5561eb4580813730f611fe05d6bbf17e6a152f09d79e91f4c90f3e754407c184
|
4
|
+
data.tar.gz: 1843ebf4389d67f22666787d2203d869a365e953d4fa9705db6efbfacdc28428
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9852ff9a4843ff90aa94ef5ec1500621d293d29bdd0ea68846d82435be174e2b4f7f4286765830c1a4b66e3cd416386581fbcb971a95eed5711c3574ab11b30b
|
7
|
+
data.tar.gz: bed84f7603d1b0f926ce21ccb81c0202103fe3e25966a0729735bad4979ba1b1d3605c571cb380dcd7f3e68ad5ae1573a8dfb710747b0f87835dd578010cc650
|
data/README.md
CHANGED
@@ -33,7 +33,7 @@ end
|
|
33
33
|
You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
|
34
34
|
|
35
35
|
```ruby
|
36
|
-
MyModel.search('foo bar "include this phrase"
|
36
|
+
MyModel.search('foo bar "include this phrase" -butnotthis -"and and also not this phrase"')
|
37
37
|
```
|
38
38
|
|
39
39
|
The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
|
@@ -46,24 +46,4 @@ configured per the following options:
|
|
46
46
|
| `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false`. |
|
47
47
|
| `prefix` | `true` | Default search will match partial words as well as whole words. Set this to `false` if only whole words should be matched. |
|
48
48
|
| `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. `reorder` will call clear, effectively clearing the existing order and applying `rank`. |
|
49
|
-
| `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true`. |
|
50
49
|
| `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
|
51
|
-
|
52
|
-
## Standalone Configuration
|
53
|
-
|
54
|
-
There's not much, here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `tsvector`-compatible
|
55
|
-
string for you to use as you wish.
|
56
|
-
|
57
|
-
Something like this should do the trick:
|
58
|
-
|
59
|
-
```ruby
|
60
|
-
db = PG.connect(dbname: 'mydb')
|
61
|
-
search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
|
62
|
-
tsv_query = db.escape_string(PgFulltext::Query.to_tsquery_string(search_string))
|
63
|
-
sql = <<~SQL
|
64
|
-
SELECT *
|
65
|
-
FROM my_model
|
66
|
-
WHERE tsv @@ to_tsquery('portuguese', '#{tsv_query}')
|
67
|
-
SQL
|
68
|
-
db.exec(sql)
|
69
|
-
```
|
@@ -23,7 +23,6 @@ module PgFulltext
|
|
23
23
|
order: true,
|
24
24
|
prefix: true,
|
25
25
|
reorder: false,
|
26
|
-
any_word: false,
|
27
26
|
ignore_accents: false
|
28
27
|
)
|
29
28
|
serial = SecureRandom.hex(4)
|
@@ -36,7 +35,6 @@ module PgFulltext
|
|
36
35
|
query,
|
37
36
|
tsvector_column: tsvector_column,
|
38
37
|
search_type: search_type,
|
39
|
-
any_word: any_word,
|
40
38
|
prefix: prefix,
|
41
39
|
ignore_accents: ignore_accents,
|
42
40
|
)
|
@@ -58,16 +56,15 @@ module PgFulltext
|
|
58
56
|
query,
|
59
57
|
tsvector_column: :tsv,
|
60
58
|
search_type: nil,
|
61
|
-
any_word: false,
|
62
59
|
prefix: true,
|
63
60
|
ignore_accents: false
|
64
61
|
)
|
65
|
-
tsquery_string_quoted = connection.quote(
|
62
|
+
tsquery_string_quoted = connection.quote(query)
|
66
63
|
tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
|
67
64
|
column_quoted = connection.quote_column_name(tsvector_column)
|
68
65
|
fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
|
69
|
-
tsquery = "
|
70
|
-
|
66
|
+
tsquery = "websearch_to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
|
67
|
+
tsquery = "regexp_replace(#{tsquery}::text, '''([a-z0-9\\-_@.]+)''', '''\\1'':*', 'g')::tsquery" if prefix
|
71
68
|
relation
|
72
69
|
.select(:id, "ts_rank_cd(#{fqc_quoted}, #{tsquery}) AS rank")
|
73
70
|
.where("#{fqc_quoted} @@ #{tsquery}")
|
data/lib/pg_fulltext.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Robertson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -90,7 +90,6 @@ files:
|
|
90
90
|
- Rakefile
|
91
91
|
- lib/pg_fulltext.rb
|
92
92
|
- lib/pg_fulltext/active_record.rb
|
93
|
-
- lib/pg_fulltext/query.rb
|
94
93
|
homepage: https://github.com/arcreative/pg_fulltext
|
95
94
|
licenses:
|
96
95
|
- MIT
|
@@ -112,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
111
|
- !ruby/object:Gem::Version
|
113
112
|
version: '0'
|
114
113
|
requirements: []
|
115
|
-
rubygems_version: 3.
|
114
|
+
rubygems_version: 3.2.33
|
116
115
|
signing_key:
|
117
116
|
specification_version: 4
|
118
117
|
summary: PostgreSQL fulltext search
|
data/lib/pg_fulltext/query.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require 'rltk/lexer'
|
2
|
-
|
3
|
-
module PgFulltext
|
4
|
-
module Query
|
5
|
-
class Lexer < RLTK::Lexer
|
6
|
-
rule(/!+/)
|
7
|
-
rule(/"+/)
|
8
|
-
|
9
|
-
rule(/(!?)[0-9\p{L}!]+/) { |v| [:WORD, v] }
|
10
|
-
rule(/"[\p{L}\s!]+"/) { |v| [:PHRASE, v[1..-2]] }
|
11
|
-
rule(/!"[\p{L}\s!]+"/) { |v| [:NOT_PHRASE, v[2..-2]] }
|
12
|
-
|
13
|
-
rule(/\s+/)
|
14
|
-
rule(/[^\p{L}\s"]+/)
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.to_tsquery_string(query, prefix: true, operator: '&')
|
18
|
-
query = normalize_query(query)
|
19
|
-
|
20
|
-
terms = []
|
21
|
-
Lexer.lex(query).each do |token|
|
22
|
-
if token.type == :WORD
|
23
|
-
terms << format_term(token.value, prefix: prefix)
|
24
|
-
elsif %i[PHRASE NOT_PHRASE].include?(token.type)
|
25
|
-
phrase_terms = Lexer.lex(token.value).map do |phrase_term|
|
26
|
-
phrase_term.value.nil? ? nil : format_term(phrase_term.value, prefix: prefix)
|
27
|
-
end.compact
|
28
|
-
terms << "#{'!' if token.type == :NOT_PHRASE}(#{phrase_terms.join(' <-> ')})"
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
terms.join(" #{operator} ")
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
def self.normalize_query(query)
|
38
|
-
query
|
39
|
-
.gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
|
40
|
-
.gsub(/[^\s\p{L}0-9"!]/, '') # Remove everything except whitespace, Unicode letters, digits, quotes ("), and bangs (!)
|
41
|
-
.gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
|
42
|
-
.strip # Strip space from beginning and end of line
|
43
|
-
end
|
44
|
-
|
45
|
-
def self.format_term(term, prefix: true)
|
46
|
-
# Remove any ! that's not at the beginning of the term, as it will break the query
|
47
|
-
term.gsub!(/(?<!^)!/, '')
|
48
|
-
|
49
|
-
# Add the prefix if prefix is set
|
50
|
-
"#{term}#{':*' if prefix}"
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.reject_falsy(terms)
|
54
|
-
false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
|
55
|
-
terms.reject { |v| false_values.include?(v) }
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|