pg_fulltext 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +65 -0
- data/Rakefile +7 -0
- data/lib/pg_fulltext/active_record.rb +7 -6
- data/lib/pg_fulltext/query.rb +22 -10
- metadata +64 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e4fa6010e29f5c15b3381805189d6b01e917324260dd2184aa9b15d2c33366f
|
4
|
+
data.tar.gz: 578ccbf2ffe79bb48c37971a33e86566693f10130c555f218bc0b86fd2190c08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b394cce3e4e6691c346a28252582eec471fff714dc09a95c2627a0c2e1dc411c11969ad5de86c5b2d26d5e156c8d05cc0bbdf9c367b40cee71d3b2eef6e7299
|
7
|
+
data.tar.gz: 17e35ccf3e285df319a30382220e3dd5faf1495d46b80d9a5b0fd6d9e8cc29ad98d98c0fc7e09da530462d5ec42fb3d5b2c455c1ce804be5a4297fa206df8403
|
data/README.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# pg_fulltext
|
2
|
+
|
3
|
+
A pretty reasonable PostgreSQL fulltext implementation with minimal configuration
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```shell
|
8
|
+
gem install pg_fulltext
|
9
|
+
```
|
10
|
+
|
11
|
+
or add the following to your `Gemfile`:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'pg_fulltext'
|
15
|
+
```
|
16
|
+
|
17
|
+
and run `bundle install`
|
18
|
+
|
19
|
+
## Rails Configuration
|
20
|
+
|
21
|
+
This implementation assumes you have a `tsv` column on your model, and that you're generating a string appropriate for
|
22
|
+
the language you're using (we don't specify a default, so it will probably default to `'english'` depending on your
|
23
|
+
Postgres implementation):
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
class MyModel
|
27
|
+
include PgFulltext::ActiveRecord
|
28
|
+
|
29
|
+
add_search_scope
|
30
|
+
end
|
31
|
+
```
|
32
|
+
|
33
|
+
You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
MyModel.search('foo bar "include this phrase" !butnotthis !"and and also not this"')
|
37
|
+
```
|
38
|
+
|
39
|
+
The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
|
40
|
+
configured per the following options:
|
41
|
+
|
42
|
+
| Option | Default | Description |
|
43
|
+
| ---------------- | :------ | :----------- |
|
44
|
+
| `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here |
|
45
|
+
| `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated |
|
46
|
+
| `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false` |
|
47
|
+
| `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. Setting `reorder` to `true` clears the existing order so that results are ordered by `rank` instead |
|
48
|
+
| `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true` |
|
49
|
+
| `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
|
50
|
+
|
51
|
+
## Standalone Configuration
|
52
|
+
|
53
|
+
There's not much here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `tsquery`-compatible
|
54
|
+
string for you to use as you wish.
|
55
|
+
|
56
|
+
Something like this should do the trick:
|
57
|
+
|
58
|
+
```ruby
|
59
|
+
search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
|
60
|
+
sql_query = <<~SQL
|
61
|
+
SELECT *
|
62
|
+
FROM my_model
|
63
|
+
WHERE tsv @@ to_tsquery('portuguese', '#{PgFulltext::Query.to_tsquery_string(search_string)}')
|
64
|
+
SQL
|
65
|
+
```
|
data/Rakefile
ADDED
@@ -19,15 +19,15 @@ module PgFulltext
|
|
19
19
|
relation,
|
20
20
|
query,
|
21
21
|
tsvector_column: :tsv,
|
22
|
-
search_type:
|
22
|
+
search_type: nil,
|
23
23
|
order: true,
|
24
|
+
prefix: true,
|
24
25
|
reorder: false,
|
25
26
|
any_word: false,
|
26
27
|
ignore_accents: false
|
27
28
|
)
|
28
29
|
serial = SecureRandom.hex(4)
|
29
|
-
|
30
|
-
pk_quoted = "#{table_quoted}.#{connection.quote_column_name(primary_key)}"
|
30
|
+
pk_quoted = "#{quoted_table_name}.#{connection.quote_column_name(primary_key)}"
|
31
31
|
fulltext_join_name = "pg_fulltext_#{serial}"
|
32
32
|
|
33
33
|
# Build the search relation to join on
|
@@ -37,6 +37,7 @@ module PgFulltext
|
|
37
37
|
tsvector_column: tsvector_column,
|
38
38
|
search_type: search_type,
|
39
39
|
any_word: any_word,
|
40
|
+
prefix: prefix,
|
40
41
|
ignore_accents: ignore_accents,
|
41
42
|
)
|
42
43
|
|
@@ -58,13 +59,13 @@ module PgFulltext
|
|
58
59
|
tsvector_column: :tsv,
|
59
60
|
search_type: nil,
|
60
61
|
any_word: false,
|
62
|
+
prefix: true,
|
61
63
|
ignore_accents: false
|
62
64
|
)
|
63
|
-
tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&'))
|
65
|
+
tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&', prefix: prefix))
|
64
66
|
tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
|
65
|
-
table_quoted = connection.quote_table_name(table_name)
|
66
67
|
column_quoted = connection.quote_column_name(tsvector_column)
|
67
|
-
fqc_quoted = "#{
|
68
|
+
fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
|
68
69
|
tsquery = "to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
|
69
70
|
|
70
71
|
relation
|
data/lib/pg_fulltext/query.rb
CHANGED
@@ -2,10 +2,8 @@ module PgFulltext
|
|
2
2
|
module Query
|
3
3
|
def self.to_tsquery_string(query, prefix: true, operator: '&')
|
4
4
|
|
5
|
-
#
|
6
|
-
query
|
7
|
-
query.gsub!(/"+/, '"')
|
8
|
-
query.gsub!(/\s+/, ' ')
|
5
|
+
# Normalize search string to a more parseable equivalent
|
6
|
+
query = normalize_query(query)
|
9
7
|
|
10
8
|
# Collect terms
|
11
9
|
terms = []
|
@@ -21,15 +19,19 @@ module PgFulltext
|
|
21
19
|
# Skip if completely comprised of non-unicode word characters
|
22
20
|
next if term.gsub(/[^\s\p{L}]/, '') == ''
|
23
21
|
|
24
|
-
if term.start_with?('!"') &&
|
25
|
-
|
22
|
+
if term.start_with?('!"') && term.end_with?('"')
|
23
|
+
terms << format_term("!#{term[2..-2]}", prefix: prefix)
|
24
|
+
elsif term.start_with?('"') && term.end_with?('"')
|
25
|
+
terms << format_term(term[1..-2], prefix: prefix)
|
26
|
+
elsif term.start_with?('!"') && !term.end_with?('"')
|
27
|
+
phrase_terms << format_term(term[2..-1], prefix: prefix)
|
26
28
|
negate_phrase = true
|
27
29
|
elsif term.start_with?('"') && !term.end_with?('"')
|
28
|
-
phrase_terms << format_term(term[1..-1], prefix:
|
30
|
+
phrase_terms << format_term(term[1..-1], prefix: prefix)
|
29
31
|
elsif phrase_terms.length > 0
|
30
32
|
if term.end_with?('"')
|
31
33
|
phrase_terms << format_term(term[0..-2], prefix: prefix)
|
32
|
-
terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms
|
34
|
+
terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms).join(' <-> ')})"
|
33
35
|
phrase_terms = []
|
34
36
|
negate_phrase = false
|
35
37
|
else
|
@@ -41,7 +43,7 @@ module PgFulltext
|
|
41
43
|
end
|
42
44
|
else
|
43
45
|
query.gsub! /["]/, ''
|
44
|
-
terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) }
|
46
|
+
terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) })
|
45
47
|
end
|
46
48
|
|
47
49
|
# Join terms with operator
|
@@ -50,6 +52,16 @@ module PgFulltext
|
|
50
52
|
|
51
53
|
private
|
52
54
|
|
55
|
+
def self.normalize_query(query)
|
56
|
+
query
|
57
|
+
.gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
|
58
|
+
.gsub(/[^\s\p{L}"!]/, '') # Remove all non-unicode, quote ("), and bangs (!)
|
59
|
+
.gsub(/"+/, '"') # Replace Repeat quotes with single double-quote
|
60
|
+
.gsub(/!+/, '!') # Replace Repeat bangs with single bang
|
61
|
+
.gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
|
62
|
+
.strip # Strip space from beginning and end of line
|
63
|
+
end
|
64
|
+
|
53
65
|
def self.format_term(term, prefix: true)
|
54
66
|
# Remove any ! that's not at the beginning of the term, as it will break the query
|
55
67
|
term.gsub!(/(?<!^)!/, '')
|
@@ -58,7 +70,7 @@ module PgFulltext
|
|
58
70
|
"#{term}#{':*' if prefix}"
|
59
71
|
end
|
60
72
|
|
61
|
-
def self.reject_falsy(terms
|
73
|
+
def self.reject_falsy(terms)
|
62
74
|
false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
|
63
75
|
terms.reject { |v| false_values.include?(v) }
|
64
76
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Robertson
|
@@ -9,20 +9,80 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2021-10-11 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: activesupport
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: with_model
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
13
69
|
description: Allows simple searching with a variety of options
|
14
70
|
email: adam@arcreative.net
|
15
71
|
executables: []
|
16
72
|
extensions: []
|
17
73
|
extra_rdoc_files: []
|
18
74
|
files:
|
75
|
+
- README.md
|
76
|
+
- Rakefile
|
19
77
|
- lib/pg_fulltext.rb
|
20
78
|
- lib/pg_fulltext/active_record.rb
|
21
79
|
- lib/pg_fulltext/query.rb
|
22
80
|
homepage: https://github.com/arcreative/pg_fulltext
|
23
81
|
licenses:
|
24
82
|
- MIT
|
25
|
-
metadata:
|
83
|
+
metadata:
|
84
|
+
homepage_uri: https://github.com/arcreative/pg_fulltext
|
85
|
+
source_code_uri: https://github.com/arcreative/pg_fulltext
|
26
86
|
post_install_message:
|
27
87
|
rdoc_options: []
|
28
88
|
require_paths:
|
@@ -38,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
38
98
|
- !ruby/object:Gem::Version
|
39
99
|
version: '0'
|
40
100
|
requirements: []
|
41
|
-
rubygems_version: 3.1.
|
101
|
+
rubygems_version: 3.1.6
|
42
102
|
signing_key:
|
43
103
|
specification_version: 4
|
44
104
|
summary: PostgreSQL fulltext search
|