pg_fulltext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +65 -0
- data/Rakefile +7 -0
- data/lib/pg_fulltext/active_record.rb +7 -6
- data/lib/pg_fulltext/query.rb +22 -10
- metadata +64 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e4fa6010e29f5c15b3381805189d6b01e917324260dd2184aa9b15d2c33366f
|
4
|
+
data.tar.gz: 578ccbf2ffe79bb48c37971a33e86566693f10130c555f218bc0b86fd2190c08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b394cce3e4e6691c346a28252582eec471fff714dc09a95c2627a0c2e1dc411c11969ad5de86c5b2d26d5e156c8d05cc0bbdf9c367b40cee71d3b2eef6e7299
|
7
|
+
data.tar.gz: 17e35ccf3e285df319a30382220e3dd5faf1495d46b80d9a5b0fd6d9e8cc29ad98d98c0fc7e09da530462d5ec42fb3d5b2c455c1ce804be5a4297fa206df8403
|
data/README.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# pg_fulltext
|
2
|
+
|
3
|
+
A pretty reasonable PostgreSQL fulltext implementation with minimal configuration
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```shell
|
8
|
+
gem install pg_fulltext
|
9
|
+
```
|
10
|
+
|
11
|
+
or add the following to your `Gemfile`:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'pg_fulltext'
|
15
|
+
```
|
16
|
+
|
17
|
+
and run `bundle install`
|
18
|
+
|
19
|
+
## Rails Configuration
|
20
|
+
|
21
|
+
This implementation assumes you have a `tsv` column on your model, and that you're generating a string appropriate for
|
22
|
+
the language you're using (we don't specify a default, so it will probably default to `'english'` depending on your
|
23
|
+
Postgres implementation):
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
class MyModel
|
27
|
+
include PgFulltext::ActiveRecord
|
28
|
+
|
29
|
+
add_search_scope
|
30
|
+
end
|
31
|
+
```
|
32
|
+
|
33
|
+
You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
MyModel.search('foo bar "include this phrase" !butnotthis !"and and also not this"')
|
37
|
+
```
|
38
|
+
|
39
|
+
The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
|
40
|
+
configured per the following options:
|
41
|
+
|
42
|
+
| Option | Default | Description |
|
43
|
+
| ---------------- | :------ | :----------- |
|
44
|
+
| `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here |
|
45
|
+
| `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated |
|
46
|
+
| `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false` |
|
47
|
+
| `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. `reorder` will call clear, effectively clearing the existing order and applying `rank` |
|
48
|
+
| `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true` |
|
49
|
+
| `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
|
50
|
+
|
51
|
+
## Standalone Configuration
|
52
|
+
|
53
|
+
There's not much here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `tsquery`-compatible
|
54
|
+
string for you to use as you wish.
|
55
|
+
|
56
|
+
Something like this should do the trick:
|
57
|
+
|
58
|
+
```ruby
|
59
|
+
search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
|
60
|
+
sql_query = <<~SQL
|
61
|
+
SELECT *
|
62
|
+
FROM my_model
|
63
|
+
WHERE tsv @@ to_tsquery('portuguese', '#{PgFulltext::Query.to_tsquery_string(search_string)}')
|
64
|
+
SQL
|
65
|
+
```
|
data/Rakefile
ADDED
@@ -19,15 +19,15 @@ module PgFulltext
|
|
19
19
|
relation,
|
20
20
|
query,
|
21
21
|
tsvector_column: :tsv,
|
22
|
-
search_type:
|
22
|
+
search_type: nil,
|
23
23
|
order: true,
|
24
|
+
prefix: true,
|
24
25
|
reorder: false,
|
25
26
|
any_word: false,
|
26
27
|
ignore_accents: false
|
27
28
|
)
|
28
29
|
serial = SecureRandom.hex(4)
|
29
|
-
|
30
|
-
pk_quoted = "#{table_quoted}.#{connection.quote_column_name(primary_key)}"
|
30
|
+
pk_quoted = "#{quoted_table_name}.#{connection.quote_column_name(primary_key)}"
|
31
31
|
fulltext_join_name = "pg_fulltext_#{serial}"
|
32
32
|
|
33
33
|
# Build the search relation to join on
|
@@ -37,6 +37,7 @@ module PgFulltext
|
|
37
37
|
tsvector_column: tsvector_column,
|
38
38
|
search_type: search_type,
|
39
39
|
any_word: any_word,
|
40
|
+
prefix: prefix,
|
40
41
|
ignore_accents: ignore_accents,
|
41
42
|
)
|
42
43
|
|
@@ -58,13 +59,13 @@ module PgFulltext
|
|
58
59
|
tsvector_column: :tsv,
|
59
60
|
search_type: nil,
|
60
61
|
any_word: false,
|
62
|
+
prefix: true,
|
61
63
|
ignore_accents: false
|
62
64
|
)
|
63
|
-
tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&'))
|
65
|
+
tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&', prefix: prefix))
|
64
66
|
tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
|
65
|
-
table_quoted = connection.quote_table_name(table_name)
|
66
67
|
column_quoted = connection.quote_column_name(tsvector_column)
|
67
|
-
fqc_quoted = "#{
|
68
|
+
fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
|
68
69
|
tsquery = "to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
|
69
70
|
|
70
71
|
relation
|
data/lib/pg_fulltext/query.rb
CHANGED
@@ -2,10 +2,8 @@ module PgFulltext
|
|
2
2
|
module Query
|
3
3
|
def self.to_tsquery_string(query, prefix: true, operator: '&')
|
4
4
|
|
5
|
-
#
|
6
|
-
query
|
7
|
-
query.gsub!(/"+/, '"')
|
8
|
-
query.gsub!(/\s+/, ' ')
|
5
|
+
# Normalize search string to a more parseable equivalent
|
6
|
+
query = normalize_query(query)
|
9
7
|
|
10
8
|
# Collect terms
|
11
9
|
terms = []
|
@@ -21,15 +19,19 @@ module PgFulltext
|
|
21
19
|
# Skip if completely comprised of non-unicode word characters
|
22
20
|
next if term.gsub(/[^\s\p{L}]/, '') == ''
|
23
21
|
|
24
|
-
if term.start_with?('!"') &&
|
25
|
-
|
22
|
+
if term.start_with?('!"') && term.end_with?('"')
|
23
|
+
terms << format_term("!#{term[2..-2]}", prefix: prefix)
|
24
|
+
elsif term.start_with?('"') && term.end_with?('"')
|
25
|
+
terms << format_term(term[1..-2], prefix: prefix)
|
26
|
+
elsif term.start_with?('!"') && !term.end_with?('"')
|
27
|
+
phrase_terms << format_term(term[2..-1], prefix: prefix)
|
26
28
|
negate_phrase = true
|
27
29
|
elsif term.start_with?('"') && !term.end_with?('"')
|
28
|
-
phrase_terms << format_term(term[1..-1], prefix:
|
30
|
+
phrase_terms << format_term(term[1..-1], prefix: prefix)
|
29
31
|
elsif phrase_terms.length > 0
|
30
32
|
if term.end_with?('"')
|
31
33
|
phrase_terms << format_term(term[0..-2], prefix: prefix)
|
32
|
-
terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms
|
34
|
+
terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms).join(' <-> ')})"
|
33
35
|
phrase_terms = []
|
34
36
|
negate_phrase = false
|
35
37
|
else
|
@@ -41,7 +43,7 @@ module PgFulltext
|
|
41
43
|
end
|
42
44
|
else
|
43
45
|
query.gsub! /["]/, ''
|
44
|
-
terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) }
|
46
|
+
terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) })
|
45
47
|
end
|
46
48
|
|
47
49
|
# Join terms with operator
|
@@ -50,6 +52,16 @@ module PgFulltext
|
|
50
52
|
|
51
53
|
private
|
52
54
|
|
55
|
+
def self.normalize_query(query)
|
56
|
+
query
|
57
|
+
.gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
|
58
|
+
.gsub(/[^\s\p{L}"!]/, '') # Remove all non-unicode, quote ("), and bangs (!)
|
59
|
+
.gsub(/"+/, '"') # Replace Repeat quotes with single double-quote
|
60
|
+
.gsub(/!+/, '!') # Replace Repeat bangs with single bang
|
61
|
+
.gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
|
62
|
+
.strip # Strip space from beginning and end of line
|
63
|
+
end
|
64
|
+
|
53
65
|
def self.format_term(term, prefix: true)
|
54
66
|
# Remove any ! that's not at the beginning of the term, as it will break the query
|
55
67
|
term.gsub!(/(?<!^)!/, '')
|
@@ -58,7 +70,7 @@ module PgFulltext
|
|
58
70
|
"#{term}#{':*' if prefix}"
|
59
71
|
end
|
60
72
|
|
61
|
-
def self.reject_falsy(terms
|
73
|
+
def self.reject_falsy(terms)
|
62
74
|
false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
|
63
75
|
terms.reject { |v| false_values.include?(v) }
|
64
76
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Robertson
|
@@ -9,20 +9,80 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2021-10-11 00:00:00.000000000 Z
|
12
|
-
dependencies:
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: activesupport
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: with_model
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
13
69
|
description: Allows simple searching with a variety of options
|
14
70
|
email: adam@arcreative.net
|
15
71
|
executables: []
|
16
72
|
extensions: []
|
17
73
|
extra_rdoc_files: []
|
18
74
|
files:
|
75
|
+
- README.md
|
76
|
+
- Rakefile
|
19
77
|
- lib/pg_fulltext.rb
|
20
78
|
- lib/pg_fulltext/active_record.rb
|
21
79
|
- lib/pg_fulltext/query.rb
|
22
80
|
homepage: https://github.com/arcreative/pg_fulltext
|
23
81
|
licenses:
|
24
82
|
- MIT
|
25
|
-
metadata:
|
83
|
+
metadata:
|
84
|
+
homepage_uri: https://github.com/arcreative/pg_fulltext
|
85
|
+
source_code_uri: https://github.com/arcreative/pg_fulltext
|
26
86
|
post_install_message:
|
27
87
|
rdoc_options: []
|
28
88
|
require_paths:
|
@@ -38,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
38
98
|
- !ruby/object:Gem::Version
|
39
99
|
version: '0'
|
40
100
|
requirements: []
|
41
|
-
rubygems_version: 3.1.
|
101
|
+
rubygems_version: 3.1.6
|
42
102
|
signing_key:
|
43
103
|
specification_version: 4
|
44
104
|
summary: PostgreSQL fulltext search
|