pg_fulltext 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -7
- data/lib/pg_fulltext/query.rb +23 -43
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44e2749ca060cca87ad4d046c44544150a6ec7d68a4e0f834a2f51284c86107d
|
4
|
+
data.tar.gz: fe18a7c9bbe15f6914aa0ae659f3728330bbace4dbae5a68850f80636234090f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c239868d0fd3cf6823a5ffc95820fe2388c23365ce18f1da1441a6e78a55d74ce2b9fb94147b2ff4b836c38ddca15a41a845d958adaa292ab09af9fe34e4d29b
|
7
|
+
data.tar.gz: edda1972f580fe94582c8256cc17cb701cab36673e415e665e908146070d3f6d35a38075477e21cb8198da1c4512a346e30811d0de3344e162b95951cfbaf1a3
|
data/README.md
CHANGED
@@ -41,11 +41,12 @@ configured per the following options:
|
|
41
41
|
|
42
42
|
| Option | Default | Description |
|
43
43
|
| ---------------- | :------ | :----------- |
|
44
|
-
| `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here |
|
45
|
-
| `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated |
|
46
|
-
| `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false` |
|
47
|
-
| `
|
48
|
-
| `
|
44
|
+
| `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here. |
|
45
|
+
| `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated. |
|
46
|
+
| `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false`. |
|
47
|
+
| `prefix` | `true` | Default search will match partial words as well as whole words. Set this to `false` if only whole words should be matched. |
|
48
|
+
| `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. `reorder` will call clear, effectively clearing the existing order and applying `rank`. |
|
49
|
+
| `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true`. |
|
49
50
|
| `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
|
50
51
|
|
51
52
|
## Standalone Configuration
|
@@ -56,10 +57,13 @@ string for you to use as you wish.
|
|
56
57
|
Something like this should do the trick:
|
57
58
|
|
58
59
|
```ruby
|
60
|
+
db = PG.connect(dbname: 'mydb')
|
59
61
|
search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
|
60
|
-
|
62
|
+
tsv_query = db.escape_string(PgFulltext::Query.to_tsquery_string(search_string))
|
63
|
+
sql = <<~SQL
|
61
64
|
SELECT *
|
62
65
|
FROM my_model
|
63
|
-
WHERE tsv @@
|
66
|
+
WHERE tsv @@ to_tsquery('portuguese', '#{tsv_query}')
|
64
67
|
SQL
|
68
|
+
db.exec(sql)
|
65
69
|
```
|
data/lib/pg_fulltext/query.rb
CHANGED
@@ -1,52 +1,34 @@
|
|
1
|
+
require 'rltk/lexer'
|
2
|
+
|
1
3
|
module PgFulltext
|
2
4
|
module Query
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
query = normalize_query(query)
|
7
|
-
|
8
|
-
# Collect terms
|
9
|
-
terms = []
|
5
|
+
class Lexer < RLTK::Lexer
|
6
|
+
rule(/!+/)
|
7
|
+
rule(/"+/)
|
10
8
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
negate_phrase = false
|
9
|
+
rule(/(!?)[\p{L}!]+/) { |v| [:WORD, v] }
|
10
|
+
rule(/"[\p{L}\s!]+"/) { |v| [:PHRASE, v[1..-2]] }
|
11
|
+
rule(/!"[\p{L}\s!]+"/) { |v| [:NOT_PHRASE, v[2..-2]] }
|
15
12
|
|
16
|
-
|
17
|
-
|
13
|
+
rule(/\s+/)
|
14
|
+
rule(/[^\p{L}^\s^"]+/)
|
15
|
+
end
|
18
16
|
|
19
|
-
|
20
|
-
|
17
|
+
def self.to_tsquery_string(query, prefix: true, operator: '&')
|
18
|
+
query = normalize_query(query)
|
21
19
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
elsif phrase_terms.length > 0
|
32
|
-
if term.end_with?('"')
|
33
|
-
phrase_terms << format_term(term[0..-2], prefix: prefix)
|
34
|
-
terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms).join(' <-> ')})"
|
35
|
-
phrase_terms = []
|
36
|
-
negate_phrase = false
|
37
|
-
else
|
38
|
-
phrase_terms << format_term(term, prefix: prefix)
|
39
|
-
end
|
40
|
-
else
|
41
|
-
terms << format_term(term, prefix: prefix)
|
42
|
-
end
|
20
|
+
terms = []
|
21
|
+
Lexer.lex(query).each do |token|
|
22
|
+
if token.type == :WORD
|
23
|
+
terms << format_term(token.value, prefix: prefix)
|
24
|
+
elsif %i[PHRASE NOT_PHRASE].include?(token.type)
|
25
|
+
phrase_terms = Lexer.lex(token.value).map do |phrase_term|
|
26
|
+
phrase_term.value.nil? ? nil : format_term(phrase_term.value, prefix: prefix)
|
27
|
+
end.compact
|
28
|
+
terms << "#{'!' if token.type == :NOT_PHRASE}(#{phrase_terms.join(' <-> ')})"
|
43
29
|
end
|
44
|
-
else
|
45
|
-
query.gsub! /["]/, ''
|
46
|
-
terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) })
|
47
30
|
end
|
48
31
|
|
49
|
-
# Join terms with operator
|
50
32
|
terms.join(" #{operator} ")
|
51
33
|
end
|
52
34
|
|
@@ -55,9 +37,7 @@ module PgFulltext
|
|
55
37
|
def self.normalize_query(query)
|
56
38
|
query
|
57
39
|
.gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
|
58
|
-
.gsub(/[^\s\p{L}"!]/, '') # Remove all non-unicode,
|
59
|
-
.gsub(/"+/, '"') # Replace Repeat quotes with single double-quote
|
60
|
-
.gsub(/!+/, '!') # Replace Repeat bangs with single bang
|
40
|
+
.gsub(/[^\s\p{L}"!]/, '') # Remove everything except whitespace, Unicode letters, quotes ("), and bangs (!)
|
61
41
|
.gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
|
62
42
|
.strip # Strip space from beginning and end of line
|
63
43
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Robertson
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '5.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rltk
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 3.0.1
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 3.0.1
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rspec
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|