pg_fulltext 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49c8f3b3250945a7e3e600a100951abffe80a0537e14fcf392d96778d9648777
4
- data.tar.gz: efef8f9be9540e961015472dc3f498203c62a37856ef4f7c66f00fad30e7e5a2
3
+ metadata.gz: 6e4fa6010e29f5c15b3381805189d6b01e917324260dd2184aa9b15d2c33366f
4
+ data.tar.gz: 578ccbf2ffe79bb48c37971a33e86566693f10130c555f218bc0b86fd2190c08
5
5
  SHA512:
6
- metadata.gz: 2c324ce42e53dd725bec4ee7e11a8763927fe8080059a504ba31e5546b8d528ee76c040ae4a93c6b272a744d413f695583d413d991d1db6afb846f2e8750162e
7
- data.tar.gz: 90e7cd23c65e9ee883c4c65c8fb5c21d08078eedf134f9d42cf989faf8928e2edc05a40ab008c07b367191326873773704de5bc458588aa5c0bcff7edce2ae11
6
+ metadata.gz: 8b394cce3e4e6691c346a28252582eec471fff714dc09a95c2627a0c2e1dc411c11969ad5de86c5b2d26d5e156c8d05cc0bbdf9c367b40cee71d3b2eef6e7299
7
+ data.tar.gz: 17e35ccf3e285df319a30382220e3dd5faf1495d46b80d9a5b0fd6d9e8cc29ad98d98c0fc7e09da530462d5ec42fb3d5b2c455c1ce804be5a4297fa206df8403
data/README.md ADDED
@@ -0,0 +1,65 @@
1
+ # pg_fulltext
2
+
3
+ A pretty reasonable PostgreSQL fulltext implementation with minimal configuration
4
+
5
+ ## Installation
6
+
7
+ ```shell
8
+ gem install pg_fulltext
9
+ ```
10
+
11
+ or add the following to your `Gemfile`:
12
+
13
+ ```ruby
14
+ gem 'pg_fulltext'
15
+ ```
16
+
17
+ and run `bundle install`
18
+
19
+ ## Rails Configuration
20
+
21
+ This implementation assumes you have a `tsv` column on your model, and that you're generating a string appropriate for
22
+ the language you're using (we don't specify a default, so it will probably default to `'english'` depending on your
23
+ Postgres implementation):
24
+
25
+ ```ruby
26
+ class MyModel
27
+ include PgFulltext::ActiveRecord
28
+
29
+ add_search_scope
30
+ end
31
+ ```
32
+
33
+ You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
34
+
35
+ ```ruby
36
+ MyModel.search('foo bar "include this phrase" !butnotthis !"and and also not this"')
37
+ ```
38
+
39
+ The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
40
+ configured per the following options:
41
+
42
+ | Option | Default | Description |
43
+ | ---------------- | :------ | :----------- |
44
+ | `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here |
45
+ | `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated |
46
+ | `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false` |
47
+ | `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. Setting `reorder` to `true` clears the existing order so that the fulltext `rank` is applied instead |
48
+ | `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true` |
49
+ | `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
50
+
51
+ ## Standalone Configuration
52
+
53
+ There's not much here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `tsquery`-compatible
54
+ string for you to use as you wish.
55
+
56
+ Something like this should do the trick:
57
+
58
+ ```ruby
59
+ search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
60
+ sql_query = <<~SQL
61
+ SELECT *
62
+ FROM my_model
63
+ WHERE tsv @@ to_tsquery('portuguese', '#{PgFulltext::Query.to_tsquery_string(search_string)}')
64
+ SQL
65
+ ```
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task default: %w[spec]
@@ -19,15 +19,15 @@ module PgFulltext
19
19
  relation,
20
20
  query,
21
21
  tsvector_column: :tsv,
22
- search_type: :simple,
22
+ search_type: nil,
23
23
  order: true,
24
+ prefix: true,
24
25
  reorder: false,
25
26
  any_word: false,
26
27
  ignore_accents: false
27
28
  )
28
29
  serial = SecureRandom.hex(4)
29
- table_quoted = connection.quote_table_name(table_name)
30
- pk_quoted = "#{table_quoted}.#{connection.quote_column_name(primary_key)}"
30
+ pk_quoted = "#{quoted_table_name}.#{connection.quote_column_name(primary_key)}"
31
31
  fulltext_join_name = "pg_fulltext_#{serial}"
32
32
 
33
33
  # Build the search relation to join on
@@ -37,6 +37,7 @@ module PgFulltext
37
37
  tsvector_column: tsvector_column,
38
38
  search_type: search_type,
39
39
  any_word: any_word,
40
+ prefix: prefix,
40
41
  ignore_accents: ignore_accents,
41
42
  )
42
43
 
@@ -58,13 +59,13 @@ module PgFulltext
58
59
  tsvector_column: :tsv,
59
60
  search_type: nil,
60
61
  any_word: false,
62
+ prefix: true,
61
63
  ignore_accents: false
62
64
  )
63
- tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&'))
65
+ tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&', prefix: prefix))
64
66
  tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
65
- table_quoted = connection.quote_table_name(table_name)
66
67
  column_quoted = connection.quote_column_name(tsvector_column)
67
- fqc_quoted = "#{table_quoted}.#{column_quoted}"
68
+ fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
68
69
  tsquery = "to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
69
70
 
70
71
  relation
@@ -2,10 +2,8 @@ module PgFulltext
2
2
  module Query
3
3
  def self.to_tsquery_string(query, prefix: true, operator: '&')
4
4
 
5
- # Parse out all [unicode] non-word and non-quote characters
6
- query.gsub!(/[^\s\p{L}"!]/, '')
7
- query.gsub!(/"+/, '"')
8
- query.gsub!(/\s+/, ' ')
5
+ # Normalize search string to a more parseable equivalent
6
+ query = normalize_query(query)
9
7
 
10
8
  # Collect terms
11
9
  terms = []
@@ -21,15 +19,19 @@ module PgFulltext
21
19
  # Skip if completely comprised of non-unicode word characters
22
20
  next if term.gsub(/[^\s\p{L}]/, '') == ''
23
21
 
24
- if term.start_with?('!"') && !term.end_with?('"')
25
- phrase_terms << format_term(term[2..-1], prefix: true)
22
+ if term.start_with?('!"') && term.end_with?('"')
23
+ terms << format_term("!#{term[2..-2]}", prefix: prefix)
24
+ elsif term.start_with?('"') && term.end_with?('"')
25
+ terms << format_term(term[1..-2], prefix: prefix)
26
+ elsif term.start_with?('!"') && !term.end_with?('"')
27
+ phrase_terms << format_term(term[2..-1], prefix: prefix)
26
28
  negate_phrase = true
27
29
  elsif term.start_with?('"') && !term.end_with?('"')
28
- phrase_terms << format_term(term[1..-1], prefix: true)
30
+ phrase_terms << format_term(term[1..-1], prefix: prefix)
29
31
  elsif phrase_terms.length > 0
30
32
  if term.end_with?('"')
31
33
  phrase_terms << format_term(term[0..-2], prefix: prefix)
32
- terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms, prefix: prefix).join(' <-> ')})"
34
+ terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms).join(' <-> ')})"
33
35
  phrase_terms = []
34
36
  negate_phrase = false
35
37
  else
@@ -41,7 +43,7 @@ module PgFulltext
41
43
  end
42
44
  else
43
45
  query.gsub! /["]/, ''
44
- terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) }, prefix: prefix)
46
+ terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) })
45
47
  end
46
48
 
47
49
  # Join terms with operator
@@ -50,6 +52,16 @@ module PgFulltext
50
52
 
51
53
  private
52
54
 
55
+ def self.normalize_query(query)
56
+ query
57
+ .gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
58
+ .gsub(/[^\s\p{L}"!]/, '') # Remove all non-unicode, quote ("), and bangs (!)
59
+ .gsub(/"+/, '"') # Replace Repeat quotes with single double-quote
60
+ .gsub(/!+/, '!') # Replace Repeat bangs with single bang
61
+ .gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
62
+ .strip # Strip space from beginning and end of line
63
+ end
64
+
53
65
  def self.format_term(term, prefix: true)
54
66
  # Remove any ! that's not at the beginning of the term, as it will break the query
55
67
  term.gsub!(/(?<!^)!/, '')
@@ -58,7 +70,7 @@ module PgFulltext
58
70
  "#{term}#{':*' if prefix}"
59
71
  end
60
72
 
61
- def self.reject_falsy(terms, prefix: true)
73
+ def self.reject_falsy(terms)
62
74
  false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
63
75
  terms.reject { |v| false_values.include?(v) }
64
76
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Robertson
@@ -9,20 +9,80 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2021-10-11 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '5.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: with_model
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
13
69
  description: Allows simple searching with a variety of options
14
70
  email: adam@arcreative.net
15
71
  executables: []
16
72
  extensions: []
17
73
  extra_rdoc_files: []
18
74
  files:
75
+ - README.md
76
+ - Rakefile
19
77
  - lib/pg_fulltext.rb
20
78
  - lib/pg_fulltext/active_record.rb
21
79
  - lib/pg_fulltext/query.rb
22
80
  homepage: https://github.com/arcreative/pg_fulltext
23
81
  licenses:
24
82
  - MIT
25
- metadata: {}
83
+ metadata:
84
+ homepage_uri: https://github.com/arcreative/pg_fulltext
85
+ source_code_uri: https://github.com/arcreative/pg_fulltext
26
86
  post_install_message:
27
87
  rdoc_options: []
28
88
  require_paths:
@@ -38,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
98
  - !ruby/object:Gem::Version
39
99
  version: '0'
40
100
  requirements: []
41
- rubygems_version: 3.1.4
101
+ rubygems_version: 3.1.6
42
102
  signing_key:
43
103
  specification_version: 4
44
104
  summary: PostgreSQL fulltext search