pg_fulltext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49c8f3b3250945a7e3e600a100951abffe80a0537e14fcf392d96778d9648777
4
- data.tar.gz: efef8f9be9540e961015472dc3f498203c62a37856ef4f7c66f00fad30e7e5a2
3
+ metadata.gz: 6e4fa6010e29f5c15b3381805189d6b01e917324260dd2184aa9b15d2c33366f
4
+ data.tar.gz: 578ccbf2ffe79bb48c37971a33e86566693f10130c555f218bc0b86fd2190c08
5
5
  SHA512:
6
- metadata.gz: 2c324ce42e53dd725bec4ee7e11a8763927fe8080059a504ba31e5546b8d528ee76c040ae4a93c6b272a744d413f695583d413d991d1db6afb846f2e8750162e
7
- data.tar.gz: 90e7cd23c65e9ee883c4c65c8fb5c21d08078eedf134f9d42cf989faf8928e2edc05a40ab008c07b367191326873773704de5bc458588aa5c0bcff7edce2ae11
6
+ metadata.gz: 8b394cce3e4e6691c346a28252582eec471fff714dc09a95c2627a0c2e1dc411c11969ad5de86c5b2d26d5e156c8d05cc0bbdf9c367b40cee71d3b2eef6e7299
7
+ data.tar.gz: 17e35ccf3e285df319a30382220e3dd5faf1495d46b80d9a5b0fd6d9e8cc29ad98d98c0fc7e09da530462d5ec42fb3d5b2c455c1ce804be5a4297fa206df8403
data/README.md ADDED
@@ -0,0 +1,65 @@
1
+ # pg_fulltext
2
+
3
+ A pretty reasonable PostgreSQL fulltext implementation with minimal configuration
4
+
5
+ ## Installation
6
+
7
+ ```shell
8
+ gem install pg_fulltext
9
+ ```
10
+
11
+ or add the following to your `Gemfile`:
12
+
13
+ ```ruby
14
+ gem 'pg_fulltext'
15
+ ```
16
+
17
+ and run `bundle install`
18
+
19
+ ## Rails Configuration
20
+
21
+ This implementation assumes you have a `tsv` column on your model, and that you're generating a string appropriate for
22
+ the language you're using (we don't specify a default, so it will probably default to `'english'` depending on your
23
+ Postgres implementation):
24
+
25
+ ```ruby
26
+ class MyModel
27
+ include PgFulltext::ActiveRecord
28
+
29
+ add_search_scope
30
+ end
31
+ ```
32
+
33
+ You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
34
+
35
+ ```ruby
36
+ MyModel.search('foo bar "include this phrase" !butnotthis !"and and also not this"')
37
+ ```
38
+
39
+ The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
40
+ configured per the following options:
41
+
42
+ | Option | Default | Description |
43
+ | ---------------- | :------ | :----------- |
44
+ | `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here |
45
+ | `search_type` | `nil` | Your PostgreSQL probably defaults to `'english'`, but set this to match the tsvector you've generated |
46
+ | `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false` |
47
+ | `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. Setting this to `true` clears the existing order and applies the fulltext `rank` instead |
48
+ | `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true` |
49
+ | `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
50
+
51
+ ## Standalone Configuration
52
+
53
+ There's not much here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `tsquery`-compatible
54
+ string for you to use as you wish.
55
+
56
+ Something like this should do the trick:
57
+
58
+ ```ruby
59
+ search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
60
+ sql_query = <<~SQL
61
+ SELECT *
62
+ FROM my_model
63
+ WHERE tsv @@ to_tsquery('portuguese', '#{PgFulltext::Query.to_tsquery_string(search_string)}')
64
+ SQL
65
+ ```
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task default: %w[spec]
@@ -19,15 +19,15 @@ module PgFulltext
19
19
  relation,
20
20
  query,
21
21
  tsvector_column: :tsv,
22
- search_type: :simple,
22
+ search_type: nil,
23
23
  order: true,
24
+ prefix: true,
24
25
  reorder: false,
25
26
  any_word: false,
26
27
  ignore_accents: false
27
28
  )
28
29
  serial = SecureRandom.hex(4)
29
- table_quoted = connection.quote_table_name(table_name)
30
- pk_quoted = "#{table_quoted}.#{connection.quote_column_name(primary_key)}"
30
+ pk_quoted = "#{quoted_table_name}.#{connection.quote_column_name(primary_key)}"
31
31
  fulltext_join_name = "pg_fulltext_#{serial}"
32
32
 
33
33
  # Build the search relation to join on
@@ -37,6 +37,7 @@ module PgFulltext
37
37
  tsvector_column: tsvector_column,
38
38
  search_type: search_type,
39
39
  any_word: any_word,
40
+ prefix: prefix,
40
41
  ignore_accents: ignore_accents,
41
42
  )
42
43
 
@@ -58,13 +59,13 @@ module PgFulltext
58
59
  tsvector_column: :tsv,
59
60
  search_type: nil,
60
61
  any_word: false,
62
+ prefix: true,
61
63
  ignore_accents: false
62
64
  )
63
- tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&'))
65
+ tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&', prefix: prefix))
64
66
  tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
65
- table_quoted = connection.quote_table_name(table_name)
66
67
  column_quoted = connection.quote_column_name(tsvector_column)
67
- fqc_quoted = "#{table_quoted}.#{column_quoted}"
68
+ fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
68
69
  tsquery = "to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
69
70
 
70
71
  relation
@@ -2,10 +2,8 @@ module PgFulltext
2
2
  module Query
3
3
  def self.to_tsquery_string(query, prefix: true, operator: '&')
4
4
 
5
- # Parse out all [unicode] non-word and non-quote characters
6
- query.gsub!(/[^\s\p{L}"!]/, '')
7
- query.gsub!(/"+/, '"')
8
- query.gsub!(/\s+/, ' ')
5
+ # Normalize search string to a more parseable equivalent
6
+ query = normalize_query(query)
9
7
 
10
8
  # Collect terms
11
9
  terms = []
@@ -21,15 +19,19 @@ module PgFulltext
21
19
  # Skip if completely comprised of non-unicode word characters
22
20
  next if term.gsub(/[^\s\p{L}]/, '') == ''
23
21
 
24
- if term.start_with?('!"') && !term.end_with?('"')
25
- phrase_terms << format_term(term[2..-1], prefix: true)
22
+ if term.start_with?('!"') && term.end_with?('"')
23
+ terms << format_term("!#{term[2..-2]}", prefix: prefix)
24
+ elsif term.start_with?('"') && term.end_with?('"')
25
+ terms << format_term(term[1..-2], prefix: prefix)
26
+ elsif term.start_with?('!"') && !term.end_with?('"')
27
+ phrase_terms << format_term(term[2..-1], prefix: prefix)
26
28
  negate_phrase = true
27
29
  elsif term.start_with?('"') && !term.end_with?('"')
28
- phrase_terms << format_term(term[1..-1], prefix: true)
30
+ phrase_terms << format_term(term[1..-1], prefix: prefix)
29
31
  elsif phrase_terms.length > 0
30
32
  if term.end_with?('"')
31
33
  phrase_terms << format_term(term[0..-2], prefix: prefix)
32
- terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms, prefix: prefix).join(' <-> ')})"
34
+ terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms).join(' <-> ')})"
33
35
  phrase_terms = []
34
36
  negate_phrase = false
35
37
  else
@@ -41,7 +43,7 @@ module PgFulltext
41
43
  end
42
44
  else
43
45
  query.gsub! /["]/, ''
44
- terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) }, prefix: prefix)
46
+ terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) })
45
47
  end
46
48
 
47
49
  # Join terms with operator
@@ -50,6 +52,16 @@ module PgFulltext
50
52
 
51
53
  private
52
54
 
55
+ def self.normalize_query(query)
56
+ query
57
+ .gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
58
+ .gsub(/[^\s\p{L}"!]/, '') # Remove all non-unicode, quote ("), and bangs (!)
59
+ .gsub(/"+/, '"') # Replace Repeat quotes with single double-quote
60
+ .gsub(/!+/, '!') # Replace Repeat bangs with single bang
61
+ .gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
62
+ .strip # Strip space from beginning and end of line
63
+ end
64
+
53
65
  def self.format_term(term, prefix: true)
54
66
  # Remove any ! that's not at the beginning of the term, as it will break the query
55
67
  term.gsub!(/(?<!^)!/, '')
@@ -58,7 +70,7 @@ module PgFulltext
58
70
  "#{term}#{':*' if prefix}"
59
71
  end
60
72
 
61
- def self.reject_falsy(terms, prefix: true)
73
+ def self.reject_falsy(terms)
62
74
  false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
63
75
  terms.reject { |v| false_values.include?(v) }
64
76
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Robertson
@@ -9,20 +9,80 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2021-10-11 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '5.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: with_model
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
13
69
  description: Allows simple searching with a variety of options
14
70
  email: adam@arcreative.net
15
71
  executables: []
16
72
  extensions: []
17
73
  extra_rdoc_files: []
18
74
  files:
75
+ - README.md
76
+ - Rakefile
19
77
  - lib/pg_fulltext.rb
20
78
  - lib/pg_fulltext/active_record.rb
21
79
  - lib/pg_fulltext/query.rb
22
80
  homepage: https://github.com/arcreative/pg_fulltext
23
81
  licenses:
24
82
  - MIT
25
- metadata: {}
83
+ metadata:
84
+ homepage_uri: https://github.com/arcreative/pg_fulltext
85
+ source_code_uri: https://github.com/arcreative/pg_fulltext
26
86
  post_install_message:
27
87
  rdoc_options: []
28
88
  require_paths:
@@ -38,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
98
  - !ruby/object:Gem::Version
39
99
  version: '0'
40
100
  requirements: []
41
- rubygems_version: 3.1.4
101
+ rubygems_version: 3.1.6
42
102
  signing_key:
43
103
  specification_version: 4
44
104
  summary: PostgreSQL fulltext search