pg_fulltext 0.1.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49c8f3b3250945a7e3e600a100951abffe80a0537e14fcf392d96778d9648777
4
- data.tar.gz: efef8f9be9540e961015472dc3f498203c62a37856ef4f7c66f00fad30e7e5a2
3
+ metadata.gz: 2f4f614bb0fa282ac0bb258122139efd64f81ac8f04eb3dd4fd2c0e80c9f9d5a
4
+ data.tar.gz: c4292d5984bf2e3774b7f73948173ea2b4f022d2d29989b3a57222b3b516b02a
5
5
  SHA512:
6
- metadata.gz: 2c324ce42e53dd725bec4ee7e11a8763927fe8080059a504ba31e5546b8d528ee76c040ae4a93c6b272a744d413f695583d413d991d1db6afb846f2e8750162e
7
- data.tar.gz: 90e7cd23c65e9ee883c4c65c8fb5c21d08078eedf134f9d42cf989faf8928e2edc05a40ab008c07b367191326873773704de5bc458588aa5c0bcff7edce2ae11
6
+ metadata.gz: 1890385e76798f005d56c8005aa32b23badbd9ad770822b61962f37016cbe9997ebe9dc090d0948654c29a0178b939c6c472ed80b4c4fed671e0015781444ed5
7
+ data.tar.gz: af5b0b8d46c2a5ee73ee5de50cdc5c6b9f8e36bcb616830207db36cd32072a6a2fe7b173baf35da5fb9f938b80da14cda44cc9d70f966e6e3704a8b230ab0a2e
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # pg_fulltext
2
+
3
+ A pretty reasonable PostgreSQL fulltext implementation with minimal configuration
4
+
5
+ ## Installation
6
+
7
+ ```shell
8
+ gem install pg_fulltext
9
+ ```
10
+
11
+ or add the following to your `Gemfile`:
12
+
13
+ ```ruby
14
+ gem 'pg_fulltext'
15
+ ```
16
+
17
+ and run `bundle install`
18
+
19
+ ## Rails Configuration
20
+
21
+ This implementation assumes you have a `tsv` column on your model, and that you're generating a string appropriate for
22
+ the language you're using (we don't specify a default, so it will probably default to `'english'` depending on your
23
+ Postgres implementation):
24
+
25
+ ```ruby
26
+ class MyModel
27
+ include PgFulltext::ActiveRecord
28
+
29
+ add_search_scope
30
+ end
31
+ ```
32
+
33
+ You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
34
+
35
+ ```ruby
36
+ MyModel.search('foo bar "include this phrase" !butnotthis !"and and also not this"')
37
+ ```
38
+
39
+ The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
40
+ configured per the following options:
41
+
42
+ | Option | Default | Description |
43
+ | ---------------- | :------- | :----------- |
44
+ | `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here. |
45
+ | `search_type` | `simple` | Your PostgreSQL probably defaults to `'english'`, but the default for this option is `simple` to ensure the most predictable behavior. IMPORTANT: Set this to match the tsvector you've generated. |
46
+ | `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false`. |
47
+ | `prefix` | `true` | Default search will match partial words as well as whole words. Set this to `false` if only whole words should be matched. |
48
+ | `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. `reorder` will call clear, effectively clearing the existing order and applying `rank`. |
49
+ | `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true`. |
50
+ | `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
51
+
52
+ ## Standalone Configuration
53
+
54
+ There's not much here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `to_tsquery`-compatible
55
+ string for you to use as you wish.
56
+
57
+ Something like this should do the trick:
58
+
59
+ ```ruby
60
+ db = PG.connect(dbname: 'mydb')
61
+ search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
62
+ tsv_query = db.escape_string(PgFulltext::Query.to_tsquery_string(search_string))
63
+ sql = <<~SQL
64
+ SELECT *
65
+ FROM my_model
66
+ WHERE tsv @@ to_tsquery('portuguese', '#{tsv_query}')
67
+ SQL
68
+ db.exec(sql)
69
+ ```
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task default: %w[spec]
@@ -21,13 +21,13 @@ module PgFulltext
21
21
  tsvector_column: :tsv,
22
22
  search_type: :simple,
23
23
  order: true,
24
+ prefix: true,
24
25
  reorder: false,
25
26
  any_word: false,
26
27
  ignore_accents: false
27
28
  )
28
29
  serial = SecureRandom.hex(4)
29
- table_quoted = connection.quote_table_name(table_name)
30
- pk_quoted = "#{table_quoted}.#{connection.quote_column_name(primary_key)}"
30
+ pk_quoted = "#{quoted_table_name}.#{connection.quote_column_name(primary_key)}"
31
31
  fulltext_join_name = "pg_fulltext_#{serial}"
32
32
 
33
33
  # Build the search relation to join on
@@ -37,6 +37,7 @@ module PgFulltext
37
37
  tsvector_column: tsvector_column,
38
38
  search_type: search_type,
39
39
  any_word: any_word,
40
+ prefix: prefix,
40
41
  ignore_accents: ignore_accents,
41
42
  )
42
43
 
@@ -58,13 +59,13 @@ module PgFulltext
58
59
  tsvector_column: :tsv,
59
60
  search_type: nil,
60
61
  any_word: false,
62
+ prefix: true,
61
63
  ignore_accents: false
62
64
  )
63
- tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&'))
65
+ tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&', prefix: prefix))
64
66
  tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
65
- table_quoted = connection.quote_table_name(table_name)
66
67
  column_quoted = connection.quote_column_name(tsvector_column)
67
- fqc_quoted = "#{table_quoted}.#{column_quoted}"
68
+ fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
68
69
  tsquery = "to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
69
70
 
70
71
  relation
@@ -1,55 +1,47 @@
1
+ require 'rltk/lexer'
2
+
1
3
  module PgFulltext
2
4
  module Query
3
- def self.to_tsquery_string(query, prefix: true, operator: '&')
4
-
5
- # Parse out all [unicode] non-word and non-quote characters
6
- query.gsub!(/[^\s\p{L}"!]/, '')
7
- query.gsub!(/"+/, '"')
8
- query.gsub!(/\s+/, ' ')
9
-
10
- # Collect terms
11
- terms = []
5
+ class Lexer < RLTK::Lexer
6
+ rule(/!+/)
7
+ rule(/"+/)
12
8
 
13
- # Phrase mode
14
- if query.count('"') > 0 && query.count('"') % 2 == 0
15
- phrase_terms = []
16
- negate_phrase = false
9
+ rule(/(!?)[0-9\p{L}!]+/) { |v| [:WORD, v] }
10
+ rule(/"[\p{L}\s!]+"/) { |v| [:PHRASE, v[1..-2]] }
11
+ rule(/!"[\p{L}\s!]+"/) { |v| [:NOT_PHRASE, v[2..-2]] }
17
12
 
18
- query_parts = query.split(' ')
19
- query_parts.each do |term|
13
+ rule(/\s+/)
14
+ rule(/[^\p{L}\s"]+/)
15
+ end
20
16
 
21
- # Skip if completely comprised of non-unicode word characters
22
- next if term.gsub(/[^\s\p{L}]/, '') == ''
17
+ def self.to_tsquery_string(query, prefix: true, operator: '&')
18
+ query = normalize_query(query)
23
19
 
24
- if term.start_with?('!"') && !term.end_with?('"')
25
- phrase_terms << format_term(term[2..-1], prefix: true)
26
- negate_phrase = true
27
- elsif term.start_with?('"') && !term.end_with?('"')
28
- phrase_terms << format_term(term[1..-1], prefix: true)
29
- elsif phrase_terms.length > 0
30
- if term.end_with?('"')
31
- phrase_terms << format_term(term[0..-2], prefix: prefix)
32
- terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms, prefix: prefix).join(' <-> ')})"
33
- phrase_terms = []
34
- negate_phrase = false
35
- else
36
- phrase_terms << format_term(term, prefix: prefix)
37
- end
38
- else
39
- terms << format_term(term, prefix: prefix)
40
- end
20
+ terms = []
21
+ Lexer.lex(query).each do |token|
22
+ if token.type == :WORD
23
+ terms << format_term(token.value, prefix: prefix)
24
+ elsif %i[PHRASE NOT_PHRASE].include?(token.type)
25
+ phrase_terms = Lexer.lex(token.value).map do |phrase_term|
26
+ phrase_term.value.nil? ? nil : format_term(phrase_term.value, prefix: prefix)
27
+ end.compact
28
+ terms << "#{'!' if token.type == :NOT_PHRASE}(#{phrase_terms.join(' <-> ')})"
41
29
  end
42
- else
43
- query.gsub! /["]/, ''
44
- terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) }, prefix: prefix)
45
30
  end
46
31
 
47
- # Join terms with operator
48
32
  terms.join(" #{operator} ")
49
33
  end
50
34
 
51
35
  private
52
36
 
37
+ def self.normalize_query(query)
38
+ query
39
+ .gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
40
+ .gsub(/[^\s\p{L}0-9"!]/, '') # Remove all non-unicode, whitespace, numbers, quotes ("), and bangs (!)
41
+ .gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
42
+ .strip # Strip space from beginning and end of line
43
+ end
44
+
53
45
  def self.format_term(term, prefix: true)
54
46
  # Remove any ! that's not at the beginning of the term, as it will break the query
55
47
  term.gsub!(/(?<!^)!/, '')
@@ -58,7 +50,7 @@ module PgFulltext
58
50
  "#{term}#{':*' if prefix}"
59
51
  end
60
52
 
61
- def self.reject_falsy(terms, prefix: true)
53
+ def self.reject_falsy(terms)
62
54
  false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
63
55
  terms.reject { |v| false_values.include?(v) }
64
56
  end
metadata CHANGED
@@ -1,28 +1,102 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Robertson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-11 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2021-11-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '5.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rltk
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 3.0.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: with_model
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
13
83
  description: Allows simple searching with a variety of options
14
84
  email: adam@arcreative.net
15
85
  executables: []
16
86
  extensions: []
17
87
  extra_rdoc_files: []
18
88
  files:
89
+ - README.md
90
+ - Rakefile
19
91
  - lib/pg_fulltext.rb
20
92
  - lib/pg_fulltext/active_record.rb
21
93
  - lib/pg_fulltext/query.rb
22
94
  homepage: https://github.com/arcreative/pg_fulltext
23
95
  licenses:
24
96
  - MIT
25
- metadata: {}
97
+ metadata:
98
+ homepage_uri: https://github.com/arcreative/pg_fulltext
99
+ source_code_uri: https://github.com/arcreative/pg_fulltext
26
100
  post_install_message:
27
101
  rdoc_options: []
28
102
  require_paths:
@@ -38,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
112
  - !ruby/object:Gem::Version
39
113
  version: '0'
40
114
  requirements: []
41
- rubygems_version: 3.1.4
115
+ rubygems_version: 3.1.6
42
116
  signing_key:
43
117
  specification_version: 4
44
118
  summary: PostgreSQL fulltext search