pg_fulltext 0.1.0 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49c8f3b3250945a7e3e600a100951abffe80a0537e14fcf392d96778d9648777
4
- data.tar.gz: efef8f9be9540e961015472dc3f498203c62a37856ef4f7c66f00fad30e7e5a2
3
+ metadata.gz: 2f4f614bb0fa282ac0bb258122139efd64f81ac8f04eb3dd4fd2c0e80c9f9d5a
4
+ data.tar.gz: c4292d5984bf2e3774b7f73948173ea2b4f022d2d29989b3a57222b3b516b02a
5
5
  SHA512:
6
- metadata.gz: 2c324ce42e53dd725bec4ee7e11a8763927fe8080059a504ba31e5546b8d528ee76c040ae4a93c6b272a744d413f695583d413d991d1db6afb846f2e8750162e
7
- data.tar.gz: 90e7cd23c65e9ee883c4c65c8fb5c21d08078eedf134f9d42cf989faf8928e2edc05a40ab008c07b367191326873773704de5bc458588aa5c0bcff7edce2ae11
6
+ metadata.gz: 1890385e76798f005d56c8005aa32b23badbd9ad770822b61962f37016cbe9997ebe9dc090d0948654c29a0178b939c6c472ed80b4c4fed671e0015781444ed5
7
+ data.tar.gz: af5b0b8d46c2a5ee73ee5de50cdc5c6b9f8e36bcb616830207db36cd32072a6a2fe7b173baf35da5fb9f938b80da14cda44cc9d70f966e6e3704a8b230ab0a2e
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # pg_fulltext
2
+
3
+ A pretty reasonable PostgreSQL fulltext implementation with minimal configuration
4
+
5
+ ## Installation
6
+
7
+ ```shell
8
+ gem install pg_fulltext
9
+ ```
10
+
11
+ or add the following to your `Gemfile`:
12
+
13
+ ```ruby
14
+ gem 'pg_fulltext'
15
+ ```
16
+
17
+ and run `bundle install`
18
+
19
+ ## Rails Configuration
20
+
21
+ This implementation assumes you have a `tsv` column on your model, and that you're generating a string appropriate for
22
+ the language you're using (we don't specify a default, so it will probably default to `'english'` depending on your
23
+ Postgres implementation):
24
+
25
+ ```ruby
26
+ class MyModel
27
+ include PgFulltext::ActiveRecord
28
+
29
+ add_search_scope
30
+ end
31
+ ```
32
+
33
+ You can then use the `search` method (configurable via the first parameter of the `add_search_scope` method):
34
+
35
+ ```ruby
36
+ MyModel.search('foo bar "include this phrase" !butnotthis !"and and also not this"')
37
+ ```
38
+
39
+ The defaults for this include support for negation, phrases, phrase negation, and prefix searches, but those can be
40
+ configured per the following options:
41
+
42
+ | Option | Default | Description |
43
+ | ---------------- | :------- | :----------- |
44
+ | `tsvector_column` | `tsv` | If you have a different column containing your tsvector, specify it here. |
45
+ | `search_type` | `simple` | Your PostgreSQL probably defaults to `'english'`, but the default for this option is `simple` to ensure the most predictable behavior. IMPORTANT: Set this to match the tsvector you've generated. |
46
+ | `order` | `true` | Whether or not the `order` method should be applied against the generated `rank` for the fulltext query. If you just care about returning matches and not their respective rank, set this to `false`. |
47
+ | `prefix` | `true` | Default search will match partial words as well as whole words. Set this to `false` if only whole words should be matched. |
48
+ | `reorder` | `false` | If you already have `order` set on this relation, it will take precedence over the fulltext `rank`. Setting `reorder` to `true` clears the existing order and applies the fulltext `rank` instead. |
49
+ | `any_word` | `false` | Default search uses the `&` operator, ensuring that all terms are matched in the query. If you want to match _any_ term in the query, set this to `true`. |
50
+ | `ignore_accents` | `false` | By default, search queries with accents will be sent through as-is. Setting this to `true` will `unaccent()` the query, which helps match `tsv` columns that have also been unaccented. Alternatively, you can have your `tsv` column be a combination of both, and this option will be unnecessary. Requires the `unaccent` Postgres extension. |
51
+
52
+ ## Standalone Configuration
53
+
54
+ There's not much here, but the `PgFulltext::Query.to_tsquery_string` method will generate a nice `tsquery`-compatible
55
+ string for you to use as you wish.
56
+
57
+ Something like this should do the trick:
58
+
59
+ ```ruby
60
+ db = PG.connect(dbname: 'mydb')
61
+ search_string = 'foo bar "include this phrase" !butnotthis !"and and also not this"'
62
+ tsv_query = db.escape_string(PgFulltext::Query.to_tsquery_string(search_string))
63
+ sql = <<~SQL
64
+ SELECT *
65
+ FROM my_model
66
+ WHERE tsv @@ to_tsquery('portuguese', '#{tsv_query}')
67
+ SQL
68
+ db.exec(sql)
69
+ ```
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task default: %w[spec]
@@ -21,13 +21,13 @@ module PgFulltext
21
21
  tsvector_column: :tsv,
22
22
  search_type: :simple,
23
23
  order: true,
24
+ prefix: true,
24
25
  reorder: false,
25
26
  any_word: false,
26
27
  ignore_accents: false
27
28
  )
28
29
  serial = SecureRandom.hex(4)
29
- table_quoted = connection.quote_table_name(table_name)
30
- pk_quoted = "#{table_quoted}.#{connection.quote_column_name(primary_key)}"
30
+ pk_quoted = "#{quoted_table_name}.#{connection.quote_column_name(primary_key)}"
31
31
  fulltext_join_name = "pg_fulltext_#{serial}"
32
32
 
33
33
  # Build the search relation to join on
@@ -37,6 +37,7 @@ module PgFulltext
37
37
  tsvector_column: tsvector_column,
38
38
  search_type: search_type,
39
39
  any_word: any_word,
40
+ prefix: prefix,
40
41
  ignore_accents: ignore_accents,
41
42
  )
42
43
 
@@ -58,13 +59,13 @@ module PgFulltext
58
59
  tsvector_column: :tsv,
59
60
  search_type: nil,
60
61
  any_word: false,
62
+ prefix: true,
61
63
  ignore_accents: false
62
64
  )
63
- tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&'))
65
+ tsquery_string_quoted = connection.quote(PgFulltext::Query.to_tsquery_string(query, operator: any_word ? '|' : '&', prefix: prefix))
64
66
  tsquery_string_quoted = "unaccent(#{tsquery_string_quoted})" if ignore_accents
65
- table_quoted = connection.quote_table_name(table_name)
66
67
  column_quoted = connection.quote_column_name(tsvector_column)
67
- fqc_quoted = "#{table_quoted}.#{column_quoted}"
68
+ fqc_quoted = "#{quoted_table_name}.#{column_quoted}"
68
69
  tsquery = "to_tsquery(#{"#{connection.quote search_type}, " if search_type.present?}#{tsquery_string_quoted})"
69
70
 
70
71
  relation
@@ -1,55 +1,47 @@
1
+ require 'rltk/lexer'
2
+
1
3
  module PgFulltext
2
4
  module Query
3
- def self.to_tsquery_string(query, prefix: true, operator: '&')
4
-
5
- # Parse out all [unicode] non-word and non-quote characters
6
- query.gsub!(/[^\s\p{L}"!]/, '')
7
- query.gsub!(/"+/, '"')
8
- query.gsub!(/\s+/, ' ')
9
-
10
- # Collect terms
11
- terms = []
5
+ class Lexer < RLTK::Lexer
6
+ rule(/!+/)
7
+ rule(/"+/)
12
8
 
13
- # Phrase mode
14
- if query.count('"') > 0 && query.count('"') % 2 == 0
15
- phrase_terms = []
16
- negate_phrase = false
9
+ rule(/(!?)[0-9\p{L}!]+/) { |v| [:WORD, v] }
10
+ rule(/"[\p{L}\s!]+"/) { |v| [:PHRASE, v[1..-2]] }
11
+ rule(/!"[\p{L}\s!]+"/) { |v| [:NOT_PHRASE, v[2..-2]] }
17
12
 
18
- query_parts = query.split(' ')
19
- query_parts.each do |term|
13
+ rule(/\s+/)
14
+ rule(/[^\p{L}\s"]+/)
15
+ end
20
16
 
21
- # Skip if completely comprised of non-unicode word characters
22
- next if term.gsub(/[^\s\p{L}]/, '') == ''
17
+ def self.to_tsquery_string(query, prefix: true, operator: '&')
18
+ query = normalize_query(query)
23
19
 
24
- if term.start_with?('!"') && !term.end_with?('"')
25
- phrase_terms << format_term(term[2..-1], prefix: true)
26
- negate_phrase = true
27
- elsif term.start_with?('"') && !term.end_with?('"')
28
- phrase_terms << format_term(term[1..-1], prefix: true)
29
- elsif phrase_terms.length > 0
30
- if term.end_with?('"')
31
- phrase_terms << format_term(term[0..-2], prefix: prefix)
32
- terms << "#{'!' if negate_phrase}(#{reject_falsy(phrase_terms, prefix: prefix).join(' <-> ')})"
33
- phrase_terms = []
34
- negate_phrase = false
35
- else
36
- phrase_terms << format_term(term, prefix: prefix)
37
- end
38
- else
39
- terms << format_term(term, prefix: prefix)
40
- end
20
+ terms = []
21
+ Lexer.lex(query).each do |token|
22
+ if token.type == :WORD
23
+ terms << format_term(token.value, prefix: prefix)
24
+ elsif %i[PHRASE NOT_PHRASE].include?(token.type)
25
+ phrase_terms = Lexer.lex(token.value).map do |phrase_term|
26
+ phrase_term.value.nil? ? nil : format_term(phrase_term.value, prefix: prefix)
27
+ end.compact
28
+ terms << "#{'!' if token.type == :NOT_PHRASE}(#{phrase_terms.join(' <-> ')})"
41
29
  end
42
- else
43
- query.gsub! /["]/, ''
44
- terms = reject_falsy(query.split(' ').map { |v| format_term(v, prefix: prefix) }, prefix: prefix)
45
30
  end
46
31
 
47
- # Join terms with operator
48
32
  terms.join(" #{operator} ")
49
33
  end
50
34
 
51
35
  private
52
36
 
37
+ def self.normalize_query(query)
38
+ query
39
+ .gsub(/[.,]/, ' ') # Replace all periods and commas with spaces (reasonable delimiters)
40
+ .gsub(/[^\s\p{L}0-9"!]/, '') # Remove all non-unicode, whitespace, numbers, quotes ("), and bangs (!)
41
+ .gsub(/\s+/, ' ') # Replace repeat whitespace occurrences with single spaces
42
+ .strip # Strip space from beginning and end of line
43
+ end
44
+
53
45
  def self.format_term(term, prefix: true)
54
46
  # Remove any ! that's not at the beginning of the term, as it will break the query
55
47
  term.gsub!(/(?<!^)!/, '')
@@ -58,7 +50,7 @@ module PgFulltext
58
50
  "#{term}#{':*' if prefix}"
59
51
  end
60
52
 
61
- def self.reject_falsy(terms, prefix: true)
53
+ def self.reject_falsy(terms)
62
54
  false_values = [nil, '', '"', '!', ':*', '":*', '!:*']
63
55
  terms.reject { |v| false_values.include?(v) }
64
56
  end
metadata CHANGED
@@ -1,28 +1,102 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Robertson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-11 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2021-11-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '5.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rltk
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 3.0.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: with_model
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
13
83
  description: Allows simple searching with a variety of options
14
84
  email: adam@arcreative.net
15
85
  executables: []
16
86
  extensions: []
17
87
  extra_rdoc_files: []
18
88
  files:
89
+ - README.md
90
+ - Rakefile
19
91
  - lib/pg_fulltext.rb
20
92
  - lib/pg_fulltext/active_record.rb
21
93
  - lib/pg_fulltext/query.rb
22
94
  homepage: https://github.com/arcreative/pg_fulltext
23
95
  licenses:
24
96
  - MIT
25
- metadata: {}
97
+ metadata:
98
+ homepage_uri: https://github.com/arcreative/pg_fulltext
99
+ source_code_uri: https://github.com/arcreative/pg_fulltext
26
100
  post_install_message:
27
101
  rdoc_options: []
28
102
  require_paths:
@@ -38,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
112
  - !ruby/object:Gem::Version
39
113
  version: '0'
40
114
  requirements: []
41
- rubygems_version: 3.1.4
115
+ rubygems_version: 3.1.6
42
116
  signing_key:
43
117
  specification_version: 4
44
118
  summary: PostgreSQL fulltext search