knitsearch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +11 -0
- data/README.md +652 -0
- data/lib/generators/knitsearch/install/install_generator.rb +208 -0
- data/lib/generators/knitsearch/install/templates/migration.rb.tt +7 -0
- data/lib/generators/knitsearch/multisearch_install/multisearch_install_generator.rb +89 -0
- data/lib/knitsearch/document.rb +12 -0
- data/lib/knitsearch/engine.rb +22 -0
- data/lib/knitsearch/fuzzy_corrector.rb +79 -0
- data/lib/knitsearch/has_many_dependent.rb +62 -0
- data/lib/knitsearch/has_many_through_join_dependent.rb +47 -0
- data/lib/knitsearch/has_many_through_target_dependent.rb +54 -0
- data/lib/knitsearch/highlighter.rb +36 -0
- data/lib/knitsearch/levenshtein.rb +35 -0
- data/lib/knitsearch/migration.rb +235 -0
- data/lib/knitsearch/model.rb +613 -0
- data/lib/knitsearch/multisearchable.rb +24 -0
- data/lib/knitsearch/multisearchable_sync.rb +38 -0
- data/lib/knitsearch/query.rb +57 -0
- data/lib/knitsearch/version.rb +5 -0
- data/lib/knitsearch.rb +129 -0
- data/lib/tasks/knitsearch.rake +33 -0
- metadata +125 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Knitsearch
  # FTS5 match-string builder. Escapes user input so it can't break out of FTS5
  # syntax: invalid byte sequences and control characters are replaced with
  # spaces, the text is split on whitespace, every token is wrapped in double
  # quotes with embedded quotes doubled, and the tokens are combined according
  # to the requested mode. Returns nil for empty input — the caller decides what
  # to do (typically: return an empty relation).
  module Query
    extend self

    # ASCII control characters (0x00-0x1F and DEL); each one is replaced by a space.
    CONTROL_CHARACTERS = /[\x00-\x1f\x7f]/

    # Builds an FTS5 MATCH expression from free-form input.
    #
    # input    - anything responding to #to_s; nil yields nil.
    # operator - :and (tokens joined by a space) or :or (tokens joined by " OR ").
    #            Only :and is accepted together with match: :phrase.
    # prefix   - when true, appends "*" to every quoted token. Only used by
    #            match: :word; it is not passed to the phrase builder.
    # match    - :word (each token quoted on its own) or :phrase (all tokens
    #            inside a single pair of quotes).
    #
    # Returns the match String, or nil when the input has no tokens.
    # Raises ArgumentError for an unknown match/operator or for
    # match: :phrase combined with operator: :or.
    def escape(input, operator: :and, prefix: false, match: :word)
      return nil if input.nil?

      # Scrub first: regexp-based String methods raise ArgumentError on
      # strings that contain invalid byte sequences, and this input is
      # user-supplied. Invalid bytes become separators, like control characters.
      cleaned = input.to_s.scrub(" ").gsub(CONTROL_CHARACTERS, " ").strip
      return nil if cleaned.empty?

      tokens = cleaned.split(/\s+/).reject(&:empty?)
      return nil if tokens.empty?

      case match
      when :word
        build_word_match(tokens, operator, prefix)
      when :phrase
        build_phrase_match(tokens, operator)
      else
        raise ArgumentError, "match must be :word or :phrase, got: #{match.inspect}"
      end
    end

    private

    # Quotes every token individually (doubling embedded quotes), optionally
    # marks each as a prefix term with a trailing "*", then joins them with the
    # separator that corresponds to +operator+.
    def build_word_match(tokens, operator, prefix)
      quoted = tokens.map { |t| %("#{t.gsub('"', '""')}") }
      quoted = quoted.map { |t| "#{t}*" } if prefix

      case operator
      when :and
        quoted.join(" ")
      when :or
        quoted.join(" OR ")
      else
        raise ArgumentError, "operator must be :and or :or, got: #{operator.inspect}"
      end
    end

    # Wraps all tokens in one pair of quotes, separated by single spaces, with
    # embedded quotes doubled. Rejects operator: :or.
    def build_phrase_match(tokens, operator)
      if operator == :or
        raise ArgumentError,
              "match: :phrase cannot be combined with operator: :or — a phrase is a single contiguous unit, not a set of terms to OR together"
      end

      escaped_tokens = tokens.map { |t| t.gsub('"', '""') }
      %("#{escaped_tokens.join(' ')}")
    end
  end
end
|
data/lib/knitsearch.rb
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support"
|
|
4
|
+
|
|
5
|
+
require "knitsearch/version"
|
|
6
|
+
require "knitsearch/engine"
|
|
7
|
+
require "knitsearch/has_many_dependent"
|
|
8
|
+
require "knitsearch/has_many_through_join_dependent"
|
|
9
|
+
require "knitsearch/has_many_through_target_dependent"
|
|
10
|
+
require "knitsearch/document"
|
|
11
|
+
require "knitsearch/multisearchable"
|
|
12
|
+
require "knitsearch/multisearchable_sync"
|
|
13
|
+
|
|
14
|
+
require "zeitwerk"
|
|
15
|
+
|
|
16
|
+
# Autoload the gem's constants with Zeitwerk. The generators directory is
# excluded from the loader.
gem_loader = Zeitwerk::Loader.for_gem
gem_loader.ignore("#{__dir__}/generators")
gem_loader.setup
|
|
19
|
+
|
|
20
|
+
module Knitsearch
  # Tokenizer preset names mapped to tokenizer specification strings.
  # NOTE(review): presumably consumed by the migration code as the FTS5
  # `tokenize` option — confirm against lib/knitsearch/migration.rb.
  TOKENIZER_PRESETS = {
    unicode: "unicode61 remove_diacritics 2",
    ascii: "ascii",
    porter: "porter",
    trigram: "trigram"
  }.freeze

  # Weight bucket letters mapped to numeric weights.
  WEIGHT_BUCKETS = {
    "A" => 8,
    "B" => 4,
    "C" => 2,
    "D" => 1
  }.freeze

  # Dictionary names accepted by the gem.
  SUPPORTED_DICTIONARIES = %w[simple english trigram].freeze

  # Base class for all errors raised by Knitsearch.
  class Error < StandardError; end
  class SchemaMismatchError < Error; end
  class ColumnError < Error; end
  class UnknownDictionaryError < Error; end
  class ConfigurationError < Error; end

  class << self
    # The four registries below are lazily created Hashes whose default block
    # stores a fresh Array per key, so `registry[klass] << entry` always works.
    # They are defined as explicit methods only; a preceding attr_reader with
    # the same names would be overridden immediately and only produce
    # "method redefined" warnings.

    # Registry keyed by parent class; see #register_belongs_to_dependent.
    def belongs_to_dependents
      @belongs_to_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # Registry keyed by child class; see #register_has_many_dependent.
    def has_many_dependents
      @has_many_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # Registry keyed by join class; see #register_has_many_through_dependent.
    def has_many_through_dependents
      @has_many_through_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # Registry keyed by target class; see #register_has_many_through_dependent.
    def has_many_through_target_dependents
      @has_many_through_target_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # Searches the shared multisearch index.
    #
    # query - the raw search text; it is escaped with Knitsearch::Query.escape
    #         using AND semantics, whole-word matching and no prefix matching.
    # limit - optional maximum number of rows; nil (the default) applies none.
    #
    # Returns a Document relation joined to knitsearches_fts and ordered by
    # bm25(knitsearches_fts) ascending (per the SQLite FTS5 documentation,
    # better matches have lower bm25 values). Returns Document.none when the
    # query is blank or contains no tokens after escaping.
    def multisearch(query, limit: nil)
      return Document.none if query.blank?

      escaped = Knitsearch::Query.escape(query, operator: :and, prefix: false, match: :word)
      return Document.none if escaped.nil?

      relation = Document
        .joins("INNER JOIN knitsearches_fts ON knitsearches_fts.rowid = knitsearches.id")
        .where("knitsearches_fts MATCH ?", escaped)
        .order(Arel.sql("bm25(knitsearches_fts)"))

      limit ? relation.limit(limit) : relation
    end

    # Records that +child_class+ depends on +parent_class+ through
    # +foreign_key+, with +shadow_map+ stored under :columns, and makes sure
    # the parent class has the :knitsearch_cascade_to_children
    # after_update_commit callback.
    def register_belongs_to_dependent(parent_class, child_class, foreign_key, shadow_map)
      belongs_to_dependents[parent_class] << {
        model: child_class,
        foreign_key: foreign_key,
        columns: shadow_map
      }

      # Install after_update_commit hook on parent class (idempotent): the
      # marker ivar on the class ensures the callback is declared only once
      # no matter how many children register against the same parent.
      unless parent_class.instance_variable_defined?(:@knitsearch_dependents_installed)
        parent_class.instance_variable_set(:@knitsearch_dependents_installed, true)
        parent_class.after_update_commit :knitsearch_cascade_to_children
      end
    end

    # Records a has_many dependency under +child_class+ and mixes
    # HasManyDependent into it (once).
    def register_has_many_dependent(child_class, parent_class, inverse_fk, shadow_map, parent_assoc)
      has_many_dependents[child_class] << {
        parent: parent_class,
        inverse_fk: inverse_fk,
        columns: shadow_map,
        parent_assoc: parent_assoc
      }

      child_class.include(HasManyDependent) unless child_class.include?(HasManyDependent)
    end

    # Records a has_many :through dependency on both sides of the association
    # and mixes the matching callback modules into the join and target classes
    # (once each).
    def register_has_many_through_dependent(join_class:, target_class:, parent_class:, parent_fk:, target_fk:, parent_assoc:, shadow_map:)
      # Store on join class side (for join create/destroy callbacks)
      has_many_through_dependents[join_class] << {
        parent_class: parent_class,
        parent_fk: parent_fk,
        target_class: target_class,
        target_fk: target_fk,
        parent_assoc: parent_assoc,
        columns: shadow_map
      }

      # Store on target class side (for target update callbacks). The values
      # of shadow_map are kept separately as :source_columns.
      source_columns = shadow_map.values
      has_many_through_target_dependents[target_class] << {
        join_class: join_class,
        parent_class: parent_class,
        parent_fk: parent_fk,
        target_fk: target_fk,
        parent_assoc: parent_assoc,
        columns: shadow_map,
        source_columns: source_columns
      }

      # Install callbacks on both join and target classes
      join_class.include(HasManyThroughJoinDependent) unless join_class.include?(HasManyThroughJoinDependent)
      target_class.include(HasManyThroughTargetDependent) unless target_class.include?(HasManyThroughTargetDependent)
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
namespace :knitsearch do
  # Resolves the model class named by a task argument.
  #
  # model_name      - the raw task argument (may be nil/blank).
  # task_name       - task name used in the usage hint of the error message.
  # required_method - class method the model must respond to.
  #
  # Raises RuntimeError when the name is blank, when no such constant exists,
  # or when the class lacks +required_method+. The NameError rescue covers
  # only the constant lookup: NoMethodError is a subclass of NameError, so
  # rescuing around the whole task body would misreport failures raised
  # inside the indexing call as "Could not find model".
  resolve_model = lambda do |model_name, task_name, required_method|
    raise "Model name required: bin/rails knitsearch:#{task_name}[ModelName]" if model_name.blank?

    model_class =
      begin
        model_name.classify.constantize
      rescue NameError
        raise "Could not find model #{model_name}"
      end

    raise "#{model_name} does not include Knitsearch::Model" unless model_class.respond_to?(required_method)

    model_class
  end

  desc "Backfill the FTS index from existing records. Usage: bin/rails knitsearch:backfill[Article]"
  task :backfill, [:model_name] => :environment do |_task, args|
    model_class = resolve_model.call(args[:model_name], "backfill", :knitsearch_backfill!)

    puts "Backfilling search index for #{model_class.name}..."
    model_class.knitsearch_backfill!
    puts "Backfill complete. Source table has #{model_class.count} rows."
  end

  desc "Rebuild the FTS5 index for a model. Usage: bin/rails knitsearch:reindex[Article]"
  task :reindex, [:model_name] => :environment do |_task, args|
    model_class = resolve_model.call(args[:model_name], "reindex", :reindex!)

    puts "Reindexing search for #{model_class.name}..."
    model_class.reindex!
    puts "Reindex complete for #{model_class.name}. Source table has #{model_class.count} rows."
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: knitsearch
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- knitsearch contributors
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: activerecord
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '8.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '8.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: railties
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '8.0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '8.0'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: sqlite3
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '2.0'
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '2.0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: minitest
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '5.0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '5.0'
|
|
68
|
+
description: Knitsearch adds FTS5-backed full-text search to ActiveRecord models.
|
|
69
|
+
Search by rich text, associated records, or multiple models in one query. Index
|
|
70
|
+
updates synchronously via SQLite triggers, atomic with source writes. BM25-ranked
|
|
71
|
+
results returned as a chainable Relation. Supports typo tolerance, phrase matching,
|
|
72
|
+
prefix matching, highlighting, snippets, and more.
|
|
73
|
+
email:
|
|
74
|
+
- noreply@driveton.com
|
|
75
|
+
executables: []
|
|
76
|
+
extensions: []
|
|
77
|
+
extra_rdoc_files: []
|
|
78
|
+
files:
|
|
79
|
+
- CHANGELOG.md
|
|
80
|
+
- README.md
|
|
81
|
+
- lib/generators/knitsearch/install/install_generator.rb
|
|
82
|
+
- lib/generators/knitsearch/install/templates/migration.rb.tt
|
|
83
|
+
- lib/generators/knitsearch/multisearch_install/multisearch_install_generator.rb
|
|
84
|
+
- lib/knitsearch.rb
|
|
85
|
+
- lib/knitsearch/document.rb
|
|
86
|
+
- lib/knitsearch/engine.rb
|
|
87
|
+
- lib/knitsearch/fuzzy_corrector.rb
|
|
88
|
+
- lib/knitsearch/has_many_dependent.rb
|
|
89
|
+
- lib/knitsearch/has_many_through_join_dependent.rb
|
|
90
|
+
- lib/knitsearch/has_many_through_target_dependent.rb
|
|
91
|
+
- lib/knitsearch/highlighter.rb
|
|
92
|
+
- lib/knitsearch/levenshtein.rb
|
|
93
|
+
- lib/knitsearch/migration.rb
|
|
94
|
+
- lib/knitsearch/model.rb
|
|
95
|
+
- lib/knitsearch/multisearchable.rb
|
|
96
|
+
- lib/knitsearch/multisearchable_sync.rb
|
|
97
|
+
- lib/knitsearch/query.rb
|
|
98
|
+
- lib/knitsearch/version.rb
|
|
99
|
+
- lib/tasks/knitsearch.rake
|
|
100
|
+
homepage: https://github.com/driveton/knitsearch
|
|
101
|
+
licenses:
|
|
102
|
+
- MIT
|
|
103
|
+
metadata:
|
|
104
|
+
source_code_uri: https://github.com/driveton/knitsearch
|
|
105
|
+
changelog_uri: https://github.com/driveton/knitsearch/blob/main/CHANGELOG.md
|
|
106
|
+
rubygems_mfa_required: 'true'
|
|
107
|
+
rdoc_options: []
|
|
108
|
+
require_paths:
|
|
109
|
+
- lib
|
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
111
|
+
requirements:
|
|
112
|
+
- - ">="
|
|
113
|
+
- !ruby/object:Gem::Version
|
|
114
|
+
version: 3.2.0
|
|
115
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
|
+
requirements:
|
|
117
|
+
- - ">="
|
|
118
|
+
- !ruby/object:Gem::Version
|
|
119
|
+
version: '0'
|
|
120
|
+
requirements: []
|
|
121
|
+
rubygems_version: 4.0.6
|
|
122
|
+
specification_version: 4
|
|
123
|
+
summary: Full-text search for Rails 8 + SQLite with ActionText, associations, and
|
|
124
|
+
multi-model search.
|
|
125
|
+
test_files: []
|