knitsearch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module Knitsearch
  # FTS5 match-string builder. Escapes user input so it can't break out of FTS5
  # syntax: every token is wrapped in double quotes, embedded double quotes are
  # doubled, and control characters are turned into token separators.
  #
  # Returns nil for empty input; the caller decides what to do with that
  # (typically: return an empty relation).
  module Query
    extend self

    # ASCII control characters (0x00-0x1F and DEL). Each one is replaced by a
    # space before tokenizing, so it acts as a token separator.
    CONTROL_CHARACTERS = /[\x00-\x1f\x7f]/

    # Builds an FTS5 MATCH expression from free-form user input.
    #
    # @param input [#to_s, nil] raw user input
    # @param operator [Symbol] :and (tokens separated by a space) or :or
    #   (tokens separated by " OR "); only :and is accepted with match: :phrase
    # @param prefix [Boolean] append "*" to every quoted token; only used with
    #   match: :word (phrase mode does not receive it)
    # @param match [Symbol] :word (each token quoted on its own) or :phrase
    #   (all tokens inside a single quoted string)
    # @return [String, nil] the match expression, or nil when the input is nil
    #   or contains no tokens
    # @raise [ArgumentError] for an unknown operator or match mode, or for
    #   match: :phrase combined with operator: :or
    def escape(input, operator: :and, prefix: false, match: :word)
      return nil if input.nil?

      # scrub first: running a regexp over a string with invalid byte sequences
      # raises ArgumentError, which would surface as an unrelated crash for
      # malformed user input. Invalid bytes are treated like control characters.
      tokens = input.to_s.scrub(" ").gsub(CONTROL_CHARACTERS, " ").split(" ")
      return nil if tokens.empty?

      case match
      when :word
        build_word_match(tokens, operator, prefix)
      when :phrase
        build_phrase_match(tokens, operator)
      else
        raise ArgumentError, "match must be :word or :phrase, got: #{match.inspect}"
      end
    end

    private

    # Wraps +text+ in double quotes, doubling any double quote inside it.
    def quote_token(text)
      %("#{text.gsub('"', '""')}")
    end

    # Quotes every token individually and joins them according to +operator+.
    def build_word_match(tokens, operator, prefix)
      separator =
        case operator
        when :and then " "
        when :or then " OR "
        else raise ArgumentError, "operator must be :and or :or, got: #{operator.inspect}"
        end

      suffix = prefix ? "*" : ""
      tokens.map { |token| "#{quote_token(token)}#{suffix}" }.join(separator)
    end

    # Joins all tokens with single spaces inside one quoted string.
    def build_phrase_match(tokens, operator)
      case operator
      when :and
        quote_token(tokens.join(" "))
      when :or
        raise ArgumentError,
              "match: :phrase cannot be combined with operator: :or — a phrase is a single contiguous unit, not a set of terms to OR together"
      else
        # Same validation as word mode, so an unknown operator is never silently accepted.
        raise ArgumentError, "operator must be :and or :or, got: #{operator.inspect}"
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Knitsearch
  # Version string of the knitsearch gem.
  VERSION = "0.1.0"
end
data/lib/knitsearch.rb ADDED
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support"
4
+
5
+ require "knitsearch/version"
6
+ require "knitsearch/engine"
7
+ require "knitsearch/has_many_dependent"
8
+ require "knitsearch/has_many_through_join_dependent"
9
+ require "knitsearch/has_many_through_target_dependent"
10
+ require "knitsearch/document"
11
+ require "knitsearch/multisearchable"
12
+ require "knitsearch/multisearchable_sync"
13
+
14
+ require "zeitwerk"
15
+
16
# Set up a Zeitwerk loader for this gem, telling it to ignore the
# generators directory before activating it.
Zeitwerk::Loader.for_gem.tap do |gem_loader|
  gem_loader.ignore("#{__dir__}/generators")
  gem_loader.setup
end
19
+
20
module Knitsearch
  # Named tokenizer presets mapped to tokenizer specification strings.
  # NOTE(review): presumably interpolated into the FTS5 table definition by the
  # migration code; that code is not visible here.
  TOKENIZER_PRESETS = {
    unicode: "unicode61 remove_diacritics 2",
    ascii: "ascii",
    porter: "porter",
    trigram: "trigram"
  }.freeze

  # Weight letter => numeric weight.
  # NOTE(review): presumably per-column ranking weights; confirm against the
  # code that consumes them.
  WEIGHT_BUCKETS = {
    "A" => 8,
    "B" => 4,
    "C" => 2,
    "D" => 1
  }.freeze

  # Dictionary names accepted by the gem (validation happens elsewhere).
  SUPPORTED_DICTIONARIES = %w[simple english trigram].freeze

  # Base class for every error raised by this gem.
  class Error < StandardError; end
  class SchemaMismatchError < Error; end
  class ColumnError < Error; end
  class UnknownDictionaryError < Error; end
  class ConfigurationError < Error; end

  class << self
    # The four registries below are lazily created hashes whose missing keys
    # default to a fresh empty array, so callers can append without checking.
    # (No attr_reader is declared for them: these explicit readers are the
    # only definition, which avoids "method redefined" warnings.)

    # @return [Hash{Class => Array<Hash>}] parent class => belongs_to dependents
    def belongs_to_dependents
      @belongs_to_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # @return [Hash{Class => Array<Hash>}] child class => has_many dependents
    def has_many_dependents
      @has_many_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # @return [Hash{Class => Array<Hash>}] join class => has_many :through dependents
    def has_many_through_dependents
      @has_many_through_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # @return [Hash{Class => Array<Hash>}] target class => has_many :through dependents
    def has_many_through_target_dependents
      @has_many_through_target_dependents ||= Hash.new { |h, k| h[k] = [] }
    end

    # Searches the shared multisearch index.
    #
    # The query is escaped with Knitsearch::Query (word match, AND semantics,
    # no prefix matching), matched against knitsearches_fts and ordered by
    # bm25(knitsearches_fts).
    #
    # @param query [String, nil] raw user query
    # @param limit [Integer, nil] optional maximum number of rows
    # @return the Document relation; Document.none when the query is blank or
    #   escapes to nothing
    def multisearch(query, limit: nil)
      return Document.none if query.blank?

      escaped = Knitsearch::Query.escape(query, operator: :and, prefix: false, match: :word)
      return Document.none if escaped.nil?

      relation = Document
                 .joins("INNER JOIN knitsearches_fts ON knitsearches_fts.rowid = knitsearches.id")
                 .where("knitsearches_fts MATCH ?", escaped)
                 .order(Arel.sql("bm25(knitsearches_fts)"))

      limit ? relation.limit(limit) : relation
    end

    # Records that +child_class+ depends on columns of +parent_class+ through
    # +foreign_key+, and makes sure the parent has the cascade callback.
    #
    # @param parent_class [Class] class that receives the after_update_commit hook
    # @param child_class [Class] stored under :model
    # @param foreign_key stored under :foreign_key
    # @param shadow_map stored under :columns
    def register_belongs_to_dependent(parent_class, child_class, foreign_key, shadow_map)
      belongs_to_dependents[parent_class] << {
        model: child_class,
        foreign_key: foreign_key,
        columns: shadow_map
      }

      # Install the after_update_commit hook only once per parent class; the
      # marker ivar lives on the class object itself.
      return if parent_class.instance_variable_defined?(:@knitsearch_dependents_installed)

      parent_class.instance_variable_set(:@knitsearch_dependents_installed, true)
      parent_class.after_update_commit :knitsearch_cascade_to_children
    end

    # Records a has_many dependency for +child_class+ and mixes
    # HasManyDependent into it (once).
    def register_has_many_dependent(child_class, parent_class, inverse_fk, shadow_map, parent_assoc)
      has_many_dependents[child_class] << {
        parent: parent_class,
        inverse_fk: inverse_fk,
        columns: shadow_map,
        parent_assoc: parent_assoc
      }

      child_class.include(HasManyDependent) unless child_class.include?(HasManyDependent)
    end

    # Records a has_many :through dependency on both the join class and the
    # target class, and mixes the matching callback modules into each (once).
    def register_has_many_through_dependent(join_class:, target_class:, parent_class:, parent_fk:, target_fk:, parent_assoc:, shadow_map:)
      # Store on join class side (for join create/destroy callbacks)
      has_many_through_dependents[join_class] << {
        parent_class: parent_class,
        parent_fk: parent_fk,
        target_class: target_class,
        target_fk: target_fk,
        parent_assoc: parent_assoc,
        columns: shadow_map
      }

      # Store on target class side (for target update callbacks); the values
      # of shadow_map are kept separately as :source_columns.
      has_many_through_target_dependents[target_class] << {
        join_class: join_class,
        parent_class: parent_class,
        parent_fk: parent_fk,
        target_fk: target_fk,
        parent_assoc: parent_assoc,
        columns: shadow_map,
        source_columns: shadow_map.values
      }

      # Install callbacks on both join and target classes
      join_class.include(HasManyThroughJoinDependent) unless join_class.include?(HasManyThroughJoinDependent)
      target_class.include(HasManyThroughTargetDependent) unless target_class.include?(HasManyThroughTargetDependent)
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
namespace :knitsearch do
  # Validates the task argument and resolves it to a model class.
  #
  # Only constant resolution is guarded by `rescue NameError`. NoMethodError is
  # a NameError subclass, so rescuing around the whole task body would report
  # any such error raised while backfilling/reindexing as "Could not find model".
  #
  # task_name       - task name used in the usage message ("backfill"/"reindex")
  # required_method - class method the model must respond to
  # Raises RuntimeError when the name is blank, the constant cannot be
  # resolved, or the class does not respond to required_method.
  resolve_model = lambda do |model_name, task_name, required_method|
    raise "Model name required: bin/rails knitsearch:#{task_name}[ModelName]" if model_name.blank?

    model_class =
      begin
        model_name.classify.constantize
      rescue NameError
        raise "Could not find model #{model_name}"
      end

    raise "#{model_name} does not include Knitsearch::Model" unless model_class.respond_to?(required_method)

    model_class
  end

  desc "Backfill the FTS index from existing records. Usage: bin/rails knitsearch:backfill[Article]"
  task :backfill, [:model_name] => :environment do |_task, args|
    model_class = resolve_model.call(args[:model_name], "backfill", :knitsearch_backfill!)

    puts "Backfilling search index for #{model_class.name}..."
    model_class.knitsearch_backfill!
    puts "Backfill complete. Source table has #{model_class.count} rows."
  end

  desc "Rebuild the FTS5 index for a model. Usage: bin/rails knitsearch:reindex[Article]"
  task :reindex, [:model_name] => :environment do |_task, args|
    model_class = resolve_model.call(args[:model_name], "reindex", :reindex!)

    puts "Reindexing search for #{model_class.name}..."
    model_class.reindex!
    puts "Reindex complete for #{model_class.name}. Source table has #{model_class.count} rows."
  end
end
metadata ADDED
@@ -0,0 +1,125 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: knitsearch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - knitsearch contributors
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: activerecord
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '8.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '8.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: railties
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '8.0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '8.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: sqlite3
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '2.0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '2.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: minitest
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '5.0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '5.0'
68
+ description: Knitsearch adds FTS5-backed full-text search to ActiveRecord models.
69
+ Search by rich text, associated records, or multiple models in one query. Index
70
+ updates synchronously via SQLite triggers, atomic with source writes. BM25-ranked
71
+ results returned as a chainable Relation. Supports typo tolerance, phrase matching,
72
+ prefix matching, highlighting, snippets, and more.
73
+ email:
74
+ - noreply@driveton.com
75
+ executables: []
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - CHANGELOG.md
80
+ - README.md
81
+ - lib/generators/knitsearch/install/install_generator.rb
82
+ - lib/generators/knitsearch/install/templates/migration.rb.tt
83
+ - lib/generators/knitsearch/multisearch_install/multisearch_install_generator.rb
84
+ - lib/knitsearch.rb
85
+ - lib/knitsearch/document.rb
86
+ - lib/knitsearch/engine.rb
87
+ - lib/knitsearch/fuzzy_corrector.rb
88
+ - lib/knitsearch/has_many_dependent.rb
89
+ - lib/knitsearch/has_many_through_join_dependent.rb
90
+ - lib/knitsearch/has_many_through_target_dependent.rb
91
+ - lib/knitsearch/highlighter.rb
92
+ - lib/knitsearch/levenshtein.rb
93
+ - lib/knitsearch/migration.rb
94
+ - lib/knitsearch/model.rb
95
+ - lib/knitsearch/multisearchable.rb
96
+ - lib/knitsearch/multisearchable_sync.rb
97
+ - lib/knitsearch/query.rb
98
+ - lib/knitsearch/version.rb
99
+ - lib/tasks/knitsearch.rake
100
+ homepage: https://github.com/driveton/knitsearch
101
+ licenses:
102
+ - MIT
103
+ metadata:
104
+ source_code_uri: https://github.com/driveton/knitsearch
105
+ changelog_uri: https://github.com/driveton/knitsearch/blob/main/CHANGELOG.md
106
+ rubygems_mfa_required: 'true'
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: 3.2.0
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubygems_version: 4.0.6
122
+ specification_version: 4
123
+ summary: Full-text search for Rails 8 + SQLite with ActionText, associations, and
124
+ multi-model search.
125
+ test_files: []