smart_rag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +33 -0
- data/README.en.md +115 -0
- data/README.md +144 -0
- data/config/database.yml +42 -0
- data/config/fulltext_search.yml +111 -0
- data/config/llm_config.yml +15 -0
- data/config/smart_rag.yml +156 -0
- data/db/fix_search_issues.sql +81 -0
- data/db/migrations/001_create_source_documents.rb +26 -0
- data/db/migrations/002_create_source_sections.rb +20 -0
- data/db/migrations/003_create_tags.rb +17 -0
- data/db/migrations/004_create_research_topics.rb +16 -0
- data/db/migrations/005_create_relationship_tables.rb +42 -0
- data/db/migrations/006_create_text_search_configs.rb +28 -0
- data/db/migrations/007_create_section_fts.rb +109 -0
- data/db/migrations/008_create_embeddings.rb +28 -0
- data/db/migrations/009_create_search_logs.rb +30 -0
- data/db/migrations/010_add_metadata_to_source_documents.rb +10 -0
- data/db/migrations/011_add_source_fields_to_source_documents.rb +23 -0
- data/db/rebuild_fts_complete.sql +51 -0
- data/db/seeds/text_search_configs.sql +28 -0
- data/examples/01_quick_start.rb +32 -0
- data/examples/02_document_management.rb +41 -0
- data/examples/03_search_operations.rb +46 -0
- data/examples/04_topics_and_tags.rb +38 -0
- data/examples/05_advanced_patterns.rb +154 -0
- data/examples/06_error_handling_and_retry.rb +64 -0
- data/examples/README.md +42 -0
- data/examples/common.rb +57 -0
- data/lib/smart_rag/chunker/markdown_chunker.rb +315 -0
- data/lib/smart_rag/config.rb +126 -0
- data/lib/smart_rag/core/document_processor.rb +537 -0
- data/lib/smart_rag/core/embedding.rb +340 -0
- data/lib/smart_rag/core/fulltext_manager.rb +483 -0
- data/lib/smart_rag/core/markitdown_bridge.rb +85 -0
- data/lib/smart_rag/core/query_processor.rb +577 -0
- data/lib/smart_rag/errors.rb +88 -0
- data/lib/smart_rag/models/embedding.rb +140 -0
- data/lib/smart_rag/models/model_base.rb +106 -0
- data/lib/smart_rag/models/research_topic.rb +171 -0
- data/lib/smart_rag/models/research_topic_section.rb +86 -0
- data/lib/smart_rag/models/research_topic_tag.rb +89 -0
- data/lib/smart_rag/models/search_log.rb +198 -0
- data/lib/smart_rag/models/section_fts.rb +170 -0
- data/lib/smart_rag/models/section_tag.rb +81 -0
- data/lib/smart_rag/models/source_document.rb +204 -0
- data/lib/smart_rag/models/source_section.rb +201 -0
- data/lib/smart_rag/models/tag.rb +214 -0
- data/lib/smart_rag/models/text_search_config.rb +168 -0
- data/lib/smart_rag/models.rb +116 -0
- data/lib/smart_rag/parsers/query_parser.rb +291 -0
- data/lib/smart_rag/retrieve.rb +745 -0
- data/lib/smart_rag/services/embedding_service.rb +278 -0
- data/lib/smart_rag/services/fulltext_search_service.rb +456 -0
- data/lib/smart_rag/services/hybrid_search_service.rb +768 -0
- data/lib/smart_rag/services/summarization_service.rb +322 -0
- data/lib/smart_rag/services/tag_service.rb +614 -0
- data/lib/smart_rag/services/vector_search_service.rb +347 -0
- data/lib/smart_rag/smart_chunking/chunk.rb +10 -0
- data/lib/smart_rag/smart_chunking/media_context.rb +9 -0
- data/lib/smart_rag/smart_chunking/merger.rb +94 -0
- data/lib/smart_rag/smart_chunking/parser.rb +75 -0
- data/lib/smart_rag/smart_chunking/pipeline.rb +45 -0
- data/lib/smart_rag/smart_chunking/section.rb +11 -0
- data/lib/smart_rag/smart_chunking/structure_detector.rb +31 -0
- data/lib/smart_rag/smart_chunking/tokenizer.rb +24 -0
- data/lib/smart_rag/version.rb +3 -0
- data/lib/smart_rag.rb +986 -0
- data/workers/analyze_content.rb +6 -0
- data/workers/get_embedding.rb +7 -0
- metadata +311 -0
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
require_relative '../core/fulltext_manager'
|
|
2
|
+
|
|
3
|
+
module SmartRAG
|
|
4
|
+
module Services
|
|
5
|
+
# FulltextSearchService executes full-text keyword search with multi-language support
|
|
6
|
+
# Provides a clean interface for full-text search operations
|
|
7
|
+
class FulltextSearchService
|
|
8
|
+
attr_reader :fulltext_manager, :query_parser, :config, :logger
|
|
9
|
+
|
|
10
|
+
# Default service configuration.
#
# Instances build their own config with a shallow DEFAULT_CONFIG.merge(options),
# so any nested value is shared by reference between every instance. The nested
# :highlight_options hash is therefore frozen as well, so that one instance
# cannot silently change the defaults seen by all the others.
DEFAULT_CONFIG = {
  default_language: 'en',
  max_results: 100,
  default_limit: 20,
  enable_highlighting: true,
  highlight_options: {
    max_words: 50,
    min_words: 15,
    max_fragments: 3,
    start_sel: '<mark>',
    stop_sel: '</mark>'
  }.freeze,
  enable_spellcheck: false,
  enable_suggestions: false,
  min_search_length: 2, # Minimum query length
  max_search_length: 1000 # Maximum query length
}.freeze
|
|
28
|
+
|
|
29
|
+
# Build a service on top of an existing full-text manager.
# @param fulltext_manager [FulltextManager] manager that executes the searches
# @param query_parser [QueryParser, nil] parser to use; when omitted the
#   manager's own parser (fulltext_manager.query_parser) is used
# @param options [Hash] overrides merged over DEFAULT_CONFIG; :logger supplies
#   the logger (a Logger writing to STDOUT is created when absent)
def initialize(fulltext_manager, query_parser = nil, options = {})
  @fulltext_manager = fulltext_manager

  @query_parser = query_parser
  @query_parser = fulltext_manager.query_parser unless query_parser

  @config = DEFAULT_CONFIG.merge(options)

  supplied_logger = options[:logger]
  @logger = supplied_logger ? supplied_logger : Logger.new(STDOUT)
end
|
|
39
|
+
|
|
40
|
+
# Perform full-text search
# @param query [String] Search query text
# @param options [Hash] Search options
# @option options [String] :language Language code (detected from the query if nil)
# @option options [Integer] :limit Maximum results (default: config[:default_limit],
#   never more than config[:max_results])
# @option options [Boolean] :enable_highlighting Forwarded to format_search_results
# @option options [Hash] :filters Search filters; when absent they are built
#   from the individual filter options below
# @option options [Array<Integer>] :document_ids Filter by document IDs
# @option options [Array<Integer>] :tag_ids Filter by tag IDs
# @option options [DateTime] :date_from Filter by start date
# @option options [DateTime] :date_to Filter by end date
# @option options [Boolean] :include_content Forwarded to format_search_results
# @option options [Boolean] :include_metadata Forwarded to format_search_results
# @return [Hash] :query, :query_info, :results and :metadata
#   (:total_count, :execution_time_ms, :language, :has_more); :suggestions is
#   added when spellcheck is enabled and nothing was found
# @raise [ArgumentError] when validate_query rejects the query
# @raise [Errors::FulltextSearchServiceError] when any other StandardError occurs
def search(query, options = {})
  # Validate query
  validation_error = validate_query(query)
  raise ArgumentError, validation_error if validation_error

  # Parse advanced queries if needed
  query_info = analyze_query(query)

  # Extract options
  language = options[:language] || detect_language(query)
  # Enforce config[:max_results] so a caller cannot request an unbounded page.
  limit = [options[:limit] || config[:default_limit], config[:max_results]].compact.min
  filters = options[:filters] || extract_filters(options)

  # Log search start
  @logger.info "Full-text search: '#{query}', language: #{language}, limit: #{limit}"

  # Execute search; a monotonic clock keeps the duration immune to wall-clock jumps
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  results = if filters.empty?
              fulltext_manager.search_by_text(query, language, limit)
            else
              fulltext_manager.search_with_filters(query, filters, {
                language: language,
                limit: limit
              })
            end
  execution_time = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round

  # Format results with highlighting and metadata
  formatted_results = format_search_results(results, options)

  # Generate response
  response = {
    query: query,
    query_info: query_info,
    results: formatted_results,
    metadata: {
      total_count: results.length,
      execution_time_ms: execution_time,
      language: language,
      # A full page is taken as a hint that more rows may exist.
      has_more: results.length >= limit
    }
  }

  # Add spellcheck/suggestions if enabled
  response[:suggestions] = generate_suggestions(query, language) if config[:enable_spellcheck] && results.empty?

  # Log search completion
  log_search(query, results.length, execution_time)

  response
rescue ArgumentError => e
  # Re-raise ArgumentError (validation errors) without wrapping
  log_search(query, 0, 0, e.message)
  raise
rescue StandardError => e
  @logger.error "Full-text search failed: #{e.message}"
  @logger.error Array(e.backtrace).join("\n")
  log_search(query, 0, 0, e.message)
  raise Errors::FulltextSearchServiceError, "Search failed: #{e.message}"
end
|
|
115
|
+
|
|
116
|
+
# Lightweight search that returns only simplified hits.
# The manager is called with a nil language; any StandardError is logged and
# turned into an empty list.
# @param query [String] Search query
# @param limit [Integer] Result limit
# @return [Array] One simplify_result hash per hit, or [] on failure
def quick_search(query, limit = 10)
  begin
    raw_hits = fulltext_manager.search_by_text(query, nil, limit)
    raw_hits.map { |hit| simplify_result(hit) }
  rescue StandardError => failure
    @logger.error "Quick search failed: #{failure.message}"
    []
  end
end
|
|
127
|
+
|
|
128
|
+
# Run #search with the :enable_highlighting option forced to true.
# The caller's options hash is left untouched.
# @param query [String] Search query
# @param options [Hash] Search options
# @return [Hash] Whatever #search returns
def search_with_highlighting(query, options = {})
  highlighted_options = options.merge(enable_highlighting: true)
  search(query, highlighted_options)
end
|
|
137
|
+
|
|
138
|
+
# Advanced search with filter support.
# Delegates to #search with +filters+ placed under the :filters option. The
# options hash is copied rather than modified, so the caller's hash (which may
# be frozen or reused for other calls) is never mutated.
# @param query [String] Search query
# @param filters [Hash] Search filters
# @param options [Hash] Search options
# @return [Hash] Filtered search results, as returned by #search
def advanced_search(query, filters, options = {})
  search(query, options.merge(filters: filters))
end
|
|
147
|
+
|
|
148
|
+
# Multi-language search.
# Runs #search once per language, tags every hit with its language, merges
# the hits and orders them by :rank_score (highest first, missing scores
# count as 0). A language whose search raises is logged and skipped.
#
# Each hit's :rank is reassigned after the merge: the per-language ranks
# produced by #search all start at 1 and would otherwise repeat and disagree
# with the merged order. Ties keep their original relative order.
# @param query [String] Search query
# @param languages [Array<String>, String] Target languages (a single value is accepted)
# @param options [Hash] Search options; :limit caps the merged list
#   (default: config[:default_limit])
# @return [Hash] :query, :languages, :results and :metadata
#   (:total_count, :execution_time_ms summed over languages, :multilingual)
def multilingual_search(query, languages, options = {})
  languages = Array(languages)
  merged = []
  total_time = 0

  languages.each do |lang|
    begin
      lang_results = search(query, options.merge(language: lang))
      merged.concat(lang_results[:results].map { |r| r.merge(language: lang) })
      total_time += lang_results[:metadata][:execution_time_ms]
    rescue StandardError => e
      @logger.error "Search failed for language #{lang}: #{e.message}"
    end
  end

  limit = options[:limit] || config[:default_limit]

  # The index is part of the sort key because sort_by alone is not stable.
  all_results = merged
    .each_with_index
    .sort_by { |r, position| [-(r[:rank_score] || 0), position] }
    .first(limit)
    .each_with_index
    .map { |(r, _position), index| r.merge(rank: index + 1) }

  {
    query: query,
    languages: languages,
    results: all_results,
    metadata: {
      total_count: all_results.length,
      execution_time_ms: total_time,
      multilingual: true
    }
  }
end
|
|
183
|
+
|
|
184
|
+
# Search suggestions (auto-complete).
# Fetches section contents that contain the prefix (case-insensitive) for the
# given language, extracts the words starting with the prefix and returns the
# most frequent ones, lower-cased. Ties are ordered alphabetically.
#
# The prefix is trimmed before use and regex-escaped before it is sent to the
# database, so characters such as "+" or "(" are matched literally instead of
# producing an invalid or unintended pattern.
# @param prefix [String] Query prefix (fewer than 2 characters yields [])
# @param options [Hash] Options
# @option options [Integer] :limit Maximum suggestions (default: 10)
# @option options [String] :language Language code (default: config[:default_language])
# @return [Array<String>] Suggestion list; [] when anything fails (the error is logged)
def suggestions(prefix, options = {})
  term = prefix.to_s.strip
  return [] if term.length < 2

  limit = options[:limit] || 10
  language = options[:language] || config[:default_language]
  pattern = /#{Regexp.escape(term)}/i

  # Simple implementation - in production, use a dedicated suggest index
  contents = fulltext_manager.db[:section_fts]
    .join(:source_sections, id: Sequel[:section_fts][:section_id])
    .select(Sequel[:source_sections][:content])
    .where(Sequel[:section_fts][:language] =~ language)
    .where(Sequel[:source_sections][:content] =~ pattern)
    .limit(limit * 10) # Get more to process
    .map { |row| row[:content] }

  # Count how often each matching word occurs, ignoring case
  frequencies = Hash.new(0)
  contents.each do |text|
    extract_words(text, term).each { |word| frequencies[word.downcase] += 1 }
  end

  frequencies
    .sort_by { |word, count| [-count, word] }
    .first(limit)
    .map { |word, _| word }
rescue StandardError => e
  @logger.error "Suggestions generation failed: #{e.message}"
  []
end
|
|
220
|
+
|
|
221
|
+
# Collect search statistics.
# Combines the manager's :total_indexed stat with the performance, language
# distribution and popular-query helpers. Any StandardError is logged and an
# empty hash is returned instead.
# @return [Hash] Search statistics, or {} on failure
def statistics
  summary = {}
  summary[:total_indexed] = fulltext_manager.stats[:total_indexed]
  summary[:search_performance] = get_performance_stats
  summary[:language_distribution] = get_language_distribution
  summary[:popular_queries] = get_popular_queries
  summary
rescue StandardError => failure
  @logger.error "Failed to get statistics: #{failure.message}"
  {}
end
|
|
234
|
+
|
|
235
|
+
private
|
|
236
|
+
|
|
237
|
+
# Validate search query.
# Length limits are applied to the query with surrounding whitespace removed.
# Values that cannot be stripped (numbers, symbols, ...) are rejected with a
# message instead of raising NoMethodError.
# @param query [String, nil] Raw query
# @return [String, nil] Error message, or nil when the query is acceptable
def validate_query(query)
  return 'Query cannot be nil' if query.nil?
  return 'Query must be a string' unless query.respond_to?(:strip)

  stripped = query.strip
  return 'Query cannot be empty' if stripped.empty?

  length = stripped.length
  if length < config[:min_search_length]
    return "Query too short (minimum #{config[:min_search_length]} characters)"
  end

  if length > config[:max_search_length]
    return "Query too long (maximum #{config[:max_search_length]} characters)"
  end

  nil
end
|
|
253
|
+
|
|
254
|
+
# Hand the query to the parser's advanced-query analysis.
# @param query [String] Search query
# @return [Object] Whatever the parser's parse_advanced_query returns
def analyze_query(query)
  parser = @query_parser
  parser.parse_advanced_query(query)
end
|
|
258
|
+
|
|
259
|
+
# Ask the parser which language the query is written in.
# @param query [String] Search query
# @return [Object] Whatever the parser's detect_language returns
def detect_language(query)
  parser = @query_parser
  parser.detect_language(query)
end
|
|
263
|
+
|
|
264
|
+
# Build a filters hash from the individual filter options.
# Only :document_ids, :tag_ids, :date_from and :date_to are considered, and a
# key is copied only when its value is truthy.
# @param options [Hash] Search options
# @return [Hash] Filters present in the options (possibly empty)
def extract_filters(options)
  %i[document_ids tag_ids date_from date_to].each_with_object({}) do |key, selected|
    value = options[key]
    selected[key] = value if value
  end
end
|
|
273
|
+
|
|
274
|
+
# Format search results with metadata.
# Every row becomes a hash with :section_id, :rank_score and a 1-based :rank
# that follows the order of +results+.
#
# Highlighting honours the per-call :enable_highlighting option and falls
# back to config[:enable_highlighting] only when the option is absent.
# @param results [Array<Hash>] Raw rows (:section_id, :rank_score, optional :highlight)
# @param options [Hash] Search options
# @option options [Boolean] :enable_highlighting Copy :highlight into the output
# @option options [Boolean] :include_content Add :content and :title via get_section_content
# @option options [Boolean] :include_metadata Merge in get_section_metadata
# @return [Array<Hash>] Formatted results
def format_search_results(results, options)
  requested = options[:enable_highlighting]
  highlighting = requested.nil? ? config[:enable_highlighting] : requested

  results.map.with_index(1) do |result, rank|
    formatted = {
      section_id: result[:section_id],
      rank_score: result[:rank_score],
      rank: rank
    }

    # Add highlight if available and enabled
    formatted[:highlight] = result[:highlight] if highlighting && result[:highlight]

    # Include content if requested
    if options[:include_content]
      section = get_section_content(result[:section_id])
      formatted[:content] = section[:content]
      # The section row exposes its title as :section_title; :title is kept
      # as a fallback for rows that already use that key.
      formatted[:title] = section[:section_title] || section[:title]
    end

    # Include metadata if requested
    formatted.merge!(get_section_metadata(result[:section_id])) if options[:include_metadata]

    formatted
  end
end
|
|
302
|
+
|
|
303
|
+
# Load the content and title columns of one section.
# @param section_id [Integer] Section primary key
# @return [Hash] Row with :content and :section_title, or {} when no row matches
def get_section_content(section_id)
  sections = @fulltext_manager.db[:source_sections]
  row = sections.where(id: section_id).select(:content, :section_title).first
  row || {}
end
|
|
310
|
+
|
|
311
|
+
# Load document-level metadata for one section.
# The section is left-joined to its source document. The document's :metadata
# column is merged into the result when it is a Hash, or a String that parses
# as a JSON object; malformed JSON strings are ignored.
# @param section_id [Integer] Section primary key
# @return [Hash] :document_id, :document_title, :author, :publication_date,
#   :section_number plus any merged metadata keys; {} when no row matches
def get_section_metadata(section_id)
  sections = Sequel[:source_sections]
  documents = Sequel[:source_documents]

  row = @fulltext_manager.db[:source_sections]
    .where(sections[:id] => section_id)
    .left_join(:source_documents, id: sections[:document_id])
    .select(
      documents[:id].as(:document_id),
      documents[:title].as(:document_title),
      documents[:author],
      documents[:publication_date],
      sections[:section_number],
      documents[:metadata]
    )
    .first
  return {} unless row

  details = %i[document_id document_title author publication_date section_number]
            .each_with_object({}) { |key, acc| acc[key] = row[key] }

  raw = row[:metadata]
  extra = case raw
          when String
            begin
              JSON.parse(raw)
            rescue JSON::ParserError
              # Ignore malformed metadata strings
              nil
            end
          when Hash
            raw
          end

  details.merge!(extra) if extra.is_a?(Hash)
  details
end
|
|
350
|
+
|
|
351
|
+
# Reduce a raw hit to the two fields quick search exposes.
# @param result [Hash] Raw row with :section_id and :rank_score
# @return [Hash] { id:, rank: } where :rank carries the row's :rank_score
def simplify_result(result)
  section_id = result[:section_id]
  score = result[:rank_score]
  { id: section_id, rank: score }
end
|
|
358
|
+
|
|
359
|
+
# Generate search suggestions.
# For each query word of at least 3 characters, asks the database for terms
# in indexed section content that start with the word's first four
# characters, and returns up to three distinct terms.
#
# The pattern is sent as a String because a Ruby Regexp cannot be used as a
# plain SQL function argument. The user-supplied stem is regex-escaped so it
# is matched literally.
# NOTE(review): the pattern syntax ("(?i)" for case-insensitivity, "\m" for
# start-of-word) assumes PostgreSQL's substring(text, pattern) - confirm
# against the configured database.
# @param query [String] Search query
# @param language [String] Language code used to filter section_fts rows
# @return [Array<String>] Up to three suggested terms
def generate_suggestions(query, language)
  found = []

  # Split query into words
  query.strip.split(/\s+/).each do |word|
    next if word.length < 3

    pattern = "(?i)\\m#{Regexp.escape(word[0..3])}\\w*"

    # Find similar terms in the index
    similar = @fulltext_manager.db[:section_fts]
      .join(:source_sections, id: Sequel[:section_fts][:section_id])
      .select(
        Sequel.function(:substring, Sequel[:source_sections][:content], pattern).as(:term)
      )
      .where(Sequel[:section_fts][:language] =~ language)
      .map { |row| row[:term] }
      .compact
      .uniq

    found.concat(similar)

    # Only the first three distinct terms are returned, so later words cannot
    # change the outcome once three have been collected.
    break if found.uniq.length >= 3
  end

  found.uniq.first(3)
end
|
|
387
|
+
|
|
388
|
+
# Extract words starting with prefix (case-insensitive).
# Words are runs of Unicode word characters, so accented and non-Latin words
# stay intact (Ruby's \w only matches ASCII word characters). A nil text
# yields no words instead of raising.
# @param text [String, nil] Text to scan
# @param prefix [String] Prefix each returned word must start with
# @return [Array<String>] Matching words in their original case and order
def extract_words(text, prefix)
  needle = prefix.to_s.downcase
  text.to_s.scan(/[[:word:]]+/).select { |word| word.downcase.start_with?(needle) }
end
|
|
394
|
+
|
|
395
|
+
# Performance statistics.
# Always returns the same fixed values (zeros and an empty list); nothing is
# read from the database here.
# @return [Hash] :average_response_time, :slowest_queries, :total_searches
def get_performance_stats
  stats = {}
  stats[:average_response_time] = 0
  stats[:slowest_queries] = []
  stats[:total_searches] = 0
  stats
end
|
|
403
|
+
|
|
404
|
+
# Count indexed rows per language.
# Groups section_fts by :language and returns one entry per group.
# @return [Array<Hash>] Entries of the form { language:, count: }
def get_language_distribution
  row_count = Sequel.function(:count, '*').as(:count)
  per_language = @fulltext_manager.db[:section_fts]
    .select(:language, row_count)
    .group(:language)

  per_language.map do |row|
    { language: row[:language], count: row[:count] }
  end
end
|
|
411
|
+
|
|
412
|
+
# Most frequent queries in search_logs over the last 24 hours.
# Groups by :query, orders by occurrence count (descending) and keeps ten.
# @return [Array<Hash>] Entries of the form { query:, count: }
def get_popular_queries
  one_day_ago = Time.now - 86_400 # Last 24 hours
  occurrences = Sequel.function(:count, '*').as(:count)

  recent = @fulltext_manager.db[:search_logs]
    .select(:query, occurrences)
    .where(Sequel[:created_at] > one_day_ago)
    .group(:query)
    .order(Sequel.desc(:count))
    .limit(10)

  recent.map do |row|
    { query: row[:query], count: row[:count] }
  end
end
|
|
422
|
+
|
|
423
|
+
# Record a search in the search_logs table (best effort).
# Nothing is written for nil/blank queries or when the manager exposes no
# database. An error message, when given, is stored as JSON in the :filters
# column. Failures while logging are reported to the logger and swallowed.
# @param query [String] Search query
# @param result_count [Integer] Number of results returned
# @param execution_time [Integer] Execution time in milliseconds
# @param error [String, nil] Error message of a failed search
def log_search(query, result_count, execution_time, error = nil)
  # Skip logging validation errors (nil/empty queries)
  return if query.nil? || query.to_s.strip.empty?

  begin
    manager = @fulltext_manager
    # Skip logging if database or fulltext_manager is not available
    return unless manager && manager.respond_to?(:db) && manager.db

    # There is no error_message column, so the row carries none
    entry = {
      query: query.to_s,
      search_type: 'fulltext',
      execution_time_ms: execution_time,
      results_count: result_count,
      created_at: Sequel::CURRENT_TIMESTAMP
    }
    entry[:filters] = { error: error }.to_json if error

    logs = manager.db[:search_logs]
    logs.insert(entry) if logs
  rescue StandardError => failure
    @logger.error "Failed to log search: #{failure.message}"
  end
end
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
# Custom errors
module Errors
  # Error type of the full-text search service; a plain StandardError subclass.
  FulltextSearchServiceError = Class.new(StandardError)
end
|
|
455
|
+
end
|
|
456
|
+
end
|