smart_rag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +33 -0
- data/README.en.md +115 -0
- data/README.md +144 -0
- data/config/database.yml +42 -0
- data/config/fulltext_search.yml +111 -0
- data/config/llm_config.yml +15 -0
- data/config/smart_rag.yml +156 -0
- data/db/fix_search_issues.sql +81 -0
- data/db/migrations/001_create_source_documents.rb +26 -0
- data/db/migrations/002_create_source_sections.rb +20 -0
- data/db/migrations/003_create_tags.rb +17 -0
- data/db/migrations/004_create_research_topics.rb +16 -0
- data/db/migrations/005_create_relationship_tables.rb +42 -0
- data/db/migrations/006_create_text_search_configs.rb +28 -0
- data/db/migrations/007_create_section_fts.rb +109 -0
- data/db/migrations/008_create_embeddings.rb +28 -0
- data/db/migrations/009_create_search_logs.rb +30 -0
- data/db/migrations/010_add_metadata_to_source_documents.rb +10 -0
- data/db/migrations/011_add_source_fields_to_source_documents.rb +23 -0
- data/db/rebuild_fts_complete.sql +51 -0
- data/db/seeds/text_search_configs.sql +28 -0
- data/examples/01_quick_start.rb +32 -0
- data/examples/02_document_management.rb +41 -0
- data/examples/03_search_operations.rb +46 -0
- data/examples/04_topics_and_tags.rb +38 -0
- data/examples/05_advanced_patterns.rb +154 -0
- data/examples/06_error_handling_and_retry.rb +64 -0
- data/examples/README.md +42 -0
- data/examples/common.rb +57 -0
- data/lib/smart_rag/chunker/markdown_chunker.rb +315 -0
- data/lib/smart_rag/config.rb +126 -0
- data/lib/smart_rag/core/document_processor.rb +537 -0
- data/lib/smart_rag/core/embedding.rb +340 -0
- data/lib/smart_rag/core/fulltext_manager.rb +483 -0
- data/lib/smart_rag/core/markitdown_bridge.rb +85 -0
- data/lib/smart_rag/core/query_processor.rb +577 -0
- data/lib/smart_rag/errors.rb +88 -0
- data/lib/smart_rag/models/embedding.rb +140 -0
- data/lib/smart_rag/models/model_base.rb +106 -0
- data/lib/smart_rag/models/research_topic.rb +171 -0
- data/lib/smart_rag/models/research_topic_section.rb +86 -0
- data/lib/smart_rag/models/research_topic_tag.rb +89 -0
- data/lib/smart_rag/models/search_log.rb +198 -0
- data/lib/smart_rag/models/section_fts.rb +170 -0
- data/lib/smart_rag/models/section_tag.rb +81 -0
- data/lib/smart_rag/models/source_document.rb +204 -0
- data/lib/smart_rag/models/source_section.rb +201 -0
- data/lib/smart_rag/models/tag.rb +214 -0
- data/lib/smart_rag/models/text_search_config.rb +168 -0
- data/lib/smart_rag/models.rb +116 -0
- data/lib/smart_rag/parsers/query_parser.rb +291 -0
- data/lib/smart_rag/retrieve.rb +745 -0
- data/lib/smart_rag/services/embedding_service.rb +278 -0
- data/lib/smart_rag/services/fulltext_search_service.rb +456 -0
- data/lib/smart_rag/services/hybrid_search_service.rb +768 -0
- data/lib/smart_rag/services/summarization_service.rb +322 -0
- data/lib/smart_rag/services/tag_service.rb +614 -0
- data/lib/smart_rag/services/vector_search_service.rb +347 -0
- data/lib/smart_rag/smart_chunking/chunk.rb +10 -0
- data/lib/smart_rag/smart_chunking/media_context.rb +9 -0
- data/lib/smart_rag/smart_chunking/merger.rb +94 -0
- data/lib/smart_rag/smart_chunking/parser.rb +75 -0
- data/lib/smart_rag/smart_chunking/pipeline.rb +45 -0
- data/lib/smart_rag/smart_chunking/section.rb +11 -0
- data/lib/smart_rag/smart_chunking/structure_detector.rb +31 -0
- data/lib/smart_rag/smart_chunking/tokenizer.rb +24 -0
- data/lib/smart_rag/version.rb +3 -0
- data/lib/smart_rag.rb +986 -0
- data/workers/analyze_content.rb +6 -0
- data/workers/get_embedding.rb +7 -0
- metadata +311 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
require_relative 'model_base'
|
|
2
|
+
require 'sequel/plugins/validation_helpers'
|
|
3
|
+
|
|
4
|
+
module SmartRAG
|
|
5
|
+
module Models
|
|
6
|
+
# Embedding model for storing vector embeddings of document sections
|
|
7
|
+
class Embedding < Sequel::Model
|
|
8
|
+
include FactoryBotHelpers
|
|
9
|
+
plugin :validation_helpers
|
|
10
|
+
plugin :timestamps
|
|
11
|
+
|
|
12
|
+
# Set dataset after database is connected
|
|
13
|
+
# Points this model at the +embeddings+ table by calling +set_dataset+.
# Meant to be invoked once a real database connection is available.
#
# @return whatever +set_dataset+ returns
def self.set_dataset_from_db
  table_name = :embeddings
  set_dataset(table_name)
end
|
|
16
|
+
|
|
17
|
+
# Add bang methods for FactoryBot compatibility
|
|
18
|
+
# Builds a record from +attributes+, saves it with +save!+ and returns it.
#
# @param attributes [Hash] values handed to +new+
# @return the saved instance
# @raise [Sequel::ValidationFailed] when +save!+ returns a falsy value
def self.create!(attributes = {})
  record = new(attributes)
  raise(Sequel::ValidationFailed, record.errors.full_messages.join(', ')) unless record.save!

  record
end
|
|
23
|
+
|
|
24
|
+
# Relationships
|
|
25
|
+
many_to_one :section, class: '::SmartRAG::Models::SourceSection', key: :source_id
|
|
26
|
+
|
|
27
|
+
# Validation
|
|
28
|
+
# Runs the inherited validations, then requires both +source_id+ and
# +vector+ to be present.
def validate
  super
  validates_presence([:source_id, :vector])
end
|
|
32
|
+
|
|
33
|
+
# Class methods
|
|
34
|
+
class << self
|
|
35
|
+
# Find embeddings by source section
|
|
36
|
+
# Loads every embedding whose +source_id+ equals +section_id+.
#
# @param section_id the section identifier to match
# @return the result of calling +all+ on the filtered dataset
def by_section(section_id)
  scope = where(source_id: section_id)
  scope.all
end
|
|
39
|
+
|
|
40
|
+
# Find embeddings by multiple sections
|
|
41
|
+
# Loads every embedding whose +source_id+ matches +section_ids+.
#
# @param section_ids the identifiers passed as the +source_id+ filter value
# @return the result of calling +all+ on the filtered dataset
def by_sections(section_ids)
  scope = where(source_id: section_ids)
  scope.all
end
|
|
44
|
+
|
|
45
|
+
# Find similar embeddings using cosine distance (pgvector)
|
|
46
|
+
# Returns embeddings ordered by cosine distance to +query_vector+, keeping
# only rows whose distance is below <tt>1 - threshold</tt>.
#
# The <tt><=></tt> operator is used when +db.server_version+ is at least
# 120_000; otherwise the +cosine_distance+ function is used instead.
#
# @param query_vector [Array, #to_s] an Array is rendered as "[a,b,c]";
#   anything else is passed through +to_s+
# @param limit [Integer] maximum number of rows
# @param threshold [Numeric] similarity cut-off; converted to a distance bound
# @return the result of calling +all+ on the built dataset
def similar_to(query_vector, limit: 10, threshold: 0.3)
  modern_server = db.server_version >= 120_000 # PostgreSQL 12+

  vector_literal = query_vector.is_a?(Array) ? "[#{query_vector.join(',')}]" : query_vector.to_s
  max_distance = 1 - threshold

  filter_sql = modern_server ? '(vector <=> ?) < ?' : 'cosine_distance(vector, ?) < ?'
  distance_sql = modern_server ? 'vector <=> ?' : 'cosine_distance(vector, ?)'

  where(Sequel.lit(filter_sql, vector_literal, max_distance))
    .order(Sequel.lit(distance_sql, vector_literal))
    .limit(limit)
    .all
end
|
|
70
|
+
|
|
71
|
+
# Find nearest embeddings without a threshold fallback
|
|
72
|
+
# Returns the +limit+ embeddings closest to +query_vector+ by cosine
# distance, with no distance cut-off applied.
#
# The <tt><=></tt> operator is used when +db.server_version+ is at least
# 120_000; otherwise the +cosine_distance+ function is used instead.
#
# @param query_vector [Array, #to_s] an Array is rendered as "[a,b,c]";
#   anything else is passed through +to_s+
# @param limit [Integer] maximum number of rows
# @return the result of calling +all+ on the ordered dataset
def nearest_to(query_vector, limit: 10)
  modern_server = db.server_version >= 120_000 # PostgreSQL 12+

  vector_literal = query_vector.is_a?(Array) ? "[#{query_vector.join(',')}]" : query_vector.to_s
  distance_sql = modern_server ? 'vector <=> ?' : 'cosine_distance(vector, ?)'

  order(Sequel.lit(distance_sql, vector_literal)).limit(limit).all
end
|
|
91
|
+
|
|
92
|
+
# Batch insert embeddings
|
|
93
|
+
# Inserts all rows in +embedding_data+ through +dataset.multi_insert+,
# wrapped in a single +db.transaction+ block.
#
# @param embedding_data the rows handed to +multi_insert+
# @return the value of the transaction block
def batch_insert(embedding_data)
  db.transaction { dataset.multi_insert(embedding_data) }
end
|
|
98
|
+
|
|
99
|
+
# Delete embeddings by section
|
|
100
|
+
# Deletes every embedding whose +source_id+ equals +section_id+.
#
# @param section_id the section identifier to match
# @return the result of +delete+ on the filtered dataset
def delete_by_section(section_id)
  doomed = where(source_id: section_id)
  doomed.delete
end
|
|
103
|
+
|
|
104
|
+
# Delete old embeddings (cleanup)
|
|
105
|
+
# Deletes embeddings whose +created_at+ is earlier than +days+ days before
# the current time.
#
# @param days [Numeric] age limit in days (converted to seconds)
# @return the result of +delete+ on the filtered dataset
def delete_old_embeddings(days: 30)
  cutoff = Time.now - (days * 24 * 60 * 60)
  expired = where(Sequel.lit('created_at < ?', cutoff))
  expired.delete
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Instance methods
|
|
111
|
+
|
|
112
|
+
# Return vector as array of floats
|
|
113
|
+
# Returns the stored vector as an Array of Floats, or nil when no vector
# is set.
#
# The value is parsed from its text form. This class itself formats
# vectors as "[a,b,c]" when querying, so square brackets must be removed
# before splitting; otherwise the first component ("[a") is read by
# String#to_f as 0.0. Angle brackets are still stripped as before.
#
# @return [Array<Float>, nil]
def vector_array
  return nil unless vector

  # Drop the enclosing delimiters, then parse each comma-separated entry.
  vector.to_s.gsub(/[\[\]<>]/, '').split(',').map(&:to_f)
end
|
|
119
|
+
|
|
120
|
+
# Calculate similarity to another vector
|
|
121
|
+
# Cosine similarity between this record's vector (as returned by
# +vector_array+) and +other_vector+.
#
# @param other_vector the vector to compare against
# @return the value computed by +vector_array_cosine_similarity+
def similarity_to(other_vector)
  vector_array_cosine_similarity(vector_array, other_vector)
end
|
|
125
|
+
|
|
126
|
+
private
|
|
127
|
+
|
|
128
|
+
# Cosine similarity of two numeric arrays.
#
# Returns 0.0 when either input is nil or empty, or when either vector has
# zero magnitude.
#
# @param v1 [Array<Numeric>, nil]
# @param v2 [Array<Numeric>, nil]
# @return [Float]
def vector_array_cosine_similarity(v1, v2)
  return 0.0 if v1.nil? || v2.nil?
  return 0.0 if v1.empty? || v2.empty?

  dot = v1.zip(v2).sum { |left, right| left * right }
  norm1 = Math.sqrt(v1.sum { |component| component * component })
  norm2 = Math.sqrt(v2.sum { |component| component * component })
  return 0.0 if [norm1, norm2].any? { |norm| norm == 0 }

  dot / (norm1 * norm2)
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
require 'sequel'
|
|
2
|
+
|
|
3
|
+
module SmartRAG
|
|
4
|
+
module Models
|
|
5
|
+
# Module to add FactoryBot compatibility methods to Sequel models
|
|
6
|
+
# Mixin that gives a model ActiveRecord-style bang helpers and simple
# serialization:
#
# * +save!+ (instance): calls +save+ and raises Sequel::ValidationFailed
#   when +save+ returns a falsy value.
# * +create!+ (class, installed when the module is included): builds an
#   instance, saves it with +save!+ and returns it.
# * +to_hash+ / +to_json+: expose +values+ as a Hash and as JSON.
module FactoryBotHelpers
  # Hook run on inclusion; defines +create!+ on the including class.
  def self.included(base)
    class << base
      def create!(attributes = {})
        record = new(attributes)
        raise(Sequel::ValidationFailed, record.errors.full_messages.join(', ')) unless record.save!

        record
      end
    end
  end

  # @return the truthy result of +save+
  # @raise [Sequel::ValidationFailed] with the joined error messages when +save+ is falsy
  def save!(*args)
    outcome = save(*args)
    return outcome if outcome

    raise(Sequel::ValidationFailed, errors.full_messages.join(', '))
  end

  # @return the model's +values+
  def to_hash
    values
  end

  # @return [String] JSON produced from +to_hash+
  def to_json(*args)
    to_hash.to_json(*args)
  end
end
|
|
31
|
+
|
|
32
|
+
# Stub database class that allows models to be loaded without connection
|
|
33
|
+
# Stand-in for a database object, used so model classes can be defined
# before a real connection exists. It claims to respond to every message:
# unknown calls return a StubDataset, and a few methods are answered with
# fixed, harmless values.
class DelayedConnection
  # Reports itself as a Sequel::Database in addition to its real ancestry.
  def kind_of?(other)
    return true if other == Sequel::Database

    super
  end

  # Pretend to respond to everything.
  def respond_to_missing?(_name, _include_private = false)
    true
  end

  # Any unknown message yields a StubDataset bound to this connection.
  def method_missing(_name, *_arguments, &_blk)
    StubDataset.new(self)
  end

  # @return [Array] always empty
  def schema(*_ignored)
    []
  end

  # @return [Array] always empty
  def tables(*_ignored)
    []
  end

  # Runs the given block directly; no transaction is opened.
  def transaction(*_ignored)
    yield
  end

  # @return [DelayedConnection] self
  def class_scope(*_ignored)
    self
  end

  # @return [StubDataset] a stub dataset bound to this connection
  def from(*_ignored)
    StubDataset.new(self)
  end
end
|
|
70
|
+
|
|
71
|
+
# Stub dataset class for delayed connection
|
|
72
|
+
# Null-object dataset handed out by the delayed connection: every message,
# including +clone+, returns the same instance so call chains never fail.
class StubDataset
  # @param db the object that created this stub (stored in +@db+)
  def initialize(db)
    @db = db
  end

  # Pretend to respond to everything.
  def respond_to_missing?(_name, _include_private = false)
    true
  end

  # Swallows any call and returns self so calls can be chained.
  def method_missing(_name, *_arguments, **_options, &_blk)
    self
  end

  # Returns self rather than a copy.
  def clone(*_arguments, **_options)
    self
  end
end
|
|
90
|
+
|
|
91
|
+
# Set a stub database connection so Sequel::Model subclasses can be defined
|
|
92
|
+
# This will be replaced with a real connection when SmartRAG::Models.db= is called
|
|
93
|
+
# Install the stub connection only when asking for Sequel::Model.db fails
# with Sequel::Error; an already configured database is left untouched.
# NOTE(review): presumably Sequel raises Sequel::Error here when no
# database has been assigned -- confirm against the Sequel version in use.
begin
  Sequel::Model.db
rescue Sequel::Error
  # No database set yet, set our stub connection
  Sequel::Model.db = DelayedConnection.new
end
|
|
99
|
+
|
|
100
|
+
# Placeholder module - models inherit directly from Sequel::Model
|
|
101
|
+
# This module exists only for organization and documentation
|
|
102
|
+
# Empty namespace module; it defines no methods or constants.
module ModelBase; end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
require_relative "model_base"
|
|
2
|
+
require "sequel/plugins/validation_helpers"
|
|
3
|
+
|
|
4
|
+
module SmartRAG
|
|
5
|
+
module Models
|
|
6
|
+
# ResearchTopic model for organizing content by topics
|
|
7
|
+
class ResearchTopic < Sequel::Model
|
|
8
|
+
# Set dataset after database is connected
|
|
9
|
+
# Points this model at the +research_topics+ table by calling
# +set_dataset+. Meant to be invoked once a real database connection is
# available.
#
# @return whatever +set_dataset+ returns
def self.set_dataset_from_db
  table_name = :research_topics
  set_dataset(table_name)
end
|
|
12
|
+
include FactoryBotHelpers
|
|
13
|
+
plugin :validation_helpers
|
|
14
|
+
plugin :timestamps, update_on_create: true
|
|
15
|
+
|
|
16
|
+
# Add bang methods for FactoryBot compatibility
|
|
17
|
+
# Builds a topic from +attributes+, saves it with +save!+ and returns it.
#
# @param attributes [Hash] values handed to +new+
# @return the saved instance
# @raise [Sequel::ValidationFailed] when +save!+ returns a falsy value
def self.create!(attributes = {})
  topic = new(attributes)
  raise(Sequel::ValidationFailed, topic.errors.full_messages.join(", ")) unless topic.save!

  topic
end
|
|
22
|
+
|
|
23
|
+
# Relationships
|
|
24
|
+
one_to_many :research_topic_sections, class: '::SmartRAG::Models::ResearchTopicSection', key: :research_topic_id
|
|
25
|
+
one_to_many :research_topic_tags, class: '::SmartRAG::Models::ResearchTopicTag', key: :research_topic_id
|
|
26
|
+
many_to_many :sections, class: '::SmartRAG::Models::SourceSection',
|
|
27
|
+
join_table: :research_topic_sections,
|
|
28
|
+
left_key: :research_topic_id,
|
|
29
|
+
right_key: :section_id
|
|
30
|
+
many_to_many :tags, class: '::SmartRAG::Models::Tag',
|
|
31
|
+
join_table: :research_topic_tags,
|
|
32
|
+
left_key: :research_topic_id,
|
|
33
|
+
right_key: :tag_id
|
|
34
|
+
|
|
35
|
+
# Validation
|
|
36
|
+
# Runs the inherited validations, then checks that +name+ is present and
# at most 500 characters, and that +description+ is present unless nil.
def validate
  super
  validates_presence(:name)
  validates_max_length(500, :name)
  validates_presence(:description, allow_nil: true)
end
|
|
42
|
+
|
|
43
|
+
# Class methods
|
|
44
|
+
class << self
|
|
45
|
+
# Find topic by name
|
|
46
|
+
# Returns the first topic whose +name+ matches +name+ under a
# case-insensitive LIKE comparison (built with +Sequel.ilike+).
#
# @param name [String] value passed as the ILIKE pattern
# @return the first matching record, per +first+ on the filtered dataset
def find_by_name(name)
  name_match = Sequel.ilike(:name, name)
  where(name_match).first
end
|
|
49
|
+
|
|
50
|
+
# Search topics by name or description
|
|
51
|
+
# Builds a dataset of topics whose +name+ or +description+ contains
# +query+, compared case-insensitively via +Sequel.ilike+.
#
# @param query text interpolated between "%" wildcards
# @return the combined (name OR description) dataset
def search(query)
  pattern = "%#{query}%"
  name_hits = where(Sequel.ilike(:name, pattern))
  name_hits.or(Sequel.ilike(:description, pattern))
end
|
|
55
|
+
|
|
56
|
+
# Get topics with section count
|
|
57
|
+
# Builds a dataset of topics with an extra +section_count+ column, ordered
# from most to fewest sections. Topics are LEFT JOINed to
# +research_topic_sections+ and grouped by topic id.
#
# Columns are qualified with <tt>Sequel[:table][:column]</tt> (the form
# this method already uses for the select list) instead of
# +:table__column+ symbols, which are only split into table and column
# when symbol splitting is enabled.
#
# @return the unevaluated dataset
def with_section_count
  joined_section_id = Sequel[:research_topic_sections][:section_id]

  select(Sequel[:research_topics].*)
    .select_append(Sequel.function(:count, joined_section_id).as(:section_count))
    .left_join(:research_topic_sections, research_topic_id: :id)
    .group(Sequel[:research_topics][:id])
    .order(Sequel.desc(:section_count))
end
|
|
63
|
+
|
|
64
|
+
# Get topics by tag
|
|
65
|
+
# Builds a dataset of topics linked to +tag_id+ through the
# +research_topic_tags+ table (used as an id subquery).
#
# @param tag_id the tag identifier to match
# @return the filtered dataset
def by_tag(tag_id)
  tagged_topic_ids = db[:research_topic_tags].select(:research_topic_id).where(tag_id: tag_id)
  where(id: tagged_topic_ids)
end
|
|
68
|
+
|
|
69
|
+
# Get the most recently created topics
|
|
70
|
+
# Builds a dataset of topics ordered by +created_at+ descending, capped at
# +limit+ rows.
#
# @param limit [Integer] maximum number of rows
# @return the ordered, limited dataset
def recent(limit: 10)
  newest_first = Sequel.desc(:created_at)
  order(newest_first).limit(limit)
end
|
|
73
|
+
|
|
74
|
+
# Batch create topics
|
|
75
|
+
# Creates one topic per entry of +topics+ inside a single
# +db.transaction+ block.
#
# @param topics [Array] attribute sets, each passed to +create+
# @return [Array] the results of +create+, in input order
def batch_create(topics)
  db.transaction { topics.map { |attrs| create(attrs) } }
end
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Instance methods
|
|
83
|
+
|
|
84
|
+
# Add section to topic
|
|
85
|
+
# Links +section+ to this topic unless it is already among +sections+.
#
# This method overrides the +add_section+ that the +sections+
# association is expected to provide, so the actual insert must go through
# +super+. Calling +add_section+ on self here would re-enter this method
# and recurse without end.
#
# @param section the section to link
# @return the result of the inherited +add_section+, or nil when the
#   section was already linked
def add_section(section)
  super unless sections.include?(section)
end
|
|
90
|
+
|
|
91
|
+
# Remove section from topic
|
|
92
|
+
# Unlinks +section+ from this topic when it is currently among +sections+.
#
# This method overrides the +remove_section+ that the +sections+
# association is expected to provide, so the actual removal must go
# through +super+. Calling +remove_section+ on self here would re-enter
# this method and recurse without end.
#
# @param section the section to unlink
# @return the result of the inherited +remove_section+, or nil when the
#   section was not linked
def remove_section(section)
  super if sections.include?(section)
end
|
|
97
|
+
|
|
98
|
+
# Add tag to topic
|
|
99
|
+
# Links +tag+ to this topic unless it is already among +tags+.
#
# This method overrides the +add_tag+ that the +tags+ association is
# expected to provide, so the actual insert must go through +super+.
# Calling +add_tag+ on self here would re-enter this method and recurse
# without end.
#
# @param tag the tag to link
# @return the result of the inherited +add_tag+, or nil when the tag was
#   already linked
def add_tag(tag)
  super unless tags.include?(tag)
end
|
|
104
|
+
|
|
105
|
+
# Remove tag from topic
|
|
106
|
+
# Unlinks +tag+ from this topic when it is currently among +tags+.
#
# This method overrides the +remove_tag+ that the +tags+ association is
# expected to provide, so the actual removal must go through +super+.
# Calling +remove_tag+ on self here would re-enter this method and recurse
# without end.
#
# @param tag the tag to unlink
# @return the result of the inherited +remove_tag+, or nil when the tag
#   was not linked
def remove_tag(tag)
  super if tags.include?(tag)
end
|
|
111
|
+
|
|
112
|
+
# Count sections for this topic
|
|
113
|
+
# Number of entries in +sections+.
#
# @return [Integer]
def section_count
  linked_sections = sections
  linked_sections.count
end
|
|
116
|
+
|
|
117
|
+
# Count tags for this topic
|
|
118
|
+
# Number of entries in +tags+.
#
# @return [Integer]
def tag_count
  linked_tags = tags
  linked_tags.count
end
|
|
121
|
+
|
|
122
|
+
# Get related topics (other topics that share at least one section)
|
|
123
|
+
# Finds other topics that share at least one section with this topic.
#
# A subquery over +research_topic_sections+ picks the ids of topics linked
# to any of this topic's sections, leaves out this topic, groups by topic
# and keeps the +limit+ ids with the most shared rows. Only sections are
# considered; tags are not consulted.
#
# The self-exclusion uses +exclude+: the chained <tt>where.not(...)</tt>
# form is ActiveRecord API, and Sequel's +where+ needs an argument or
# block. The row count uses <tt>Sequel.function(:count).*</tt>, the
# wildcard form for COUNT(*).
#
# NOTE(review): the outer query filters by id only, so the returned array
# is not guaranteed to follow the subquery's ordering -- confirm whether
# callers rely on ranking.
#
# @param limit [Integer] maximum number of related topics
# @return [Array] matching topic records
def related_topics(limit: 5)
  own_section_ids = sections.map(&:id)

  related_ids = db[:research_topic_sections]
    .select(:research_topic_id)
    .where(section_id: own_section_ids)
    .exclude(research_topic_id: id)
    .group(:research_topic_id)
    .order(Sequel.desc(Sequel.function(:count).*))
    .limit(limit)

  self.class.where(id: related_ids).all
end
|
|
134
|
+
|
|
135
|
+
# Get topics info
|
|
136
|
+
# Summary of this topic as a Hash with the keys +:id+, +:name+,
# +:section_count+, +:tag_count+ and +:created_at+ (in that order).
#
# @return [Hash]
def info
  summary = { id: id, name: name }
  summary[:section_count] = section_count
  summary[:tag_count] = tag_count
  summary[:created_at] = created_at
  summary
end
|
|
145
|
+
|
|
146
|
+
# String representation
|
|
147
|
+
# Human-readable summary built from +id+, +name+, +section_count+ and
# +tag_count+.
#
# @return [String]
def to_s
  "<ResearchTopic: #{id} - #{name} (#{section_count} sections, #{tag_count} tags)>"
end
|
|
150
|
+
|
|
151
|
+
# Alias name as title for API compatibility
|
|
152
|
+
# Reader alias: returns +name+.
def title
  name
end

# Writer alias: assigns +value+ to +name+.
def title=(value)
  self.name = value
end
|
|
159
|
+
|
|
160
|
+
# Alias created_at as updated_at for API compatibility
|
|
161
|
+
# Returns the in-memory +@updated_at+ value when one has been assigned and
# is truthy; otherwise falls back to +created_at+.
def updated_at
  return @updated_at if @updated_at

  created_at
end

# Stores +value+ in the +@updated_at+ instance variable only (for API
# compatibility; nothing else is written here).
def updated_at=(value)
  @updated_at = value
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
require_relative "model_base"
|
|
2
|
+
require "sequel/plugins/validation_helpers"
|
|
3
|
+
|
|
4
|
+
module SmartRAG
|
|
5
|
+
module Models
|
|
6
|
+
# ResearchTopicSection model for many-to-many relationship
|
|
7
|
+
# Join model over the +research_topic_sections+ table: one row links a
# research topic to a source section.
class ResearchTopicSection < Sequel::Model(:research_topic_sections)
  include FactoryBotHelpers
  plugin :validation_helpers
  plugin :timestamps, update_on_create: false

  # Primary-key columns may be set through mass assignment.
  unrestrict_primary_key

  # Builds a link from +attributes+, saves it with +save!+ and returns it.
  #
  # @raise [Sequel::ValidationFailed] when +save!+ returns a falsy value
  def self.create!(attributes = {})
    link = new(attributes)
    raise(Sequel::ValidationFailed, link.errors.full_messages.join(", ")) unless link.save!

    link
  end

  # Relationships
  many_to_one :research_topic, class: '::SmartRAG::Models::ResearchTopic', key: :research_topic_id
  many_to_one :section, class: '::SmartRAG::Models::SourceSection', key: :section_id

  # Both ids must be present and the (topic, section) pair must be unique.
  def validate
    super
    validates_presence(%i[research_topic_id section_id])
    validates_unique(%i[research_topic_id section_id])
  end

  class << self
    # First link row for the given topic/section pair.
    def find_by_topic_and_section(topic_id, section_id)
      pairing = where(research_topic_id: topic_id, section_id: section_id)
      pairing.first
    end

    # All link rows belonging to +topic_id+.
    def sections_for_topic(topic_id)
      links = where(research_topic_id: topic_id)
      links.all
    end

    # All link rows belonging to +section_id+.
    def topics_for_section(section_id)
      links = where(section_id: section_id)
      links.all
    end

    # Deletes every link row of +topic_id+.
    def delete_all_for_topic(topic_id)
      links = where(research_topic_id: topic_id)
      links.delete
    end

    # Deletes every link row of +section_id+.
    def delete_all_for_section(section_id)
      links = where(section_id: section_id)
      links.delete
    end

    # Inserts all +associations+ via +multi_insert+ inside one transaction.
    def bulk_create(associations)
      db.transaction { dataset.multi_insert(associations) }
    end

    # True when at least one link row exists for the pair.
    def in_topic?(topic_id, section_id)
      matches = where(research_topic_id: topic_id, section_id: section_id).count
      matches > 0
    end

    # Dataset of link rows ordered by +created_at+ descending, capped at +limit+.
    def recent(limit: 50)
      newest_first = Sequel.desc(:created_at)
      order(newest_first).limit(limit)
    end
  end

  # String representation
  def to_s
    "<ResearchTopicSection: topic:#{research_topic_id} => section:#{section_id}>"
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
require_relative "model_base"
|
|
2
|
+
require "sequel/plugins/validation_helpers"
|
|
3
|
+
|
|
4
|
+
module SmartRAG
|
|
5
|
+
module Models
|
|
6
|
+
# ResearchTopicTag model for many-to-many relationship between topics and tags
|
|
7
|
+
# Join model over the +research_topic_tags+ table: one row links a
# research topic to a tag.
class ResearchTopicTag < Sequel::Model(:research_topic_tags)
  include FactoryBotHelpers
  plugin :validation_helpers
  plugin :timestamps, update_on_create: false
  # Composite primary-key columns may be set through mass assignment.
  unrestrict_primary_key

  # Builds a link from +attributes+, saves it with +save!+ and returns it.
  #
  # @raise [Sequel::ValidationFailed] when +save!+ returns a falsy value
  def self.create!(attributes = {})
    link = new(attributes)
    raise(Sequel::ValidationFailed, link.errors.full_messages.join(", ")) unless link.save!

    link
  end

  # Relationships
  many_to_one :research_topic, class: '::SmartRAG::Models::ResearchTopic', key: :research_topic_id
  many_to_one :tag, class: '::SmartRAG::Models::Tag', key: :tag_id

  # Both ids must be present and the (topic, tag) pair must be unique.
  def validate
    super
    validates_presence(%i[research_topic_id tag_id])
    validates_unique(%i[research_topic_id tag_id])
  end

  class << self
    # First link row for the given topic/tag pair.
    def find_by_topic_and_tag(topic_id, tag_id)
      pairing = where(research_topic_id: topic_id, tag_id: tag_id)
      pairing.first
    end

    # All link rows belonging to +topic_id+.
    def tags_for_topic(topic_id)
      links = where(research_topic_id: topic_id)
      links.all
    end

    # All link rows belonging to +tag_id+.
    def topics_for_tag(tag_id)
      links = where(tag_id: tag_id)
      links.all
    end

    # Deletes every link row of +topic_id+.
    def delete_all_for_topic(topic_id)
      links = where(research_topic_id: topic_id)
      links.delete
    end

    # Deletes every link row of +tag_id+.
    def delete_all_for_tag(tag_id)
      links = where(tag_id: tag_id)
      links.delete
    end

    # Inserts all +associations+ via +multi_insert+ inside one transaction.
    def bulk_create(associations)
      db.transaction { dataset.multi_insert(associations) }
    end

    # True when at least one link row exists for the pair.
    def has_tag?(topic_id, tag_id)
      matches = where(research_topic_id: topic_id, tag_id: tag_id).count
      matches > 0
    end

    # Dataset of (+tag_id+, +topic_count+) rows over +research_topic_tags+,
    # ordered by +topic_count+ descending and capped at +limit+.
    def popular_tags(limit: 20)
      link_table = db[:research_topic_tags]
      topic_total = Sequel.function(:count, :research_topic_id).as(:topic_count)

      link_table
        .select(:tag_id, topic_total)
        .group(:tag_id)
        .order(Sequel.desc(:topic_count))
        .limit(limit)
    end
  end

  # String representation
  def to_s
    "<ResearchTopicTag: topic:#{research_topic_id} => tag:#{tag_id}>"
  end
end
|
|
88
|
+
end
|
|
89
|
+
end
|