htm 0.0.2 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +294 -26
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +280 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +125 -15
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class HTM
  module Models
    # Tracks a source file whose contents have been loaded into HTM.
    #
    # A record stores the file's path together with the metadata this class
    # reads (mtime, last_synced_at, frontmatter). Nodes created from the file
    # point back at it through their source_id column.
    #
    # @example Look up a source and walk its chunks
    #   source = FileSource.by_path('/path/to/doc.md').first
    #   source.chunks  # => nodes ordered by chunk_position
    #
    # @example Decide whether the file must be loaded again
    #   source.needs_sync?(File.mtime('/path/to/doc.md'))  # => true/false
    #
    class FileSource < ActiveRecord::Base
      self.table_name = 'file_sources'

      # Largest difference (in seconds) between two modification times that is
      # still treated as "unchanged". Absorbs precision differences between
      # filesystem and database timestamps.
      DELTA_TIME = 5

      # Associations
      has_many :nodes,
               class_name: 'HTM::Models::Node',
               foreign_key: :source_id,
               dependent: :nullify

      # Validations
      validates :file_path, presence: true, uniqueness: true

      # Scopes
      scope :by_path, ->(path) { where(file_path: File.expand_path(path)) }
      # NOTE(review): selects rows whose stored mtime is earlier than
      # last_synced_at - confirm this is the intended meaning of "stale".
      scope :stale, -> { where('mtime < last_synced_at') }
      scope :recently_synced, -> { order(last_synced_at: :desc) }

      # Decide whether the file has changed since its mtime was recorded
      #
      # Both times are truncated to whole seconds and compared with a
      # DELTA_TIME tolerance, so sub-second precision differences do not
      # trigger a re-sync.
      #
      # @param current_mtime [Time] Current file modification time
      # @return [Boolean] true when no mtime is stored, or when the two times
      #   differ by more than DELTA_TIME seconds
      #
      def needs_sync?(current_mtime)
        return true if mtime.nil?

        drift_seconds = (current_mtime.to_i - mtime.to_i).abs
        drift_seconds > DELTA_TIME
      end

      # Nodes created from this file, in document order
      #
      # @return [ActiveRecord::Relation] Nodes ordered by chunk_position
      #
      def chunks
        nodes.order(:chunk_position)
      end

      # Tag names declared in the file's frontmatter
      #
      # @return [Array<String>] Entries of the frontmatter 'tags' field as
      #   strings; empty when there is no frontmatter hash or no tags entry
      #
      def frontmatter_tags
        Array(frontmatter_value('tags') || []).map(&:to_s)
      end

      # Title declared in the file's frontmatter
      #
      # @return [String, nil] Value of the 'title' field, if any
      #
      def title
        frontmatter_value('title')
      end

      # Author declared in the file's frontmatter
      #
      # @return [String, nil] Value of the 'author' field, if any
      #
      def author
        frontmatter_value('author')
      end

      # Mark the nodes of this file as deleted without removing their rows
      #
      # @return [Integer] Number of chunks soft-deleted
      #
      def soft_delete_chunks!
        nodes.update_all(deleted_at: Time.current)
      end

      private

      # Read one frontmatter field, trying the string key before the symbol key.
      #
      # @param key [String] Field name
      # @return [Object, nil] Stored value, or nil when frontmatter is not a Hash
      #
      def frontmatter_value(key)
        return nil unless frontmatter.is_a?(Hash)

        frontmatter[key] || frontmatter[key.to_sym]
      end
    end
  end
end
|
data/lib/htm/models/node.rb
CHANGED
|
@@ -29,13 +29,16 @@ class HTM
|
|
|
29
29
|
has_many :node_tags, dependent: :destroy, class_name: 'HTM::Models::NodeTag'
has_many :tags, class_name: 'HTM::Models::Tag', through: :node_tags

# Source file this node was loaded from. Optional: the link goes through
# source_id and may be absent.
belongs_to :file_source,
           optional: true,
           foreign_key: :source_id,
           class_name: 'HTM::Models::FileSource'

# Neighbor - vector similarity search on the embedding attribute
has_neighbors :embedding

# Validations
validates :content, presence: true
validates :content_hash, presence: true, uniqueness: true

# Callbacks
# Fill in content_hash only when it is missing and there is content to hash.
before_validation :set_content_hash, if: -> { content_hash.blank? && content.present? }
|
|
@@ -43,13 +46,32 @@ class HTM
|
|
|
43
46
|
before_save :update_timestamps
|
|
44
47
|
|
|
45
48
|
# Scopes

# Soft delete: by default every query hides rows whose deleted_at is set.
default_scope { where(deleted_at: nil) }

scope :by_robot, ->(robot_id) { joins(:robot_nodes).where(robot_nodes: { robot_id: robot_id }) }
scope :recent, -> { order(created_at: :desc) }
scope :in_timeframe, ->(start_time, end_time) { where(created_at: start_time..end_time) }
scope :with_embeddings, -> { where.not(embedding: nil) }
scope :from_source, ->(source_id) { where(source_id: source_id).order(:chunk_position) }

# Soft delete scopes
#
# These lift only the default deleted_at filter. Calling `unscoped` here would
# also discard every condition chained before the scope, so that e.g.
# Node.by_robot(id).deleted would return the deleted nodes of all robots.
scope :with_deleted, -> { unscope(where: :deleted_at) }
scope :deleted, -> { unscope(where: :deleted_at).where.not(deleted_at: nil) }
scope :deleted_before, ->(time) { deleted.where('deleted_at < ?', time) }
|
|
50
62
|
|
|
51
63
|
# Class methods
|
|
52
64
|
|
|
65
|
+
# Permanently delete soft-deleted nodes whose deleted_at is older than a cutoff
#
# @param older_than [Time, ActiveSupport::Duration] Cutoff. A Time is used as
#   given (e.g. 30.days.ago); a duration (e.g. 30.days) is measured back from now.
# @return [Integer] Number of nodes permanently deleted
#
# @example Purge everything soft-deleted more than 30 days ago
#   Node.purge_deleted(older_than: 30.days.ago)
#   Node.purge_deleted(older_than: 30.days)
#
def self.purge_deleted(older_than:)
  # A bare duration is a length of time, not a point in time; convert it
  # before it reaches the deleted_at comparison.
  cutoff = older_than.is_a?(ActiveSupport::Duration) ? older_than.ago : older_than

  # destroy_all instantiates and destroys each record, and returns the
  # destroyed records, which are then counted.
  deleted_before(cutoff).destroy_all.count
end
|
|
74
|
+
|
|
53
75
|
# Find a node by content hash, or return nil
|
|
54
76
|
#
|
|
55
77
|
# @param content [String] The content to search for
|
|
@@ -91,19 +113,43 @@ class HTM
|
|
|
91
113
|
query_embedding = other.is_a?(Node) ? other.embedding : other
|
|
92
114
|
return nil unless embedding.present? && query_embedding.present?
|
|
93
115
|
|
|
116
|
+
# Validate embedding is an array of finite numeric values
|
|
117
|
+
unless query_embedding.is_a?(Array) && query_embedding.all? { |v| v.is_a?(Numeric) && v.finite? }
|
|
118
|
+
return nil
|
|
119
|
+
end
|
|
120
|
+
|
|
94
121
|
# Calculate cosine similarity: 1 - (embedding <=> query_embedding)
|
|
95
|
-
#
|
|
96
|
-
vector_str = "[#{query_embedding.join(',')}]"
|
|
97
|
-
|
|
98
|
-
|
|
122
|
+
# Safely format the array as a PostgreSQL vector literal
|
|
123
|
+
vector_str = "[#{query_embedding.map { |v| v.to_f }.join(',')}]"
|
|
124
|
+
conn = self.class.connection
|
|
125
|
+
quoted_vector = conn.quote(vector_str)
|
|
126
|
+
quoted_id = conn.quote(id)
|
|
127
|
+
|
|
128
|
+
result = conn.select_value(
|
|
129
|
+
"SELECT 1 - (embedding <=> #{quoted_vector}::vector) FROM nodes WHERE id = #{quoted_id}"
|
|
99
130
|
)
|
|
100
131
|
result&.to_f
|
|
101
132
|
end
|
|
102
133
|
|
|
134
|
+
# List the names of every tag attached to this node
#
# @return [Array<String>] Hierarchical tag names, e.g. ["database:postgresql", "ai:llm"]
#
def tag_names
  tags.pluck(:name)
end
|
|
106
141
|
|
|
142
|
+
# Add tags to this node (creates tags if they don't exist)
|
|
143
|
+
#
|
|
144
|
+
# @param tag_names [Array<String>, String] Tag name(s) to add
|
|
145
|
+
# @return [void]
|
|
146
|
+
#
|
|
147
|
+
# @example Add a single tag
|
|
148
|
+
# node.add_tags("database:postgresql")
|
|
149
|
+
#
|
|
150
|
+
# @example Add multiple tags
|
|
151
|
+
# node.add_tags(["database:postgresql", "ai:embeddings"])
|
|
152
|
+
#
|
|
107
153
|
def add_tags(tag_names)
|
|
108
154
|
Array(tag_names).each do |tag_name|
|
|
109
155
|
tag = HTM::Models::Tag.find_or_create_by(name: tag_name)
|
|
@@ -111,6 +157,11 @@ class HTM
|
|
|
111
157
|
end
|
|
112
158
|
end
|
|
113
159
|
|
|
160
|
+
# Remove a tag from this node
|
|
161
|
+
#
|
|
162
|
+
# @param tag_name [String] Tag name to remove
|
|
163
|
+
# @return [void]
|
|
164
|
+
#
|
|
114
165
|
def remove_tag(tag_name)
|
|
115
166
|
tag = HTM::Models::Tag.find_by(name: tag_name)
|
|
116
167
|
return unless tag
|
|
@@ -118,6 +169,30 @@ class HTM
|
|
|
118
169
|
node_tags.where(tag_id: tag.id).destroy_all
|
|
119
170
|
end
|
|
120
171
|
|
|
172
|
+
# Soft delete this node: stamp deleted_at with the current time and keep the row
#
# @return [Boolean] true if soft deleted successfully
#
def soft_delete!
  update!(deleted_at: Time.current)
end
|
|
179
|
+
|
|
180
|
+
# Undo a soft delete by clearing deleted_at
#
# @return [Boolean] true if restored successfully
#
def restore!
  update!(deleted_at: nil)
end
|
|
187
|
+
|
|
188
|
+
# Whether this node has been soft-deleted
#
# @return [Boolean] true if deleted_at is set
#
def deleted?
  deleted_at.present?
end
|
|
195
|
+
|
|
121
196
|
private
|
|
122
197
|
|
|
123
198
|
def set_content_hash
|
data/lib/htm/models/robot.rb
CHANGED
|
@@ -27,15 +27,31 @@ class HTM
|
|
|
27
27
|
scope :by_name, ->(name) { where(name: name) }
|
|
28
28
|
|
|
29
29
|
# Class methods
|
|
30
|
+
|
|
31
|
+
# Return the robot with the given name, creating it when none exists
#
# @param robot_name [String] Name of the robot
# @return [Robot] The found or created robot
#
def self.find_or_create_by_name(robot_name)
  find_or_create_by(name: robot_name)
end
|
|
33
39
|
|
|
34
40
|
# Instance methods
|
|
41
|
+
|
|
42
|
+
# Count the nodes associated with this robot
#
# @return [Integer] Number of nodes
#
def node_count
  nodes.count
end
|
|
38
49
|
|
|
50
|
+
# Newest nodes of this robot
#
# @param limit [Integer] Maximum number of nodes to return (default: 10)
# @return [ActiveRecord::Relation] Nodes from the `recent` scope, capped at limit
#
def recent_nodes(limit = 10)
  newest_first = nodes.recent
  newest_first.limit(limit)
end
|
|
@@ -60,10 +76,17 @@ class HTM
|
|
|
60
76
|
end
|
|
61
77
|
end
|
|
62
78
|
|
|
79
|
+
# Summarize this robot's memory state
#
# @return [Hash] Summary with the keys:
#   - :total_nodes [Integer] Nodes associated with this robot
#   - :in_working_memory [Integer] robot_nodes rows in the in_working_memory scope
#   - :with_embeddings [Integer] Nodes in the with_embeddings scope
#
def memory_summary
  total = nodes.count
  working = robot_nodes.in_working_memory.count
  embedded = nodes.with_embeddings.count

  { total_nodes: total, in_working_memory: working, with_embeddings: embedded }
end
|
|
@@ -34,6 +34,7 @@ class HTM
|
|
|
34
34
|
scope :by_robot, ->(id) { where(robot_id: id) }
scope :by_node, ->(id) { where(node_id: id) }
# Rows remembered more than once, most-remembered first
scope :frequently_remembered, -> { where('remember_count > 1').order(remember_count: :desc) }
# Rows whose working_memory flag is true
scope :in_working_memory, -> { where(working_memory: true) }
|
|
37
38
|
|
|
38
39
|
# Record that a robot remembered this content again
|
|
39
40
|
#
|
data/lib/htm/models/tag.rb
CHANGED
|
@@ -29,10 +29,28 @@ class HTM
|
|
|
29
29
|
scope :root_level, -> { where("name NOT LIKE '%:%'") }
|
|
30
30
|
|
|
31
31
|
# Class methods
|
|
32
|
+
|
|
33
|
+
# Find tags whose name starts with a given prefix (hierarchical query)
#
# The match is a plain string prefix: "database" matches "database:postgresql"
# and would also match a tag named "databases".
#
# @param prefix [String] Tag prefix to match, taken literally
# @return [ActiveRecord::Relation] Tags matching the prefix
#
# @example Find all database-related tags
#   Tag.find_by_topic_prefix("database")
#   # => [#<Tag name: "database:postgresql">, #<Tag name: "database:mysql">]
#
def self.find_by_topic_prefix(prefix)
  # Escape %, _ and \ so they match themselves instead of acting as LIKE
  # wildcards (an unescaped "_" would match any single character).
  literal_prefix = sanitize_sql_like(prefix.to_s)
  where('name LIKE ?', "#{literal_prefix}%")
end
|
|
35
45
|
|
|
46
|
+
# Get the most frequently used tags
|
|
47
|
+
#
|
|
48
|
+
# @param limit [Integer] Maximum number of tags to return (default: 10)
|
|
49
|
+
# @return [ActiveRecord::Relation] Tags with usage_count attribute
|
|
50
|
+
#
|
|
51
|
+
# @example Get top 5 most used tags
|
|
52
|
+
# Tag.popular_tags(5).each { |t| puts "#{t.name}: #{t.usage_count}" }
|
|
53
|
+
#
|
|
36
54
|
def self.popular_tags(limit = 10)
|
|
37
55
|
joins(:node_tags)
|
|
38
56
|
.select('tags.*, COUNT(node_tags.id) as usage_count')
|
|
@@ -41,13 +59,27 @@ class HTM
|
|
|
41
59
|
.limit(limit)
|
|
42
60
|
end
|
|
43
61
|
|
|
62
|
+
# Return the tag with the given name, creating it when none exists
#
# @param name [String] Hierarchical tag name (e.g., "database:postgresql")
# @return [Tag] The found or created tag
#
def self.find_or_create_by_name(name)
  find_or_create_by(name: name)
end
|
|
47
70
|
|
|
48
71
|
# Returns a nested hash tree structure from the current scope
|
|
49
|
-
#
|
|
50
|
-
#
|
|
72
|
+
#
|
|
73
|
+
# @return [Hash] Nested hash representing the tag hierarchy
|
|
74
|
+
#
|
|
75
|
+
# @example Get all tags as a tree
|
|
76
|
+
# Tag.all.tree
|
|
77
|
+
# # => { "database" => { "postgresql" => {}, "mysql" => {} }, "ai" => { "llm" => {} } }
|
|
78
|
+
#
|
|
79
|
+
# @example Get filtered tags as a tree
|
|
80
|
+
# Tag.with_prefix("database").tree
|
|
81
|
+
# # => { "database" => { "postgresql" => {} } }
|
|
82
|
+
#
|
|
51
83
|
def self.tree
|
|
52
84
|
tree = {}
|
|
53
85
|
|
|
@@ -65,13 +97,79 @@ class HTM
|
|
|
65
97
|
end
|
|
66
98
|
|
|
67
99
|
# Render the tag tree of the current scope as text
#
# Uses directory-style formatting with ├── and └── characters
#
# @return [String] Formatted tree string
#
# @example Display all tags as a tree
#   puts Tag.all.tree_string
#   # ├── ai
#   # │   └── llm
#   # └── database
#   #     └── postgresql
#
def self.tree_string
  hierarchy = tree
  format_tree_branch(hierarchy)
end
|
|
74
115
|
|
|
116
|
+
# Render the tag tree of the current scope as a Mermaid flowchart
#
# @param direction [String] Flow direction: 'TD' (top-down), 'LR' (left-right), 'BT', 'RL'.
#   The value is written into the flowchart header as given.
# @return [String] Mermaid flowchart syntax; a single placeholder node when
#   there are no tags
#
# @example
#   puts Tag.all.tree_mermaid
#   Tag.all.tree_mermaid(direction: 'LR')  # Left to right
#
def self.tree_mermaid(direction: 'TD')
  hierarchy = tree
  header = "flowchart #{direction}"
  return "#{header}\n empty[No tags]" if hierarchy.empty?

  # The helper appends node definitions and edges to the output array,
  # numbering nodes from 0.
  output = [header]
  generate_mermaid_nodes(hierarchy, nil, output, {}, 0)
  output.join("\n")
end
|
|
136
|
+
|
|
137
|
+
# Render the tag tree of the current scope as an SVG document
#
# Uses dark theme with transparent background
#
# @param title [String] Optional title for the SVG
# @return [String] SVG markup
#
# @example
#   File.write('tags.svg', Tag.all.tree_svg)
#
def self.tree_svg(title: 'HTM Tag Hierarchy')
  hierarchy = tree
  return empty_tree_svg(title) if hierarchy.empty?

  # Layout constants
  box_width = 140
  box_height = 30
  column_step = 180
  row_step = 50
  margin = 40

  # Only the depth of the tree is needed to size the canvas horizontally.
  deepest_level = calculate_tree_stats(hierarchy)[:max_depth]

  # The row cursor lives in a one-element array so the recursive helper can
  # advance it; afterwards it holds the number of rows used.
  layout = {}
  row_cursor = [0]
  calculate_node_positions(hierarchy, 0, layout, row_cursor, column_step, row_step)

  canvas_width = (deepest_level * column_step) + box_width + (margin * 2)
  canvas_height = (row_cursor[0] * row_step) + box_height + (margin * 2)

  generate_tree_svg(hierarchy, layout, canvas_width, canvas_height, margin, box_width, box_height, title)
end
|
|
172
|
+
|
|
75
173
|
# Format a tree branch recursively (internal helper)
|
|
76
174
|
def self.format_tree_branch(node, is_last_array = [])
|
|
77
175
|
result = ''
|
|
@@ -97,23 +195,175 @@ class HTM
|
|
|
97
195
|
result
|
|
98
196
|
end
|
|
99
197
|
|
|
198
|
+
# Generate Mermaid nodes recursively (internal helper)
#
# Walks one level of the tag tree in sorted key order and appends, for each
# key, a node definition and (below the root level) an edge from its parent.
#
# @param node [Hash] Subtree: segment name => child subtree
# @param parent_path [String, nil] Colon-joined path of the parent, nil at the root
# @param lines [Array<String>] Output lines; appended to in place
# @param node_ids [Hash] Path => Mermaid node id; filled in place
# @param counter [Integer] Number to use for the next node id
# @return [Integer] The next unused node number
#
def self.generate_mermaid_nodes(node, parent_path, lines, node_ids, counter)
  node.keys.sort.each do |key|
    current_path = parent_path ? "#{parent_path}:#{key}" : key

    # Create unique node ID
    node_id = "n#{counter}"
    node_ids[current_path] = node_id
    counter += 1

    # The label sits inside a double-quoted Mermaid string; a raw double quote
    # would terminate it, so emit Mermaid's entity code instead.
    label = key.to_s.gsub('"', '#quot;')
    lines << " #{node_id}[\"#{label}\"]"

    # Add connection from parent
    parent_id = parent_path && node_ids[parent_path]
    lines << " #{parent_id} --> #{node_id}" if parent_id

    # Recurse into children, carrying the running counter forward
    children = node[key]
    counter = generate_mermaid_nodes(children, current_path, lines, node_ids, counter) unless children.empty?
  end

  counter
end
|
|
223
|
+
|
|
224
|
+
# Count the nodes of a tag tree and measure its depth (internal helper)
#
# @param node [Hash] Subtree: segment name => child subtree
# @param depth [Integer] Depth of the level above this subtree (0 at the root)
# @return [Hash] :total_nodes (all keys at every level) and :max_depth
#   (deepest level reached, counting the first level as 1)
#
def self.calculate_tree_stats(node, depth = 0)
  return { total_nodes: 0, max_depth: depth } if node.empty?

  level = depth + 1
  subtree_stats = node.values.map { |children| calculate_tree_stats(children, level) }

  {
    total_nodes: node.size + subtree_stats.sum { |stats| stats[:total_nodes] },
    max_depth: [level, *subtree_stats.map { |stats| stats[:max_depth] }].max
  }
end
|
|
239
|
+
|
|
240
|
+
# Assign a grid position to every node of a tag tree (internal helper)
#
# Nodes are visited depth-first in sorted key order. Each node takes the next
# free row, and its column is its depth. h_spacing and v_spacing are accepted
# but not used here; positions are grid coordinates, not pixels.
#
# @param node [Hash] Subtree: segment name => child subtree
# @param depth [Integer] Column for the nodes of this level
# @param positions [Hash] Path => { x:, y:, label: }; filled in place
# @param y_offset [Array<Integer>] One-element array holding the next free row;
#   advanced in place
# @param h_spacing [Integer] Unused
# @param v_spacing [Integer] Unused
# @param parent_path [String, nil] Colon-joined path of the parent, nil at the root
#
def self.calculate_node_positions(node, depth, positions, y_offset, h_spacing, v_spacing, parent_path = nil)
  node.keys.sort.each do |label|
    path = if parent_path
             "#{parent_path}:#{label}"
           else
             label
           end

    row = y_offset[0]
    positions[path] = { x: depth, y: row, label: label }
    y_offset[0] = row + 1

    subtree = node[label]
    next if subtree.empty?

    calculate_node_positions(subtree, depth + 1, positions, y_offset, h_spacing, v_spacing, path)
  end
end
|
|
256
|
+
|
|
257
|
+
# Generate the placeholder SVG shown when there are no tags (internal helper)
#
# @param title [String] Title text; XML-escaped before being embedded
# @return [String] SVG markup containing the title and a "No tags found" line
#
def self.empty_tree_svg(title)
  # Escape &, < and > so an arbitrary title is emitted as text and cannot
  # break or inject SVG markup.
  safe_title = title.to_s.encode(xml: :text)

  <<~SVG
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 300 100">
    <rect width="100%" height="100%" fill="transparent"/>
    <text x="150" y="30" text-anchor="middle" fill="#9CA3AF" font-family="system-ui, sans-serif" font-size="14" font-weight="bold">#{safe_title}</text>
    <text x="150" y="60" text-anchor="middle" fill="#6B7280" font-family="system-ui, sans-serif" font-size="12">No tags found</text>
    </svg>
  SVG
end
|
|
267
|
+
|
|
268
|
+
# Generate SVG tree visualization (internal helper)
#
# Draws the title, then one curved connector per parent/child pair, then one
# rounded rectangle with a centered label per node. Connectors are emitted
# first so they appear behind the nodes.
#
# @param tree_data [Hash] Tag tree; not read here (everything is drawn from positions)
# @param positions [Hash] Colon-joined path => { x: column, y: row, label: text }
# @param width [Integer] viewBox width
# @param height [Integer] Content height; the viewBox adds 40 for the title band
# @param padding [Integer] Margin around the drawing
# @param node_width [Integer] Width of a node rectangle
# @param node_height [Integer] Height of a node rectangle
# @param title [String] Title text; XML-escaped before being embedded
# @return [String] SVG markup
#
def self.generate_tree_svg(tree_data, positions, width, height, padding, node_width, node_height, title)
  # Color palette for different depths (dark theme)
  colors = ['#3B82F6', '#8B5CF6', '#EC4899', '#F59E0B', '#10B981', '#6366F1']

  # Escapes &, < and > so the title and tag labels are emitted as text and
  # cannot break or inject SVG markup.
  escape_text = ->(text) { text.to_s.encode(xml: :text) }

  svg_lines = []
  svg_lines << %(<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 #{width} #{height + 40}">)
  svg_lines << ' <rect width="100%" height="100%" fill="transparent"/>'

  # Title
  svg_lines << %Q( <text x="#{width / 2}" y="25" text-anchor="middle" fill="#F3F4F6" font-family="system-ui, sans-serif" font-size="16" font-weight="bold">#{escape_text.call(title)}</text>)

  # Draw connections first (so they appear behind nodes)
  positions.each do |path, pos|
    # The parent's path is this path without its last segment.
    parent_path = path.include?(':') ? path.split(':')[0..-2].join(':') : nil
    next unless parent_path && positions[parent_path]

    parent_pos = positions[parent_path]
    # From the middle of the parent's right edge to the middle of the child's left edge
    x1 = padding + (parent_pos[:x] * (node_width + 40)) + node_width
    y1 = 40 + padding + (parent_pos[:y] * (node_height + 20)) + (node_height / 2)
    x2 = padding + (pos[:x] * (node_width + 40))
    y2 = 40 + padding + (pos[:y] * (node_height + 20)) + (node_height / 2)

    # Curved connection line
    mid_x = (x1 + x2) / 2
    svg_lines << %Q( <path d="M#{x1},#{y1} C#{mid_x},#{y1} #{mid_x},#{y2} #{x2},#{y2}" stroke="#4B5563" stroke-width="2" fill="none"/>)
  end

  # Draw nodes
  positions.each do |path, pos|
    # Depth is the number of separators in the path; colors repeat when the
    # tree is deeper than the palette.
    depth = path.count(':')
    color = colors[depth % colors.size]

    x = padding + (pos[:x] * (node_width + 40))
    y = 40 + padding + (pos[:y] * (node_height + 20))

    # Node rectangle with rounded corners
    svg_lines << %Q( <rect x="#{x}" y="#{y}" width="#{node_width}" height="#{node_height}" rx="6" fill="#{color}" opacity="0.9"/>)

    # Node label
    text_x = x + (node_width / 2)
    text_y = y + (node_height / 2) + 4
    svg_lines << %Q( <text x="#{text_x}" y="#{text_y}" text-anchor="middle" fill="#FFFFFF" font-family="system-ui, sans-serif" font-size="11" font-weight="500">#{escape_text.call(pos[:label])}</text>)
  end

  svg_lines << '</svg>'
  svg_lines.join("\n")
end
|
|
316
|
+
|
|
100
317
|
# Instance methods
|
|
318
|
+
|
|
319
|
+
# First (top-level) segment of this tag's name
#
# @return [String] The segment before the first colon
#
# @example
#   tag = Tag.find_by(name: "database:postgresql:extensions")
#   tag.root_topic  # => "database"
#
def root_topic
  segments = name.split(':')
  segments.first
end
|
|
104
330
|
|
|
331
|
+
# Split this tag's name into its colon-separated segments
#
# @return [Array<String>] Array of topic segments, outermost first
#
# @example
#   tag = Tag.find_by(name: "database:postgresql:extensions")
#   tag.topic_levels  # => ["database", "postgresql", "extensions"]
#
def topic_levels
  name.split(':')
end
|
|
108
342
|
|
|
343
|
+
# Number of hierarchy levels in this tag's name
#
# @return [Integer] Count of colon-separated segments
#
# @example
#   Tag.find_by(name: "database").depth             # => 1
#   Tag.find_by(name: "database:postgresql").depth  # => 2
#
def depth
  topic_levels.size
end
|
|
112
354
|
|
|
355
|
+
# Whether this tag's name has more than one level
#
# @return [Boolean] True if the name contains a colon (the hierarchy separator)
#
def hierarchical?
  name.include?(':')
end
|
|
116
362
|
|
|
363
|
+
# Get the number of nodes using this tag
#
# When the record was loaded by a query that already selected a usage_count
# value (e.g. one that selects COUNT(node_tags.id) AS usage_count), that
# value is returned. Otherwise the node_tags association is counted.
#
# @return [Integer] Count of nodes with this tag
#
def usage_count
  # This method shadows the attribute reader for a selected usage_count
  # column; without this branch every tag of such a result set would issue
  # its own COUNT query.
  # NOTE(review): assumes the tags table has no stored usage_count column of
  # its own - confirm against the schema.
  return read_attribute(:usage_count).to_i if has_attribute?(:usage_count)

  node_tags.count
end
|