htm 0.0.18 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +119 -1
  3. data/README.md +12 -0
  4. data/Rakefile +104 -18
  5. data/db/migrate/00001_enable_extensions.rb +9 -5
  6. data/db/migrate/00002_create_robots.rb +18 -6
  7. data/db/migrate/00003_create_file_sources.rb +30 -17
  8. data/db/migrate/00004_create_nodes.rb +60 -48
  9. data/db/migrate/00005_create_tags.rb +24 -12
  10. data/db/migrate/00006_create_node_tags.rb +28 -13
  11. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  12. data/db/schema.sql +17 -1
  13. data/db/seeds.rb +34 -34
  14. data/docs/api/embedding-service.md +140 -110
  15. data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
  16. data/docs/api/yard/HTM/Config.md +173 -0
  17. data/docs/api/yard/HTM/ConfigSection.md +28 -0
  18. data/docs/api/yard/HTM/Database.md +1 -1
  19. data/docs/api/yard/HTM/Railtie.md +2 -2
  20. data/docs/api/yard/HTM.md +0 -57
  21. data/docs/api/yard/index.csv +76 -61
  22. data/docs/api/yard-reference.md +2 -1
  23. data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
  24. data/docs/architecture/adrs/004-hive-mind.md +1 -1
  25. data/docs/architecture/adrs/008-robot-identification.md +1 -1
  26. data/docs/architecture/index.md +11 -9
  27. data/docs/architecture/overview.md +11 -7
  28. data/docs/assets/images/balanced-strategy-decay.svg +41 -0
  29. data/docs/assets/images/class-hierarchy.svg +1 -1
  30. data/docs/assets/images/eviction-priority.svg +43 -0
  31. data/docs/assets/images/exception-hierarchy.svg +2 -2
  32. data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
  33. data/docs/assets/images/htm-architecture-overview.svg +3 -3
  34. data/docs/assets/images/htm-core-components.svg +4 -4
  35. data/docs/assets/images/htm-layered-architecture.svg +1 -1
  36. data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
  37. data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
  38. data/docs/assets/images/memory-topology.svg +53 -0
  39. data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
  40. data/docs/database/naming-convention.md +244 -0
  41. data/docs/database_rake_tasks.md +31 -0
  42. data/docs/development/rake-tasks.md +80 -35
  43. data/docs/development/setup.md +76 -44
  44. data/docs/examples/basic-usage.md +133 -0
  45. data/docs/examples/config-files.md +170 -0
  46. data/docs/examples/file-loading.md +208 -0
  47. data/docs/examples/index.md +116 -0
  48. data/docs/examples/llm-configuration.md +168 -0
  49. data/docs/examples/mcp-client.md +172 -0
  50. data/docs/examples/rails-integration.md +173 -0
  51. data/docs/examples/robot-groups.md +210 -0
  52. data/docs/examples/sinatra-integration.md +218 -0
  53. data/docs/examples/standalone-app.md +216 -0
  54. data/docs/examples/telemetry.md +224 -0
  55. data/docs/examples/timeframes.md +143 -0
  56. data/docs/getting-started/installation.md +97 -40
  57. data/docs/getting-started/quick-start.md +28 -11
  58. data/docs/guides/configuration.md +515 -0
  59. data/docs/guides/file-loading.md +322 -0
  60. data/docs/guides/getting-started.md +40 -9
  61. data/docs/guides/index.md +3 -3
  62. data/docs/guides/mcp-server.md +100 -13
  63. data/docs/guides/propositions.md +264 -0
  64. data/docs/guides/recalling-memories.md +4 -4
  65. data/docs/guides/search-strategies.md +3 -3
  66. data/docs/guides/tags.md +318 -0
  67. data/docs/guides/telemetry.md +229 -0
  68. data/docs/index.md +8 -16
  69. data/docs/{architecture → robots}/hive-mind.md +8 -111
  70. data/docs/robots/index.md +73 -0
  71. data/docs/{guides → robots}/multi-robot.md +3 -3
  72. data/docs/{guides → robots}/robot-groups.md +8 -7
  73. data/docs/{architecture → robots}/two-tier-memory.md +13 -149
  74. data/docs/robots/why-robots.md +85 -0
  75. data/examples/.envrc +6 -0
  76. data/examples/.gitignore +2 -0
  77. data/examples/00_create_examples_db.rb +94 -0
  78. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  79. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  80. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  81. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  82. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  83. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  84. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  85. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  86. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  87. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  88. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  89. data/examples/11_robot_groups/README.md +335 -0
  90. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  91. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  92. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  93. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  94. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  95. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  96. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  97. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  98. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  99. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  100. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  101. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  102. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  103. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  104. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  105. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  106. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  107. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  108. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  109. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  110. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  111. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  112. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  113. data/examples/README.md +230 -211
  114. data/examples/examples_helper.rb +138 -0
  115. data/lib/htm/config/builder.rb +167 -0
  116. data/lib/htm/config/database.rb +317 -0
  117. data/lib/htm/config/defaults.yml +41 -13
  118. data/lib/htm/config/section.rb +74 -0
  119. data/lib/htm/config/validator.rb +83 -0
  120. data/lib/htm/config.rb +65 -361
  121. data/lib/htm/database.rb +85 -127
  122. data/lib/htm/errors.rb +14 -0
  123. data/lib/htm/integrations/sinatra.rb +13 -44
  124. data/lib/htm/job_adapter.rb +75 -1
  125. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  126. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  127. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  128. data/lib/htm/loaders/defaults_loader.rb +23 -0
  129. data/lib/htm/loaders/markdown_loader.rb +17 -15
  130. data/lib/htm/loaders/xdg_config_loader.rb +9 -9
  131. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  132. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  133. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  134. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  135. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  136. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  137. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  138. data/lib/htm/long_term_memory.rb +13 -13
  139. data/lib/htm/mcp/cli.rb +115 -8
  140. data/lib/htm/mcp/resources.rb +4 -3
  141. data/lib/htm/mcp/server.rb +5 -4
  142. data/lib/htm/mcp/tools.rb +37 -28
  143. data/lib/htm/migration.rb +72 -0
  144. data/lib/htm/models/file_source.rb +52 -31
  145. data/lib/htm/models/node.rb +224 -108
  146. data/lib/htm/models/node_tag.rb +49 -28
  147. data/lib/htm/models/robot.rb +38 -27
  148. data/lib/htm/models/robot_node.rb +63 -35
  149. data/lib/htm/models/tag.rb +126 -123
  150. data/lib/htm/observability.rb +45 -41
  151. data/lib/htm/proposition_service.rb +76 -7
  152. data/lib/htm/railtie.rb +2 -2
  153. data/lib/htm/robot_group.rb +30 -18
  154. data/lib/htm/sequel_config.rb +215 -0
  155. data/lib/htm/sql_builder.rb +14 -16
  156. data/lib/htm/tag_service.rb +78 -0
  157. data/lib/htm/tasks.rb +3 -0
  158. data/lib/htm/version.rb +1 -1
  159. data/lib/htm/workflows/remember_workflow.rb +213 -0
  160. data/lib/htm.rb +27 -22
  161. data/lib/tasks/db.rake +0 -2
  162. data/lib/tasks/doc.rake +2 -2
  163. data/lib/tasks/files.rake +11 -18
  164. data/lib/tasks/htm.rake +190 -62
  165. data/lib/tasks/jobs.rake +179 -54
  166. data/lib/tasks/tags.rake +8 -13
  167. data/mkdocs.yml +33 -8
  168. data/scripts/backfill_parent_tags.rb +376 -0
  169. data/scripts/normalize_plural_tags.rb +335 -0
  170. metadata +168 -86
  171. data/docs/api/yard/HTM/Configuration.md +0 -240
  172. data/docs/telemetry.md +0 -391
  173. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  174. data/examples/sinatra_app/Gemfile.lock +0 -166
  175. data/lib/htm/active_record_config.rb +0 -104
  176. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  177. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  178. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  179. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  180. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  181. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  182. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  183. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  184. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  185. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  186. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  187. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  188. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  189. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  190. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  191. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  192. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  193. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  194. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  195. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  196. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  197. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  198. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  199. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  200. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  201. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  202. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  203. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  204. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  205. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  206. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  207. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  208. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  209. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  210. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  211. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  212. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  213. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  214. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  215. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  216. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'async'
4
+ require 'async/barrier'
5
+
3
6
  class HTM
4
7
  # Job adapter for pluggable background job backends
5
8
  #
@@ -11,6 +14,7 @@ class HTM
11
14
  # - :sidekiq - Direct Sidekiq integration (recommended for Sinatra apps)
12
15
  # - :inline - Synchronous execution (recommended for CLI and tests)
13
16
  # - :thread - Background thread (legacy, for standalone apps)
17
+ # - :fiber - Fiber-based concurrency using async gem (recommended for I/O-bound jobs)
14
18
  #
15
19
  # @example Configure job backend
16
20
  # HTM.configure do |config|
@@ -44,8 +48,10 @@ class HTM
44
48
  enqueue_inline(job_class, **params)
45
49
  when :thread
46
50
  enqueue_thread(job_class, **params)
51
+ when :fiber
52
+ enqueue_fiber(job_class, **params)
47
53
  else
48
- raise HTM::Error, "Unknown job backend: #{backend}. Supported backends: :active_job, :sidekiq, :inline, :thread"
54
+ raise HTM::Error, "Unknown job backend: #{backend}. Supported backends: :active_job, :sidekiq, :inline, :thread, :fiber"
49
55
  end
50
56
  end
51
57
 
@@ -98,6 +104,74 @@ class HTM
98
104
  HTM.logger.error "Failed to start thread for #{job_class.name}: #{e.message}"
99
105
  end
100
106
 
107
+ # Execute job using async gem (fiber-based concurrency)
108
+ # Non-blocking for I/O-bound operations like LLM API calls
109
+ def enqueue_fiber(job_class, **params)
110
+ Async do
111
+ begin
112
+ job_class.perform(**params)
113
+ rescue StandardError => e
114
+ HTM.logger.error "Fiber job #{job_class.name} failed: #{e.class.name} - #{e.message}"
115
+ end
116
+ end
117
+ rescue StandardError => e
118
+ HTM.logger.error "Failed to start fiber for #{job_class.name}: #{e.message}"
119
+ end
120
+
121
+ public
122
+
123
+ # Execute multiple jobs in parallel using fibers
124
+ # Best for I/O-bound jobs like LLM API calls
125
+ #
126
+ # @param jobs [Array<Array>] Array of [job_class, params] pairs
127
+ # @return [void]
128
+ #
129
+ # @example Run embedding and tags jobs in parallel
130
+ # JobAdapter.enqueue_parallel([
131
+ # [GenerateEmbeddingJob, { node_id: 123 }],
132
+ # [GenerateTagsJob, { node_id: 123 }]
133
+ # ])
134
+ #
135
+ def enqueue_parallel(jobs)
136
+ return if jobs.empty?
137
+
138
+ backend = HTM.configuration.job_backend
139
+
140
+ case backend
141
+ when :fiber
142
+ enqueue_parallel_fiber(jobs)
143
+ when :inline
144
+ # Run sequentially for inline backend
145
+ jobs.each { |job_class, params| enqueue_inline(job_class, **params) }
146
+ else
147
+ # For other backends, enqueue each job separately
148
+ jobs.each { |job_class, params| enqueue(job_class, **params) }
149
+ end
150
+ end
151
+
152
+ private
153
+
154
+ # Execute multiple jobs in parallel using async fibers
155
+ def enqueue_parallel_fiber(jobs)
156
+ Async do |task|
157
+ barrier = Async::Barrier.new
158
+
159
+ jobs.each do |job_class, params|
160
+ barrier.async do
161
+ begin
162
+ job_class.perform(**params)
163
+ rescue StandardError => e
164
+ HTM.logger.error "Parallel fiber job #{job_class.name} failed: #{e.class.name} - #{e.message}"
165
+ end
166
+ end
167
+ end
168
+
169
+ barrier.wait
170
+ end
171
+ rescue StandardError => e
172
+ HTM.logger.error "Failed to start parallel fibers: #{e.message}"
173
+ end
174
+
101
175
  # Convert HTM job class to ActiveJob class
102
176
  def to_active_job_class(job_class)
103
177
  # If it's already an ActiveJob, return it
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../errors'
4
- require_relative '../models/node'
5
4
  require_relative '../embedding_service'
6
5
 
7
6
  class HTM
@@ -23,7 +22,7 @@ class HTM
23
22
  # @param node_id [Integer] ID of the node to process
24
23
  #
25
24
  def self.perform(node_id:)
26
- node = HTM::Models::Node.find_by(id: node_id)
25
+ node = HTM::Models::Node.first(id: node_id)
27
26
 
28
27
  unless node
29
28
  HTM.logger.warn "GenerateEmbeddingJob: Node #{node_id} not found"
@@ -31,7 +30,7 @@ class HTM
31
30
  end
32
31
 
33
32
  # Skip if already has embedding
34
- return if node.embedding.present?
33
+ return if node.embedding
35
34
 
36
35
  provider = HTM.configuration.embedding_provider.to_s
37
36
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
@@ -41,7 +40,7 @@ class HTM
41
40
  result = HTM::EmbeddingService.generate(node.content)
42
41
 
43
42
  # Update node with processed embedding
44
- node.update!(embedding: result[:storage_embedding])
43
+ node.update(embedding: result[:storage_embedding])
45
44
 
46
45
  # Record success metrics
47
46
  elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../errors'
4
- require_relative '../models/node'
5
4
  require_relative '../proposition_service'
6
5
 
7
6
  class HTM
@@ -25,7 +24,7 @@ class HTM
25
24
  # @param robot_id [Integer] ID of the robot that owns this node
26
25
  #
27
26
  def self.perform(node_id:, robot_id:)
28
- node = HTM::Models::Node.find_by(id: node_id)
27
+ node = HTM::Models::Node.first(id: node_id)
29
28
 
30
29
  unless node
31
30
  HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found"
@@ -49,14 +48,14 @@ class HTM
49
48
  token_count = HTM.count_tokens(proposition_text)
50
49
 
51
50
  # Create proposition node with is_proposition marker
52
- proposition_node = HTM::Models::Node.create!(
51
+ proposition_node = HTM::Models::Node.create(
53
52
  content: proposition_text,
54
53
  token_count: token_count,
55
54
  metadata: { is_proposition: true, source_node_id: node_id }
56
55
  )
57
56
 
58
57
  # Link to robot via RobotNode
59
- HTM::Models::RobotNode.find_or_create_by!(
58
+ HTM::Models::RobotNode.find_or_create(
60
59
  robot_id: robot_id,
61
60
  node_id: proposition_node.id
62
61
  )
@@ -79,7 +78,7 @@ class HTM
79
78
  # Log proposition-specific errors
80
79
  HTM.logger.error "GeneratePropositionsJob: Proposition extraction failed for node #{node_id}: #{e.message}"
81
80
 
82
- rescue ActiveRecord::RecordInvalid => e
81
+ rescue Sequel::ValidationFailed => e
83
82
  # Log validation errors
84
83
  HTM.logger.error "GeneratePropositionsJob: Database validation failed for node #{node_id}: #{e.message}"
85
84
 
@@ -1,9 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../errors'
4
- require_relative '../models/node'
5
- require_relative '../models/tag'
6
- require_relative '../models/node_tag'
7
4
  require_relative '../tag_service'
8
5
 
9
6
  class HTM
@@ -26,7 +23,7 @@ class HTM
26
23
  # @param node_id [Integer] ID of the node to process
27
24
  #
28
25
  def self.perform(node_id:)
29
- node = HTM::Models::Node.find_by(id: node_id)
26
+ node = HTM::Models::Node.first(id: node_id)
30
27
 
31
28
  unless node
32
29
  HTM.logger.warn "GenerateTagsJob: Node #{node_id} not found"
@@ -39,23 +36,27 @@ class HTM
39
36
  begin
40
37
  # Get existing ontology for context (sample of recent tags)
41
38
  existing_ontology = HTM::Models::Tag
42
- .order(created_at: :desc)
39
+ .order(Sequel.desc(:created_at))
43
40
  .limit(100)
44
- .pluck(:name)
41
+ .select_map(:name)
45
42
 
46
43
  # Extract and validate tags using TagService
47
44
  tag_names = HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
48
45
  return if tag_names.empty?
49
46
 
50
- # Create or find tags and associate with node
47
+ # Create or find tags (including all parent tags) and associate with node
48
+ # For "database:postgresql:extensions", this creates and associates:
49
+ # - "database"
50
+ # - "database:postgresql"
51
+ # - "database:postgresql:extensions"
51
52
  tag_names.each do |tag_name|
52
- tag = HTM::Models::Tag.find_or_create_by!(name: tag_name)
53
-
54
- # Create association if it doesn't exist
55
- HTM::Models::NodeTag.find_or_create_by!(
56
- node_id: node.id,
57
- tag_id: tag.id
58
- )
53
+ HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
54
+ # Create association if it doesn't exist
55
+ HTM::Models::NodeTag.find_or_create(
56
+ node_id: node.id,
57
+ tag_id: tag.id
58
+ )
59
+ end
59
60
  end
60
61
 
61
62
  # Record success metrics
@@ -79,7 +80,7 @@ class HTM
79
80
  # Log tag-specific errors
80
81
  HTM.logger.error "GenerateTagsJob: Tag generation failed for node #{node_id}: #{e.message}"
81
82
 
82
- rescue ActiveRecord::RecordInvalid => e
83
+ rescue Sequel::ValidationFailed => e
83
84
  # Record failure metrics
84
85
  elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
85
86
  HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
@@ -79,6 +79,29 @@ class HTM
79
79
  raw = load_raw_yaml
80
80
  raw[:defaults] || {}
81
81
  end
82
+
83
+ # Returns valid environment names from the config file
84
+ #
85
+ # Valid environments are top-level keys in defaults.yml excluding 'defaults'.
86
+ # For example, if defaults.yml has keys: defaults, development, test, production
87
+ # this returns [:development, :test, :production]
88
+ #
89
+ # @return [Array<Symbol>] list of valid environment names
90
+ def valid_environments
91
+ raw = load_raw_yaml
92
+ raw.keys.reject { |k| k == :defaults }.sort
93
+ end
94
+
95
+ # Check if a given environment name is valid
96
+ #
97
+ # @param env [String, Symbol] environment name to check
98
+ # @return [Boolean] true if environment is valid
99
+ def valid_environment?(env)
100
+ return false if env.nil? || env.to_s.empty?
101
+ return false if env.to_s == 'defaults'
102
+
103
+ valid_environments.include?(env.to_sym)
104
+ end
82
105
  end
83
106
 
84
107
  def call(name:, **_options)
@@ -78,10 +78,12 @@ class HTM
78
78
  file_hash = Digest::SHA256.hexdigest(content)
79
79
 
80
80
  # Find or create source record
81
- source = HTM::Models::FileSource.find_or_initialize_by(file_path: expanded_path)
81
+ source = HTM::Models::FileSource.first(file_path: expanded_path)
82
+ is_new = source.nil?
83
+ source ||= HTM::Models::FileSource.new(file_path: expanded_path)
82
84
 
83
85
  # Check if sync needed
84
- unless force || source.new_record? || source.needs_sync?(stat.mtime)
86
+ unless force || is_new || source.needs_sync?(stat.mtime)
85
87
  return {
86
88
  file_path: expanded_path,
87
89
  chunks_created: 0,
@@ -104,18 +106,18 @@ class HTM
104
106
  end
105
107
 
106
108
  # Save source first (need ID for node association)
107
- source.save! if source.new_record?
109
+ source.save if is_new
108
110
 
109
111
  # Sync chunks to database (chunks now include cursor positions)
110
112
  result = sync_chunks(source, chunks)
111
113
 
112
114
  # Update source record
113
- source.update!(
115
+ source.update(
114
116
  file_hash: file_hash,
115
117
  mtime: stat.mtime,
116
118
  file_size: stat.size,
117
119
  frontmatter: frontmatter,
118
- last_synced_at: Time.current
120
+ last_synced_at: Time.now
119
121
  )
120
122
 
121
123
  result.merge(
@@ -195,9 +197,9 @@ class HTM
195
197
  deleted = 0
196
198
 
197
199
  # Get existing nodes for this source (include soft-deleted for potential restore)
198
- existing_nodes = source.persisted? ?
199
- HTM::Models::Node.unscoped.where(source_id: source.id).to_a : []
200
- existing_by_hash = existing_nodes.index_by(&:content_hash)
200
+ existing_nodes = source.id ?
201
+ HTM::Models::Node.with_deleted.where(source_id: source.id).all : []
202
+ existing_by_hash = existing_nodes.each_with_object({}) { |n, h| h[n.content_hash] = n }
201
203
 
202
204
  # Track which existing nodes we've matched
203
205
  matched_hashes = Set.new
@@ -217,7 +219,7 @@ class HTM
217
219
 
218
220
  changes = {}
219
221
  changes[:chunk_position] = position if node.chunk_position != position
220
- changes[:deleted_at] = nil if node.deleted_at.present?
222
+ changes[:deleted_at] = nil if node.deleted_at
221
223
 
222
224
  # Update cursor in metadata if changed
223
225
  current_cursor = node.metadata&.dig('cursor')
@@ -227,7 +229,7 @@ class HTM
227
229
  end
228
230
 
229
231
  if changes.any?
230
- node.update!(changes)
232
+ node.update(changes)
231
233
  updated += 1
232
234
  end
233
235
  else
@@ -240,7 +242,7 @@ class HTM
240
242
  # Soft-delete chunks that no longer exist in file
241
243
  existing_by_hash.each do |hash, node|
242
244
  next if matched_hashes.include?(hash)
243
- next if node.deleted_at.present? # Already deleted
245
+ next if node.deleted_at # Already deleted
244
246
 
245
247
  node.soft_delete!
246
248
  deleted += 1
@@ -265,18 +267,18 @@ class HTM
265
267
  node_id = @htm.remember(content, metadata: chunk_metadata)
266
268
 
267
269
  # Update with source reference
268
- node = HTM::Models::Node.find(node_id)
269
- node.update!(source_id: source.id, chunk_position: position)
270
+ node = HTM::Models::Node[node_id]
271
+ node.update(source_id: source.id, chunk_position: position)
270
272
 
271
273
  node
272
- rescue ActiveRecord::RecordNotUnique
274
+ rescue Sequel::UniqueConstraintViolation
273
275
  # Duplicate content exists (different source or no source)
274
276
  # Find and link to this source
275
277
  existing = HTM::Models::Node.find_by_content(content)
276
278
  if existing && existing.source_id.nil?
277
279
  # Merge cursor into existing metadata
278
280
  new_metadata = (existing.metadata || {}).merge('cursor' => cursor) if cursor
279
- existing.update!(
281
+ existing.update(
280
282
  source_id: source.id,
281
283
  chunk_position: position,
282
284
  metadata: new_metadata || existing.metadata
@@ -28,24 +28,24 @@ class HTM
28
28
  class << self
29
29
  # Returns all XDG config paths to check, in order of priority (lowest first)
30
30
  #
31
+ # Per XDG spec: If $XDG_CONFIG_HOME is set, use it; otherwise use ~/.config
32
+ #
31
33
  # @return [Array<String>] list of potential config file paths
32
34
  def config_paths
33
35
  paths = []
34
36
 
35
- # macOS Application Support (lowest priority for XDG loader)
36
- if macos?
37
+ # macOS Application Support (lowest priority, only when XDG_CONFIG_HOME is not set)
38
+ if macos? && (!ENV['XDG_CONFIG_HOME'] || ENV['XDG_CONFIG_HOME'].empty?)
37
39
  macos_path = File.expand_path('~/Library/Application Support/htm')
38
40
  paths << macos_path if Dir.exist?(File.dirname(macos_path))
39
41
  end
40
42
 
41
- # XDG default: ~/.config/htm
42
- xdg_default = File.expand_path('~/.config/htm')
43
- paths << xdg_default
44
-
45
- # XDG_CONFIG_HOME override (highest priority for XDG loader)
43
+ # XDG_CONFIG_HOME takes precedence over default
46
44
  if ENV['XDG_CONFIG_HOME'] && !ENV['XDG_CONFIG_HOME'].empty?
47
- xdg_home = File.join(ENV['XDG_CONFIG_HOME'], 'htm')
48
- paths << xdg_home unless xdg_home == xdg_default
45
+ paths << File.join(ENV['XDG_CONFIG_HOME'], 'htm')
46
+ else
47
+ # XDG default: ~/.config/htm (only when XDG_CONFIG_HOME is not set)
48
+ paths << File.expand_path('~/.config/htm')
49
49
  end
50
50
 
51
51
  paths
@@ -79,6 +79,7 @@ class HTM
79
79
 
80
80
  # Combined tsvector + trigram search
81
81
  # tsvector matches get boosted score, trigram provides fuzzy fallback
82
+ # Note: Using ? placeholders for Sequel compatibility
82
83
  sql = <<~SQL
83
84
  WITH tsvector_matches AS (
84
85
  -- Primary: tsvector full-text search (stemmed word matching)
@@ -114,24 +115,23 @@ class HTM
114
115
  LIMIT ?
115
116
  SQL
116
117
 
117
- result = ActiveRecord::Base.connection.select_all(
118
- ActiveRecord::Base.sanitize_sql_array([
119
- sql,
120
- TSVECTOR_SCORE_BOOST, # boost for tsvector
121
- query, # ts_rank query
122
- query, # tsvector match query
123
- query, # trigram similarity query
124
- query, # trigram match query
125
- TRIGRAM_SIMILARITY_THRESHOLD,
126
- limit
127
- ])
128
- )
118
+ result = HTM.db.fetch(
119
+ sql,
120
+ TSVECTOR_SCORE_BOOST, # boost for tsvector
121
+ query, # query for ts_rank
122
+ query, # query for plainto_tsquery
123
+ query, # query for similarity (trigram)
124
+ query, # query for similarity condition
125
+ TRIGRAM_SIMILARITY_THRESHOLD, # similarity threshold
126
+ limit # limit
127
+ ).all
129
128
 
130
129
  # Track access for retrieved nodes
131
- node_ids = result.map { |r| r['id'] }
130
+ node_ids = result.map { |r| r[:id] }
132
131
  track_access(node_ids)
133
132
 
134
- result.to_a
133
+ # Convert to hash with string keys for compatibility
134
+ result.map { |r| r.transform_keys(&:to_s) }
135
135
  end
136
136
  end
137
137
  end