htm 0.0.20 → 0.0.31

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -0
  3. data/Rakefile +104 -18
  4. data/db/migrate/00001_enable_extensions.rb +9 -5
  5. data/db/migrate/00002_create_robots.rb +18 -6
  6. data/db/migrate/00003_create_file_sources.rb +30 -17
  7. data/db/migrate/00004_create_nodes.rb +60 -48
  8. data/db/migrate/00005_create_tags.rb +24 -12
  9. data/db/migrate/00006_create_node_tags.rb +28 -13
  10. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  11. data/db/schema.sql +17 -1
  12. data/db/seeds.rb +33 -33
  13. data/docs/database/naming-convention.md +244 -0
  14. data/docs/database_rake_tasks.md +31 -0
  15. data/docs/development/rake-tasks.md +80 -35
  16. data/docs/guides/mcp-server.md +70 -1
  17. data/examples/.envrc +6 -0
  18. data/examples/.gitignore +2 -0
  19. data/examples/00_create_examples_db.rb +94 -0
  20. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  21. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  22. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  23. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  24. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  25. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  26. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  27. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  28. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  29. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  30. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  31. data/examples/11_robot_groups/README.md +335 -0
  32. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  33. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  34. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  35. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  36. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  37. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  38. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  39. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  40. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  41. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  42. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  43. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  44. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  45. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  46. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  47. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  48. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  49. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  50. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  51. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  52. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  53. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  54. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  55. data/examples/README.md +230 -211
  56. data/examples/examples_helper.rb +138 -0
  57. data/lib/htm/config/builder.rb +167 -0
  58. data/lib/htm/config/database.rb +317 -0
  59. data/lib/htm/config/defaults.yml +62 -22
  60. data/lib/htm/config/validator.rb +83 -0
  61. data/lib/htm/config.rb +75 -462
  62. data/lib/htm/database.rb +85 -127
  63. data/lib/htm/errors.rb +14 -0
  64. data/lib/htm/integrations/sinatra.rb +13 -44
  65. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  66. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  67. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  68. data/lib/htm/loaders/markdown_loader.rb +17 -15
  69. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  70. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  71. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  72. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  73. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  74. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  75. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  76. data/lib/htm/long_term_memory.rb +13 -13
  77. data/lib/htm/mcp/cli.rb +115 -8
  78. data/lib/htm/mcp/resources.rb +4 -3
  79. data/lib/htm/mcp/server.rb +5 -4
  80. data/lib/htm/mcp/tools.rb +37 -28
  81. data/lib/htm/migration.rb +72 -0
  82. data/lib/htm/models/file_source.rb +52 -31
  83. data/lib/htm/models/node.rb +224 -108
  84. data/lib/htm/models/node_tag.rb +49 -28
  85. data/lib/htm/models/robot.rb +38 -27
  86. data/lib/htm/models/robot_node.rb +63 -35
  87. data/lib/htm/models/tag.rb +126 -123
  88. data/lib/htm/observability.rb +45 -41
  89. data/lib/htm/proposition_service.rb +76 -7
  90. data/lib/htm/railtie.rb +2 -2
  91. data/lib/htm/robot_group.rb +30 -18
  92. data/lib/htm/sequel_config.rb +215 -0
  93. data/lib/htm/sql_builder.rb +14 -16
  94. data/lib/htm/tag_service.rb +78 -0
  95. data/lib/htm/tasks.rb +3 -0
  96. data/lib/htm/version.rb +1 -1
  97. data/lib/htm/workflows/remember_workflow.rb +6 -5
  98. data/lib/htm.rb +26 -22
  99. data/lib/tasks/db.rake +0 -2
  100. data/lib/tasks/doc.rake +2 -2
  101. data/lib/tasks/files.rake +11 -18
  102. data/lib/tasks/htm.rake +190 -62
  103. data/lib/tasks/jobs.rake +179 -54
  104. data/lib/tasks/tags.rake +8 -13
  105. data/scripts/backfill_parent_tags.rb +376 -0
  106. data/scripts/normalize_plural_tags.rb +335 -0
  107. metadata +111 -85
  108. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  109. data/examples/sinatra_app/Gemfile.lock +0 -166
  110. data/lib/htm/active_record_config.rb +0 -104
  111. data/lib/htm/loaders/defaults_loader.rb +0 -143
  112. data/lib/htm/loaders/xdg_config_loader.rb +0 -116
  113. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  114. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  115. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  116. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  117. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  118. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  119. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  120. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  121. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  122. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  123. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  124. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  125. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  126. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  127. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  128. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  129. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  130. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  131. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  132. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  133. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  134. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  135. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  136. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  137. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  138. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  139. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  140. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  141. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  142. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  143. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  144. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  145. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  146. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  147. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  148. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  149. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  150. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  151. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  152. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  153. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../errors'
4
- require_relative '../models/node'
5
4
  require_relative '../embedding_service'
6
5
 
7
6
  class HTM
@@ -23,7 +22,7 @@ class HTM
23
22
  # @param node_id [Integer] ID of the node to process
24
23
  #
25
24
  def self.perform(node_id:)
26
- node = HTM::Models::Node.find_by(id: node_id)
25
+ node = HTM::Models::Node.first(id: node_id)
27
26
 
28
27
  unless node
29
28
  HTM.logger.warn "GenerateEmbeddingJob: Node #{node_id} not found"
@@ -31,7 +30,7 @@ class HTM
31
30
  end
32
31
 
33
32
  # Skip if already has embedding
34
- return if node.embedding.present?
33
+ return if node.embedding
35
34
 
36
35
  provider = HTM.configuration.embedding_provider.to_s
37
36
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
@@ -41,7 +40,7 @@ class HTM
41
40
  result = HTM::EmbeddingService.generate(node.content)
42
41
 
43
42
  # Update node with processed embedding
44
- node.update!(embedding: result[:storage_embedding])
43
+ node.update(embedding: result[:storage_embedding])
45
44
 
46
45
  # Record success metrics
47
46
  elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../errors'
4
- require_relative '../models/node'
5
4
  require_relative '../proposition_service'
6
5
 
7
6
  class HTM
@@ -25,7 +24,7 @@ class HTM
25
24
  # @param robot_id [Integer] ID of the robot that owns this node
26
25
  #
27
26
  def self.perform(node_id:, robot_id:)
28
- node = HTM::Models::Node.find_by(id: node_id)
27
+ node = HTM::Models::Node.first(id: node_id)
29
28
 
30
29
  unless node
31
30
  HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found"
@@ -49,14 +48,14 @@ class HTM
49
48
  token_count = HTM.count_tokens(proposition_text)
50
49
 
51
50
  # Create proposition node with is_proposition marker
52
- proposition_node = HTM::Models::Node.create!(
51
+ proposition_node = HTM::Models::Node.create(
53
52
  content: proposition_text,
54
53
  token_count: token_count,
55
54
  metadata: { is_proposition: true, source_node_id: node_id }
56
55
  )
57
56
 
58
57
  # Link to robot via RobotNode
59
- HTM::Models::RobotNode.find_or_create_by!(
58
+ HTM::Models::RobotNode.find_or_create(
60
59
  robot_id: robot_id,
61
60
  node_id: proposition_node.id
62
61
  )
@@ -79,7 +78,7 @@ class HTM
79
78
  # Log proposition-specific errors
80
79
  HTM.logger.error "GeneratePropositionsJob: Proposition extraction failed for node #{node_id}: #{e.message}"
81
80
 
82
- rescue ActiveRecord::RecordInvalid => e
81
+ rescue Sequel::ValidationFailed => e
83
82
  # Log validation errors
84
83
  HTM.logger.error "GeneratePropositionsJob: Database validation failed for node #{node_id}: #{e.message}"
85
84
 
@@ -1,9 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../errors'
4
- require_relative '../models/node'
5
- require_relative '../models/tag'
6
- require_relative '../models/node_tag'
7
4
  require_relative '../tag_service'
8
5
 
9
6
  class HTM
@@ -26,7 +23,7 @@ class HTM
26
23
  # @param node_id [Integer] ID of the node to process
27
24
  #
28
25
  def self.perform(node_id:)
29
- node = HTM::Models::Node.find_by(id: node_id)
26
+ node = HTM::Models::Node.first(id: node_id)
30
27
 
31
28
  unless node
32
29
  HTM.logger.warn "GenerateTagsJob: Node #{node_id} not found"
@@ -39,23 +36,27 @@ class HTM
39
36
  begin
40
37
  # Get existing ontology for context (sample of recent tags)
41
38
  existing_ontology = HTM::Models::Tag
42
- .order(created_at: :desc)
39
+ .order(Sequel.desc(:created_at))
43
40
  .limit(100)
44
- .pluck(:name)
41
+ .select_map(:name)
45
42
 
46
43
  # Extract and validate tags using TagService
47
44
  tag_names = HTM::TagService.extract(node.content, existing_ontology: existing_ontology)
48
45
  return if tag_names.empty?
49
46
 
50
- # Create or find tags and associate with node
47
+ # Create or find tags (including all parent tags) and associate with node
48
+ # For "database:postgresql:extensions", this creates and associates:
49
+ # - "database"
50
+ # - "database:postgresql"
51
+ # - "database:postgresql:extensions"
51
52
  tag_names.each do |tag_name|
52
- tag = HTM::Models::Tag.find_or_create_by!(name: tag_name)
53
-
54
- # Create association if it doesn't exist
55
- HTM::Models::NodeTag.find_or_create_by!(
56
- node_id: node.id,
57
- tag_id: tag.id
58
- )
53
+ HTM::Models::Tag.find_or_create_with_ancestors(tag_name).each do |tag|
54
+ # Create association if it doesn't exist
55
+ HTM::Models::NodeTag.find_or_create(
56
+ node_id: node.id,
57
+ tag_id: tag.id
58
+ )
59
+ end
59
60
  end
60
61
 
61
62
  # Record success metrics
@@ -79,7 +80,7 @@ class HTM
79
80
  # Log tag-specific errors
80
81
  HTM.logger.error "GenerateTagsJob: Tag generation failed for node #{node_id}: #{e.message}"
81
82
 
82
- rescue ActiveRecord::RecordInvalid => e
83
+ rescue Sequel::ValidationFailed => e
83
84
  # Record failure metrics
84
85
  elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
85
86
  HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
@@ -78,10 +78,12 @@ class HTM
78
78
  file_hash = Digest::SHA256.hexdigest(content)
79
79
 
80
80
  # Find or create source record
81
- source = HTM::Models::FileSource.find_or_initialize_by(file_path: expanded_path)
81
+ source = HTM::Models::FileSource.first(file_path: expanded_path)
82
+ is_new = source.nil?
83
+ source ||= HTM::Models::FileSource.new(file_path: expanded_path)
82
84
 
83
85
  # Check if sync needed
84
- unless force || source.new_record? || source.needs_sync?(stat.mtime)
86
+ unless force || is_new || source.needs_sync?(stat.mtime)
85
87
  return {
86
88
  file_path: expanded_path,
87
89
  chunks_created: 0,
@@ -104,18 +106,18 @@ class HTM
104
106
  end
105
107
 
106
108
  # Save source first (need ID for node association)
107
- source.save! if source.new_record?
109
+ source.save if is_new
108
110
 
109
111
  # Sync chunks to database (chunks now include cursor positions)
110
112
  result = sync_chunks(source, chunks)
111
113
 
112
114
  # Update source record
113
- source.update!(
115
+ source.update(
114
116
  file_hash: file_hash,
115
117
  mtime: stat.mtime,
116
118
  file_size: stat.size,
117
119
  frontmatter: frontmatter,
118
- last_synced_at: Time.current
120
+ last_synced_at: Time.now
119
121
  )
120
122
 
121
123
  result.merge(
@@ -195,9 +197,9 @@ class HTM
195
197
  deleted = 0
196
198
 
197
199
  # Get existing nodes for this source (include soft-deleted for potential restore)
198
- existing_nodes = source.persisted? ?
199
- HTM::Models::Node.unscoped.where(source_id: source.id).to_a : []
200
- existing_by_hash = existing_nodes.index_by(&:content_hash)
200
+ existing_nodes = source.id ?
201
+ HTM::Models::Node.with_deleted.where(source_id: source.id).all : []
202
+ existing_by_hash = existing_nodes.each_with_object({}) { |n, h| h[n.content_hash] = n }
201
203
 
202
204
  # Track which existing nodes we've matched
203
205
  matched_hashes = Set.new
@@ -217,7 +219,7 @@ class HTM
217
219
 
218
220
  changes = {}
219
221
  changes[:chunk_position] = position if node.chunk_position != position
220
- changes[:deleted_at] = nil if node.deleted_at.present?
222
+ changes[:deleted_at] = nil if node.deleted_at
221
223
 
222
224
  # Update cursor in metadata if changed
223
225
  current_cursor = node.metadata&.dig('cursor')
@@ -227,7 +229,7 @@ class HTM
227
229
  end
228
230
 
229
231
  if changes.any?
230
- node.update!(changes)
232
+ node.update(changes)
231
233
  updated += 1
232
234
  end
233
235
  else
@@ -240,7 +242,7 @@ class HTM
240
242
  # Soft-delete chunks that no longer exist in file
241
243
  existing_by_hash.each do |hash, node|
242
244
  next if matched_hashes.include?(hash)
243
- next if node.deleted_at.present? # Already deleted
245
+ next if node.deleted_at # Already deleted
244
246
 
245
247
  node.soft_delete!
246
248
  deleted += 1
@@ -265,18 +267,18 @@ class HTM
265
267
  node_id = @htm.remember(content, metadata: chunk_metadata)
266
268
 
267
269
  # Update with source reference
268
- node = HTM::Models::Node.find(node_id)
269
- node.update!(source_id: source.id, chunk_position: position)
270
+ node = HTM::Models::Node[node_id]
271
+ node.update(source_id: source.id, chunk_position: position)
270
272
 
271
273
  node
272
- rescue ActiveRecord::RecordNotUnique
274
+ rescue Sequel::UniqueConstraintViolation
273
275
  # Duplicate content exists (different source or no source)
274
276
  # Find and link to this source
275
277
  existing = HTM::Models::Node.find_by_content(content)
276
278
  if existing && existing.source_id.nil?
277
279
  # Merge cursor into existing metadata
278
280
  new_metadata = (existing.metadata || {}).merge('cursor' => cursor) if cursor
279
- existing.update!(
281
+ existing.update(
280
282
  source_id: source.id,
281
283
  chunk_position: position,
282
284
  metadata: new_metadata || existing.metadata
@@ -79,6 +79,7 @@ class HTM
79
79
 
80
80
  # Combined tsvector + trigram search
81
81
  # tsvector matches get boosted score, trigram provides fuzzy fallback
82
+ # Note: Using ? placeholders for Sequel compatibility
82
83
  sql = <<~SQL
83
84
  WITH tsvector_matches AS (
84
85
  -- Primary: tsvector full-text search (stemmed word matching)
@@ -114,24 +115,23 @@ class HTM
114
115
  LIMIT ?
115
116
  SQL
116
117
 
117
- result = ActiveRecord::Base.connection.select_all(
118
- ActiveRecord::Base.sanitize_sql_array([
119
- sql,
120
- TSVECTOR_SCORE_BOOST, # boost for tsvector
121
- query, # ts_rank query
122
- query, # tsvector match query
123
- query, # trigram similarity query
124
- query, # trigram match query
125
- TRIGRAM_SIMILARITY_THRESHOLD,
126
- limit
127
- ])
128
- )
118
+ result = HTM.db.fetch(
119
+ sql,
120
+ TSVECTOR_SCORE_BOOST, # boost for tsvector
121
+ query, # query for ts_rank
122
+ query, # query for plainto_tsquery
123
+ query, # query for similarity (trigram)
124
+ query, # query for similarity condition
125
+ TRIGRAM_SIMILARITY_THRESHOLD, # similarity threshold
126
+ limit # limit
127
+ ).all
129
128
 
130
129
  # Track access for retrieved nodes
131
- node_ids = result.map { |r| r['id'] }
130
+ node_ids = result.map { |r| r[:id] }
132
131
  track_access(node_ids)
133
132
 
134
- result.to_a
133
+ # Convert to hash with string keys for compatibility
134
+ result.map { |r| r.transform_keys(&:to_s) }
135
135
  end
136
136
  end
137
137
  end