htm 0.0.30 → 0.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. checksums.yaml +4 -4
  2. data/.irbrc +2 -3
  3. data/.rubocop.yml +184 -0
  4. data/CHANGELOG.md +46 -0
  5. data/README.md +2 -0
  6. data/Rakefile +93 -12
  7. data/db/migrate/00008_create_node_relationships.rb +54 -0
  8. data/db/migrate/00009_fix_node_relationships_column_types.rb +17 -0
  9. data/db/schema.sql +124 -1
  10. data/docs/api/database.md +35 -57
  11. data/docs/api/embedding-service.md +1 -1
  12. data/docs/api/index.md +26 -15
  13. data/docs/api/working-memory.md +8 -8
  14. data/docs/architecture/index.md +5 -7
  15. data/docs/architecture/overview.md +5 -8
  16. data/docs/assets/images/htm-architecture-overview.svg +1 -1
  17. data/docs/assets/images/htm-context-assembly-flow.svg +2 -2
  18. data/docs/assets/images/htm-layered-architecture.svg +3 -3
  19. data/docs/assets/images/two-tier-memory-architecture.svg +1 -1
  20. data/docs/database/README.md +1 -0
  21. data/docs/database_rake_tasks.md +20 -28
  22. data/docs/development/contributing.md +5 -5
  23. data/docs/development/index.md +4 -7
  24. data/docs/development/schema.md +71 -1
  25. data/docs/development/setup.md +40 -82
  26. data/docs/development/testing.md +1 -1
  27. data/docs/examples/file-loading.md +4 -4
  28. data/docs/examples/mcp-client.md +1 -1
  29. data/docs/getting-started/quick-start.md +4 -4
  30. data/docs/guides/adding-memories.md +14 -1
  31. data/docs/guides/configuration.md +5 -5
  32. data/docs/guides/context-assembly.md +4 -4
  33. data/docs/guides/file-loading.md +12 -12
  34. data/docs/guides/getting-started.md +2 -2
  35. data/docs/guides/long-term-memory.md +7 -27
  36. data/docs/guides/propositions.md +20 -19
  37. data/docs/guides/recalling-memories.md +5 -5
  38. data/docs/guides/tags.md +18 -13
  39. data/docs/multi_framework_support.md +1 -1
  40. data/docs/robots/hive-mind.md +1 -1
  41. data/docs/robots/multi-robot.md +2 -2
  42. data/docs/robots/robot-groups.md +1 -1
  43. data/docs/robots/two-tier-memory.md +72 -94
  44. data/docs/setup_local_database.md +8 -54
  45. data/docs/using_rake_tasks_in_your_app.md +6 -6
  46. data/examples/01_basic_usage.rb +1 -0
  47. data/examples/03_custom_llm_configuration.rb +1 -0
  48. data/examples/04_file_loader_usage.rb +1 -0
  49. data/examples/05_timeframe_demo.rb +1 -0
  50. data/examples/06_example_app/app.rb +1 -0
  51. data/examples/07_cli_app/htm_cli.rb +1 -0
  52. data/examples/09_mcp_client.rb +1 -0
  53. data/examples/10_telemetry/demo.rb +1 -0
  54. data/examples/11_robot_groups/multi_process.rb +1 -0
  55. data/examples/11_robot_groups/same_process.rb +1 -0
  56. data/examples/12_rails_app/.envrc +12 -0
  57. data/examples/12_rails_app/Gemfile +8 -3
  58. data/examples/12_rails_app/Gemfile.lock +94 -89
  59. data/examples/12_rails_app/README.md +70 -19
  60. data/examples/12_rails_app/app/controllers/application_controller.rb +6 -0
  61. data/examples/12_rails_app/app/controllers/chats_controller.rb +305 -0
  62. data/examples/12_rails_app/app/controllers/dashboard_controller.rb +3 -0
  63. data/examples/12_rails_app/app/controllers/files_controller.rb +17 -2
  64. data/examples/12_rails_app/app/controllers/home_controller.rb +8 -0
  65. data/examples/12_rails_app/app/controllers/memories_controller.rb +9 -4
  66. data/examples/12_rails_app/app/controllers/messages_controller.rb +214 -0
  67. data/examples/12_rails_app/app/controllers/robots_controller.rb +11 -1
  68. data/examples/12_rails_app/app/controllers/tags_controller.rb +14 -1
  69. data/examples/12_rails_app/app/javascript/application.js +1 -1
  70. data/examples/12_rails_app/app/models/application_record.rb +5 -0
  71. data/examples/12_rails_app/app/models/chat.rb +36 -0
  72. data/examples/12_rails_app/app/models/message.rb +5 -0
  73. data/examples/12_rails_app/app/models/model.rb +5 -0
  74. data/examples/12_rails_app/app/models/tool_call.rb +5 -0
  75. data/examples/12_rails_app/app/views/chats/index.html.erb +61 -0
  76. data/examples/12_rails_app/app/views/chats/show.html.erb +213 -0
  77. data/examples/12_rails_app/app/views/dashboard/index.html.erb +3 -0
  78. data/examples/12_rails_app/app/views/files/index.html.erb +10 -5
  79. data/examples/12_rails_app/app/views/files/new.html.erb +4 -2
  80. data/examples/12_rails_app/app/views/files/show.html.erb +19 -3
  81. data/examples/12_rails_app/app/views/home/index.html.erb +45 -0
  82. data/examples/12_rails_app/app/views/layouts/application.html.erb +20 -18
  83. data/examples/12_rails_app/app/views/memories/_memory_card.html.erb +1 -1
  84. data/examples/12_rails_app/app/views/memories/deleted.html.erb +3 -1
  85. data/examples/12_rails_app/app/views/memories/edit.html.erb +2 -0
  86. data/examples/12_rails_app/app/views/memories/index.html.erb +2 -0
  87. data/examples/12_rails_app/app/views/memories/new.html.erb +2 -0
  88. data/examples/12_rails_app/app/views/memories/show.html.erb +4 -2
  89. data/examples/12_rails_app/app/views/messages/_message.html.erb +20 -0
  90. data/examples/12_rails_app/app/views/robots/index.html.erb +2 -0
  91. data/examples/12_rails_app/app/views/robots/new.html.erb +2 -0
  92. data/examples/12_rails_app/app/views/robots/show.html.erb +2 -0
  93. data/examples/12_rails_app/app/views/search/index.html.erb +59 -8
  94. data/examples/12_rails_app/app/views/shared/_navbar.html.erb +75 -29
  95. data/examples/12_rails_app/app/views/tags/index.html.erb +2 -0
  96. data/examples/12_rails_app/app/views/tags/show.html.erb +3 -1
  97. data/examples/12_rails_app/config/application.rb +1 -1
  98. data/examples/12_rails_app/config/database.yml +9 -5
  99. data/examples/12_rails_app/config/importmap.rb +1 -1
  100. data/examples/12_rails_app/config/initializers/htm.rb +9 -2
  101. data/examples/12_rails_app/config/initializers/ruby_llm.rb +33 -0
  102. data/examples/12_rails_app/config/routes.rb +39 -23
  103. data/examples/12_rails_app/db/migrate/20250124000001_create_ruby_llm_tables.rb +34 -0
  104. data/examples/12_rails_app/db/migrate/20250124000002_create_models_table.rb +28 -0
  105. data/examples/12_rails_app/db/schema.rb +67 -0
  106. data/examples/examples_helper.rb +25 -0
  107. data/lib/htm/circuit_breaker.rb +5 -6
  108. data/lib/htm/config/builder.rb +12 -12
  109. data/lib/htm/config/database.rb +21 -27
  110. data/lib/htm/config/defaults.yml +25 -13
  111. data/lib/htm/config/validator.rb +12 -18
  112. data/lib/htm/config.rb +93 -173
  113. data/lib/htm/database.rb +193 -199
  114. data/lib/htm/embedding_service.rb +4 -9
  115. data/lib/htm/integrations/sinatra.rb +7 -7
  116. data/lib/htm/job_adapter.rb +14 -21
  117. data/lib/htm/jobs/generate_embedding_job.rb +28 -44
  118. data/lib/htm/jobs/generate_propositions_job.rb +29 -55
  119. data/lib/htm/jobs/generate_relationships_job.rb +137 -0
  120. data/lib/htm/jobs/generate_tags_job.rb +45 -67
  121. data/lib/htm/loaders/markdown_loader.rb +65 -112
  122. data/lib/htm/long_term_memory/fulltext_search.rb +1 -1
  123. data/lib/htm/long_term_memory/hybrid_search.rb +300 -128
  124. data/lib/htm/long_term_memory/node_operations.rb +2 -2
  125. data/lib/htm/long_term_memory/relevance_scorer.rb +100 -68
  126. data/lib/htm/long_term_memory/tag_operations.rb +87 -120
  127. data/lib/htm/long_term_memory/vector_search.rb +1 -1
  128. data/lib/htm/long_term_memory.rb +2 -1
  129. data/lib/htm/mcp/cli.rb +59 -58
  130. data/lib/htm/mcp/server.rb +5 -6
  131. data/lib/htm/mcp/tools.rb +30 -36
  132. data/lib/htm/migration.rb +10 -10
  133. data/lib/htm/models/node.rb +2 -3
  134. data/lib/htm/models/node_relationship.rb +72 -0
  135. data/lib/htm/models/node_tag.rb +2 -2
  136. data/lib/htm/models/robot_node.rb +2 -2
  137. data/lib/htm/models/tag.rb +41 -28
  138. data/lib/htm/observability.rb +45 -51
  139. data/lib/htm/proposition_service.rb +3 -7
  140. data/lib/htm/query_cache.rb +13 -15
  141. data/lib/htm/railtie.rb +1 -2
  142. data/lib/htm/robot_group.rb +9 -9
  143. data/lib/htm/sequel_config.rb +1 -0
  144. data/lib/htm/sql_builder.rb +1 -1
  145. data/lib/htm/tag_service.rb +2 -6
  146. data/lib/htm/timeframe.rb +4 -5
  147. data/lib/htm/timeframe_extractor.rb +42 -83
  148. data/lib/htm/version.rb +1 -1
  149. data/lib/htm/workflows/remember_workflow.rb +112 -115
  150. data/lib/htm/working_memory.rb +21 -26
  151. data/lib/htm.rb +103 -116
  152. data/lib/tasks/db.rake +0 -2
  153. data/lib/tasks/doc.rake +14 -13
  154. data/lib/tasks/files.rake +5 -12
  155. data/lib/tasks/htm.rake +70 -71
  156. data/lib/tasks/jobs.rake +41 -47
  157. data/lib/tasks/tags.rake +3 -8
  158. metadata +28 -106
  159. data/lib/htm/config/section.rb +0 -74
  160. data/lib/htm/loaders/defaults_loader.rb +0 -166
  161. data/lib/htm/loaders/xdg_config_loader.rb +0 -116
@@ -48,83 +48,28 @@ class HTM
48
48
  # - :skipped [Boolean] True if file was unchanged and skipped
49
49
  #
50
50
  def load_file(path, force: false)
51
- expanded_path = File.expand_path(path)
52
-
53
- unless File.exist?(expanded_path)
54
- raise ArgumentError, "File not found: #{path}"
55
- end
56
-
57
- unless File.file?(expanded_path)
58
- raise ArgumentError, "Not a file: #{path}"
59
- end
51
+ expanded_path = validate_file_path!(path)
52
+ content = read_file_content(expanded_path, path)
53
+ stat = File.stat(expanded_path)
54
+ file_hash = Digest::SHA256.hexdigest(content)
60
55
 
61
- # Validate file size before reading
62
- file_size = File.size(expanded_path)
63
- if file_size > MAX_FILE_SIZE
64
- raise ArgumentError, "File too large: #{path} (#{file_size} bytes). Maximum size is #{MAX_FILE_SIZE} bytes (10 MB)."
65
- end
66
-
67
- # Read file with encoding detection and fallback
68
- # Try UTF-8 first, then fall back to binary if encoding errors occur
69
- begin
70
- content = File.read(expanded_path, encoding: 'UTF-8')
71
- rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
72
- # Try reading as binary and force encoding to UTF-8, replacing invalid chars
73
- content = File.read(expanded_path, encoding: 'BINARY')
74
- content = content.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
75
- HTM.logger.warn "File #{path} has non-UTF-8 encoding, some characters may be replaced"
76
- end
77
- stat = File.stat(expanded_path)
78
- file_hash = Digest::SHA256.hexdigest(content)
79
-
80
- # Find or create source record
81
56
  source = HTM::Models::FileSource.first(file_path: expanded_path)
82
57
  is_new = source.nil?
83
58
  source ||= HTM::Models::FileSource.new(file_path: expanded_path)
84
59
 
85
- # Check if sync needed
86
60
  unless force || is_new || source.needs_sync?(stat.mtime)
87
- return {
88
- file_path: expanded_path,
89
- chunks_created: 0,
90
- chunks_updated: 0,
91
- chunks_deleted: 0,
92
- skipped: true
93
- }
61
+ return { file_path: expanded_path, chunks_created: 0, chunks_updated: 0, chunks_deleted: 0, skipped: true }
94
62
  end
95
63
 
96
- # Parse frontmatter and body
97
64
  frontmatter, body = extract_frontmatter(content)
98
-
99
- # Chunk the body with metadata (includes cursor positions)
100
65
  chunks = @chunker.chunk_with_metadata(body)
66
+ prepend_frontmatter_to_chunk(frontmatter, chunks)
101
67
 
102
- # Prepend frontmatter to first chunk if present
103
- if frontmatter.any? && chunks.any?
104
- frontmatter_yaml = YAML.dump(frontmatter).sub(/\A---\n/, "---\n")
105
- chunks[0][:text] = "#{frontmatter_yaml}---\n\n#{chunks[0][:text]}"
106
- end
107
-
108
- # Save source first (need ID for node association)
109
68
  source.save if is_new
110
-
111
- # Sync chunks to database (chunks now include cursor positions)
112
69
  result = sync_chunks(source, chunks)
113
-
114
- # Update source record
115
- source.update(
116
- file_hash: file_hash,
117
- mtime: stat.mtime,
118
- file_size: stat.size,
119
- frontmatter: frontmatter,
120
- last_synced_at: Time.now
121
- )
122
-
123
- result.merge(
124
- file_path: expanded_path,
125
- file_source_id: source.id,
126
- skipped: false
127
- )
70
+ source.update(file_hash: file_hash, mtime: stat.mtime, file_size: stat.size,
71
+ frontmatter: frontmatter, last_synced_at: Time.now)
72
+ result.merge(file_path: expanded_path, file_source_id: source.id, skipped: false)
128
73
  end
129
74
 
130
75
  # Load all matching files from a directory
@@ -148,16 +93,60 @@ class HTM
148
93
  files = Dir.glob(File.join(expanded_path, pattern))
149
94
 
150
95
  files.map do |file_path|
151
- begin
152
- load_file(file_path, force: force)
153
- rescue StandardError => e
154
- { file_path: file_path, error: e.message, skipped: false }
155
- end
96
+ load_file(file_path, force: force)
97
+ rescue StandardError => e
98
+ { file_path: file_path, error: e.message, skipped: false }
156
99
  end
157
100
  end
158
101
 
159
102
  private
160
103
 
104
+ def validate_file_path!(path)
105
+ expanded = File.expand_path(path)
106
+ raise ArgumentError, "File not found: #{path}" unless File.exist?(expanded)
107
+ raise ArgumentError, "Not a file: #{path}" unless File.file?(expanded)
108
+ size = File.size(expanded)
109
+ if size > MAX_FILE_SIZE
110
+ raise ArgumentError, "File too large: #{path} (#{size} bytes). Maximum size is #{MAX_FILE_SIZE} bytes (10 MB)."
111
+ end
112
+ expanded
113
+ end
114
+
115
+ def read_file_content(expanded_path, path)
116
+ File.read(expanded_path, encoding: 'UTF-8')
117
+ rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
118
+ content = File.read(expanded_path, encoding: 'BINARY')
119
+ HTM.logger.warn "File #{path} has non-UTF-8 encoding, some characters may be replaced"
120
+ content.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
121
+ end
122
+
123
+ def prepend_frontmatter_to_chunk(frontmatter, chunks)
124
+ return unless frontmatter.any? && chunks.any?
125
+ frontmatter_yaml = YAML.dump(frontmatter).sub(/\A---\n/, "---\n")
126
+ chunks[0][:text] = "#{frontmatter_yaml}---\n\n#{chunks[0][:text]}"
127
+ end
128
+
129
+ def update_existing_chunk(node, position, chunk_cursor)
130
+ changes = {}
131
+ changes[:chunk_position] = position if node.chunk_position != position
132
+ changes[:deleted_at] = nil if node.deleted_at
133
+ current_cursor = node.metadata&.dig('cursor')
134
+ changes[:metadata] = (node.metadata || {}).merge('cursor' => chunk_cursor) if current_cursor != chunk_cursor
135
+ return false unless changes.any?
136
+ node.update(changes)
137
+ true
138
+ end
139
+
140
+ def soft_delete_removed_chunks(existing_by_hash, matched_hashes)
141
+ count = 0
142
+ existing_by_hash.each_value do |node|
143
+ next if matched_hashes.include?(node.content_hash) || node.deleted_at
144
+ node.soft_delete!
145
+ count += 1
146
+ end
147
+ count
148
+ end
149
+
161
150
  # Extract YAML frontmatter from content
162
151
  #
163
152
  # @param content [String] File content
@@ -194,60 +183,24 @@ class HTM
194
183
  def sync_chunks(source, chunks)
195
184
  created = 0
196
185
  updated = 0
197
- deleted = 0
186
+ existing_nodes = source.id ? HTM::Models::Node.with_deleted.where(source_id: source.id).all : []
187
+ existing_by_hash = existing_nodes.to_h { |n| [n.content_hash, n] }
188
+ matched_hashes = Set.new
198
189
 
199
- # Get existing nodes for this source (include soft-deleted for potential restore)
200
- existing_nodes = source.id ?
201
- HTM::Models::Node.with_deleted.where(source_id: source.id).all : []
202
- existing_by_hash = existing_nodes.each_with_object({}) { |n, h| h[n.content_hash] = n }
203
-
204
- # Track which existing nodes we've matched
205
- matched_hashes = Set.new
206
-
207
- # Process each new chunk (chunks are now Hashes with :text and :cursor)
208
190
  chunks.each_with_index do |chunk_data, position|
209
191
  chunk_content = chunk_data[:text].strip
210
- chunk_cursor = chunk_data[:cursor]
211
192
  next if chunk_content.empty?
212
193
 
213
194
  chunk_hash = HTM::Models::Node.generate_content_hash(chunk_content)
214
-
215
195
  if existing_by_hash[chunk_hash]
216
- # Chunk exists - update position/cursor if needed, restore if soft-deleted
217
- node = existing_by_hash[chunk_hash]
218
196
  matched_hashes << chunk_hash
219
-
220
- changes = {}
221
- changes[:chunk_position] = position if node.chunk_position != position
222
- changes[:deleted_at] = nil if node.deleted_at
223
-
224
- # Update cursor in metadata if changed
225
- current_cursor = node.metadata&.dig('cursor')
226
- if current_cursor != chunk_cursor
227
- new_metadata = (node.metadata || {}).merge('cursor' => chunk_cursor)
228
- changes[:metadata] = new_metadata
229
- end
230
-
231
- if changes.any?
232
- node.update(changes)
233
- updated += 1
234
- end
235
- else
236
- # New chunk - create node with cursor in metadata
237
- node = create_chunk_node(source, chunk_content, position, cursor: chunk_cursor)
238
- created += 1 if node
197
+ updated += 1 if update_existing_chunk(existing_by_hash[chunk_hash], position, chunk_data[:cursor])
198
+ elsif create_chunk_node(source, chunk_content, position, cursor: chunk_data[:cursor])
199
+ created += 1
239
200
  end
240
201
  end
241
202
 
242
- # Soft-delete chunks that no longer exist in file
243
- existing_by_hash.each do |hash, node|
244
- next if matched_hashes.include?(hash)
245
- next if node.deleted_at # Already deleted
246
-
247
- node.soft_delete!
248
- deleted += 1
249
- end
250
-
203
+ deleted = soft_delete_removed_chunks(existing_by_hash, matched_hashes)
251
204
  { chunks_created: created, chunks_updated: updated, chunks_deleted: deleted }
252
205
  end
253
206
 
@@ -35,7 +35,7 @@ class HTM
35
35
  #
36
36
  def search_fulltext(timeframe:, query:, limit:, metadata: {})
37
37
  # Enforce limit to prevent DoS
38
- safe_limit = [[limit.to_i, 1].max, MAX_FULLTEXT_LIMIT].min
38
+ safe_limit = limit.to_i.clamp(1, MAX_FULLTEXT_LIMIT)
39
39
 
40
40
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
41
41
  result = @cache.fetch(:fulltext, timeframe, query, safe_limit, metadata) do