htm 0.0.2 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +95 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +327 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +83 -12
  10. data/Rakefile +5 -0
  11. data/bin/htm_mcp.rb +527 -0
  12. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  13. data/db/migrate/00002_create_robots.rb +11 -0
  14. data/db/migrate/00003_create_file_sources.rb +20 -0
  15. data/db/migrate/00004_create_nodes.rb +65 -0
  16. data/db/migrate/00005_create_tags.rb +13 -0
  17. data/db/migrate/00006_create_node_tags.rb +18 -0
  18. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  19. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  20. data/db/schema.sql +172 -1
  21. data/docs/api/database.md +1 -2
  22. data/docs/api/htm.md +197 -2
  23. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  24. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  25. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  26. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  27. data/docs/api/yard/HTM/Configuration.md +175 -0
  28. data/docs/api/yard/HTM/Database.md +99 -0
  29. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  30. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  31. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  32. data/docs/api/yard/HTM/Error.md +11 -0
  33. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  34. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  35. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  36. data/docs/api/yard/HTM/Observability.md +107 -0
  37. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  38. data/docs/api/yard/HTM/Railtie.md +27 -0
  39. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  40. data/docs/api/yard/HTM/TagError.md +18 -0
  41. data/docs/api/yard/HTM/TagService.md +67 -0
  42. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  43. data/docs/api/yard/HTM/Timeframe.md +40 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  45. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  46. data/docs/api/yard/HTM/ValidationError.md +20 -0
  47. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  48. data/docs/api/yard/HTM.md +80 -0
  49. data/docs/api/yard/index.csv +179 -0
  50. data/docs/api/yard-reference.md +51 -0
  51. data/docs/database/README.md +128 -128
  52. data/docs/database/public.file_sources.md +42 -0
  53. data/docs/database/public.file_sources.svg +211 -0
  54. data/docs/database/public.node_tags.md +4 -4
  55. data/docs/database/public.node_tags.svg +212 -79
  56. data/docs/database/public.nodes.md +22 -12
  57. data/docs/database/public.nodes.svg +246 -127
  58. data/docs/database/public.robot_nodes.md +11 -9
  59. data/docs/database/public.robot_nodes.svg +220 -98
  60. data/docs/database/public.robots.md +2 -2
  61. data/docs/database/public.robots.svg +136 -81
  62. data/docs/database/public.tags.md +3 -3
  63. data/docs/database/public.tags.svg +118 -39
  64. data/docs/database/schema.json +850 -771
  65. data/docs/database/schema.svg +256 -197
  66. data/docs/development/schema.md +67 -2
  67. data/docs/guides/adding-memories.md +93 -7
  68. data/docs/guides/recalling-memories.md +36 -1
  69. data/examples/README.md +405 -0
  70. data/examples/cli_app/htm_cli.rb +65 -5
  71. data/examples/cli_app/temp.log +93 -0
  72. data/examples/file_loader_usage.rb +177 -0
  73. data/examples/mcp_client.rb +529 -0
  74. data/examples/robot_groups/lib/robot_group.rb +419 -0
  75. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  76. data/examples/robot_groups/multi_process.rb +286 -0
  77. data/examples/robot_groups/robot_worker.rb +136 -0
  78. data/examples/robot_groups/same_process.rb +229 -0
  79. data/examples/timeframe_demo.rb +276 -0
  80. data/lib/htm/active_record_config.rb +1 -1
  81. data/lib/htm/circuit_breaker.rb +202 -0
  82. data/lib/htm/configuration.rb +59 -13
  83. data/lib/htm/database.rb +67 -36
  84. data/lib/htm/embedding_service.rb +39 -2
  85. data/lib/htm/errors.rb +131 -11
  86. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  87. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  88. data/lib/htm/loaders/markdown_loader.rb +263 -0
  89. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  90. data/lib/htm/long_term_memory.rb +460 -343
  91. data/lib/htm/models/file_source.rb +99 -0
  92. data/lib/htm/models/node.rb +80 -5
  93. data/lib/htm/models/robot.rb +24 -1
  94. data/lib/htm/models/robot_node.rb +1 -0
  95. data/lib/htm/models/tag.rb +254 -4
  96. data/lib/htm/observability.rb +395 -0
  97. data/lib/htm/tag_service.rb +60 -3
  98. data/lib/htm/tasks.rb +26 -1
  99. data/lib/htm/timeframe.rb +194 -0
  100. data/lib/htm/timeframe_extractor.rb +307 -0
  101. data/lib/htm/version.rb +1 -1
  102. data/lib/htm/working_memory.rb +165 -70
  103. data/lib/htm.rb +328 -130
  104. data/lib/tasks/doc.rake +300 -0
  105. data/lib/tasks/files.rake +299 -0
  106. data/lib/tasks/htm.rake +158 -3
  107. data/lib/tasks/jobs.rake +3 -9
  108. data/lib/tasks/tags.rake +166 -6
  109. data/mkdocs.yml +36 -1
  110. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  111. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  112. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  113. data/notes/next_steps.md +100 -0
  114. data/notes/plan.md +627 -0
  115. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  116. data/notes/timescaledb_removal_summary.md +200 -0
  117. metadata +158 -17
  118. data/db/migrate/20250101000002_create_robots.rb +0 -14
  119. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  120. data/db/migrate/20250101000005_create_tags.rb +0 -38
  121. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  122. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  123. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  124. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  125. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  126. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  127. data/docs/database/public.working_memories.md +0 -40
  128. data/docs/database/public.working_memories.svg +0 -112
  129. data/lib/htm/models/working_memory_entry.rb +0 -88
data/lib/htm.rb CHANGED
@@ -3,15 +3,21 @@
3
3
  require_relative "htm/version"
4
4
  require_relative "htm/errors"
5
5
  require_relative "htm/configuration"
6
+ require_relative "htm/circuit_breaker"
6
7
  require_relative "htm/active_record_config"
7
8
  require_relative "htm/database"
8
9
  require_relative "htm/long_term_memory"
9
10
  require_relative "htm/working_memory"
10
11
  require_relative "htm/embedding_service"
11
12
  require_relative "htm/tag_service"
13
+ require_relative "htm/timeframe_extractor"
14
+ require_relative "htm/timeframe"
12
15
  require_relative "htm/job_adapter"
13
16
  require_relative "htm/jobs/generate_embedding_job"
14
17
  require_relative "htm/jobs/generate_tags_job"
18
+ require_relative "htm/loaders/paragraph_chunker"
19
+ require_relative "htm/loaders/markdown_loader"
20
+ require_relative "htm/observability"
15
21
 
16
22
  require "pg"
17
23
  require "securerandom"
@@ -24,14 +30,14 @@ require_relative "htm/railtie" if defined?(Rails::Railtie)
24
30
  #
25
31
  # HTM implements a two-tier memory system:
26
32
  # - Working Memory: Token-limited, active context for immediate LLM use
27
- # - Long-term Memory: Durable PostgreSQL/TimescaleDB storage for permanent knowledge
33
+ # - Long-term Memory: Durable PostgreSQL storage with pgvector for permanent knowledge
28
34
  #
29
35
  # Key Features:
30
- # - Never forgets unless explicitly told
31
- # - RAG-based retrieval (temporal + semantic search)
36
+ # - Never forgets unless explicitly told (soft delete by default)
37
+ # - RAG-based retrieval (temporal + semantic search via pgvector)
32
38
  # - Multi-robot "hive mind" - all robots share global memory
33
- # - Relationship graphs for knowledge connections
34
- # - Time-series optimized with TimescaleDB
39
+ # - Hierarchical tagging system for knowledge organization
40
+ # - Async background processing for embeddings and tags
35
41
  #
36
42
  # @example Basic usage
37
43
  # htm = HTM.new(robot_name: "Code Helper")
@@ -98,13 +104,10 @@ class HTM
98
104
  # Stores content in long-term memory and adds it to working memory.
99
105
  # Embeddings and hierarchical tags are automatically extracted by LLM in the background.
100
106
  #
101
- # If content is empty, returns the ID of the most recent node without creating a duplicate.
102
- # Nil values for content or source are converted to empty strings.
103
- #
104
- # @param content [String, nil] The information to remember
105
- # @param source [String, nil] Where this content came from (defaults to empty string if not provided)
107
+ # @param content [String] The information to remember (required, cannot be nil or empty)
106
108
  # @param tags [Array<String>] Manual tags to assign (optional, in addition to auto-extracted tags)
107
109
  # @return [Integer] Database ID of the memory node
110
+ # @raise [ValidationError] If content is nil, empty, or exceeds maximum size
108
111
  #
109
112
  # @example Remember with source
110
113
  # node_id = htm.remember("PostgreSQL is great for HTM")
@@ -112,16 +115,31 @@ class HTM
112
115
  # @example Remember with manual tags
113
116
  # node_id = htm.remember("Time-series data", tags: ["database:timescaledb"])
114
117
  #
115
- def remember(content, tags: [])
116
- # Convert nil to empty string
117
- content = content.to_s
118
+ # @example Remember with metadata
119
+ # node_id = htm.remember("User prefers dark mode", metadata: { source: "user", confidence: 0.95 })
120
+ #
121
+ def remember(content, tags: [], metadata: {})
122
+ # Validate inputs
123
+ raise ValidationError, "Content cannot be nil" if content.nil?
124
+
125
+ content_str = content.to_s.strip
126
+ raise ValidationError, "Content cannot be empty" if content_str.empty?
118
127
 
119
- # If content is empty, return the last node ID without creating a new entry
120
- if content.empty?
121
- last_node = HTM::Models::Node.order(created_at: :desc).first
122
- return last_node&.id || 0
128
+ if content_str.bytesize > MAX_VALUE_LENGTH
129
+ raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
130
+ end
131
+
132
+ validate_array!(tags, "tags")
133
+ tags.each do |tag|
134
+ unless tag.is_a?(String) && tag.match?(/\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/)
135
+ raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
136
+ end
123
137
  end
124
138
 
139
+ validate_metadata!(metadata)
140
+
141
+ content = content_str
142
+
125
143
  # Calculate token count using configured counter
126
144
  token_count = HTM.count_tokens(content)
127
145
 
@@ -131,7 +149,8 @@ class HTM
131
149
  content: content,
132
150
  token_count: token_count,
133
151
  robot_id: @robot_id,
134
- embedding: nil # Will be generated in background
152
+ embedding: nil, # Will be generated in background
153
+ metadata: metadata
135
154
  )
136
155
 
137
156
  node_id = result[:node_id]
@@ -155,9 +174,17 @@ class HTM
155
174
  end
156
175
  end
157
176
 
158
- # Add to working memory (access_count starts at 0)
177
+ # Add to working memory (evict if needed, access_count starts at 0)
178
+ unless @working_memory.has_space?(token_count)
179
+ evicted = @working_memory.evict_to_make_space(token_count)
180
+ evicted_keys = evicted.map { |n| n[:key] }
181
+ @long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
182
+ end
159
183
  @working_memory.add(node_id, content, token_count: token_count, access_count: 0)
160
184
 
185
+ # Mark node as in working memory in the robot_nodes join table
186
+ result[:robot_node].update!(working_memory: true)
187
+
161
188
  update_robot_activity
162
189
  node_id
163
190
  end
@@ -165,53 +192,70 @@ class HTM
165
192
  # Recall memories from a timeframe and topic
166
193
  #
167
194
  # @param topic [String] Topic to search for (required)
168
- # @param timeframe [String, Range, nil] Time range (default: last 7 days). Examples: "last week", 7.days.ago..Time.now
195
+ # @param timeframe [nil, Range, Array<Range>, Date, DateTime, Time, String, Symbol] Time filter
196
+ # - nil: No time filter (search all memories)
197
+ # - Range: Time range (e.g., 7.days.ago..Time.now)
198
+ # - Array<Range>: Multiple time windows (OR'd together)
199
+ # - Date: Entire day
200
+ # - DateTime/Time: Entire day containing that moment
201
+ # - String: Natural language (e.g., "last week", "few days ago")
202
+ # - :auto: Extract timeframe from topic query automatically
169
203
  # @param limit [Integer] Maximum number of nodes to retrieve (default: 20)
170
204
  # @param strategy [Symbol] Search strategy (:vector, :fulltext, :hybrid) (default: :vector)
171
205
  # @param with_relevance [Boolean] Include dynamic relevance scores (default: false)
172
206
  # @param query_tags [Array<String>] Tags to boost relevance (default: [])
173
207
  # @param raw [Boolean] Return full node hashes (true) or just content strings (false) (default: false)
208
+ # @param metadata [Hash] Filter by metadata fields using JSONB containment (default: {})
174
209
  # @return [Array<String>, Array<Hash>] Content strings (raw: false) or full node hashes (raw: true)
175
210
  #
176
- # @example Basic usage (returns content strings)
211
+ # @example Basic usage - no time filter (returns content strings)
177
212
  # memories = htm.recall("PostgreSQL")
178
213
  # # => ["PostgreSQL is great for time-series data", "PostgreSQL with TimescaleDB..."]
179
214
  #
180
- # @example Get full node hashes
181
- # nodes = htm.recall("PostgreSQL", raw: true)
182
- # # => [{"id" => 1, "content" => "...", "created_at" => "...", ...}, ...]
183
- #
184
- # @example With timeframe
215
+ # @example With explicit timeframe
185
216
  # memories = htm.recall("PostgreSQL", timeframe: "last week")
217
+ # memories = htm.recall("PostgreSQL", timeframe: Date.today)
218
+ # memories = htm.recall("PostgreSQL", timeframe: 7.days.ago..Time.now)
186
219
  #
187
- # @example With all options
188
- # memories = htm.recall("PostgreSQL",
189
- # timeframe: "last month",
190
- # limit: 50,
191
- # strategy: :hybrid,
192
- # with_relevance: true,
193
- # query_tags: ["database", "timeseries"])
220
+ # @example Auto-extract timeframe from query
221
+ # memories = htm.recall("what did we discuss last week about PostgreSQL", timeframe: :auto)
222
+ # # Extracts "last week" as timeframe, searches for "what did we discuss about PostgreSQL"
194
223
  #
195
- def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false)
196
- # Use default timeframe if not provided (last 7 days)
197
- timeframe ||= "last 7 days"
198
-
224
+ # @example Multiple time windows
225
+ # memories = htm.recall("meetings", timeframe: [last_monday, last_friday])
226
+ #
227
+ # @example Filter by metadata
228
+ # memories = htm.recall("preferences", metadata: { source: "user" })
229
+ # memories = htm.recall("decisions", metadata: { confidence: 0.9, type: "architectural" })
230
+ #
231
+ def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false, metadata: {})
199
232
  # Validate inputs
200
233
  validate_timeframe!(timeframe)
201
234
  validate_positive_integer!(limit, "limit")
202
235
  validate_recall_strategy!(strategy)
203
236
  validate_array!(query_tags, "query_tags")
204
-
205
- parsed_timeframe = parse_timeframe(timeframe)
237
+ validate_metadata!(metadata)
238
+
239
+ # Normalize timeframe and potentially extract from query
240
+ search_query = topic
241
+ normalized_timeframe = if timeframe == :auto
242
+ result = HTM::Timeframe.normalize(:auto, query: topic)
243
+ search_query = result.query # Use cleaned query for search
244
+ HTM.logger.debug "Auto-extracted timeframe: #{result.extracted.inspect}" if result.extracted
245
+ result.timeframe
246
+ else
247
+ HTM::Timeframe.normalize(timeframe)
248
+ end
206
249
 
207
250
  # Use relevance-based search if requested
208
251
  if with_relevance
209
252
  nodes = @long_term_memory.search_with_relevance(
210
- timeframe: parsed_timeframe,
211
- query: topic,
253
+ timeframe: normalized_timeframe,
254
+ query: search_query,
212
255
  query_tags: query_tags,
213
256
  limit: limit,
214
- embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil
257
+ embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil,
258
+ metadata: metadata
215
259
  )
216
260
  else
217
261
  # Perform standard RAG-based retrieval
@@ -219,24 +263,27 @@ class HTM
219
263
  when :vector
220
264
  # Vector search using query embedding
221
265
  @long_term_memory.search(
222
- timeframe: parsed_timeframe,
223
- query: topic,
266
+ timeframe: normalized_timeframe,
267
+ query: search_query,
224
268
  limit: limit,
225
- embedding_service: HTM
269
+ embedding_service: HTM,
270
+ metadata: metadata
226
271
  )
227
272
  when :fulltext
228
273
  @long_term_memory.search_fulltext(
229
- timeframe: parsed_timeframe,
230
- query: topic,
231
- limit: limit
274
+ timeframe: normalized_timeframe,
275
+ query: search_query,
276
+ limit: limit,
277
+ metadata: metadata
232
278
  )
233
279
  when :hybrid
234
280
  # Hybrid search combining vector + fulltext
235
281
  @long_term_memory.search_hybrid(
236
- timeframe: parsed_timeframe,
237
- query: topic,
282
+ timeframe: normalized_timeframe,
283
+ query: search_query,
238
284
  limit: limit,
239
- embedding_service: HTM
285
+ embedding_service: HTM,
286
+ metadata: metadata
240
287
  )
241
288
  end
242
289
  end
@@ -252,32 +299,222 @@ class HTM
252
299
  raw ? nodes : nodes.map { |node| node['content'] }
253
300
  end
254
301
 
255
- # Forget a memory node (explicit deletion)
302
+ # Forget a memory node (soft delete by default, permanent delete requires confirmation)
256
303
  #
257
- # @param key [String] Key of the node to delete
258
- # @param confirm [Symbol] Must be :confirmed to proceed
304
+ # By default, performs a soft delete (sets deleted_at timestamp). The node
305
+ # remains in the database but is excluded from queries. Use soft: false
306
+ # with confirm: :confirmed for permanent deletion.
307
+ #
308
+ # @param node_id [Integer] ID of the node to delete
309
+ # @param soft [Boolean] If true (default), soft delete; if false, permanent delete
310
+ # @param confirm [Symbol] Must be :confirmed to proceed with permanent deletion
259
311
  # @return [Boolean] true if deleted
260
- # @raise [ArgumentError] if confirmation not provided
312
+ # @raise [ArgumentError] if permanent deletion requested without confirmation
261
313
  # @raise [HTM::NotFoundError] if node doesn't exist
262
314
  #
263
- def forget(node_id, confirm: false)
315
+ # @example Soft delete (recoverable)
316
+ # htm.forget(node_id)
317
+ # htm.forget(node_id, soft: true)
318
+ #
319
+ # @example Permanent delete (requires confirmation)
320
+ # htm.forget(node_id, soft: false, confirm: :confirmed)
321
+ #
322
+ def forget(node_id, soft: true, confirm: false)
264
323
  # Validate inputs
265
- raise ArgumentError, "node_id cannot be nil" if node_id.nil?
266
- raise ArgumentError, "Must pass confirm: :confirmed to delete" unless confirm == :confirmed
324
+ raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
267
325
 
268
- # Verify node exists
269
- unless @long_term_memory.exists?(node_id)
270
- raise HTM::NotFoundError, "Node not found: #{node_id}"
326
+ # Permanent delete requires confirmation
327
+ if !soft && confirm != :confirmed
328
+ raise ArgumentError, "Permanent deletion requires confirm: :confirmed"
271
329
  end
272
330
 
273
- # Delete the node and remove from working memory
274
- @long_term_memory.delete(node_id)
331
+ # Verify node exists (including soft-deleted for restore scenarios)
332
+ node = HTM::Models::Node.with_deleted.find_by(id: node_id)
333
+ raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
334
+
335
+ if soft
336
+ # Soft delete - mark as deleted but keep in database
337
+ node.soft_delete!
338
+ @long_term_memory.clear_cache! # Invalidate cache since node is no longer visible
339
+ HTM.logger.info "Node #{node_id} soft deleted"
340
+ else
341
+ # Permanent delete (also invalidates cache internally)
342
+ @long_term_memory.delete(node_id)
343
+ HTM.logger.info "Node #{node_id} permanently deleted"
344
+ end
345
+
346
+ # Remove from working memory either way
275
347
  @working_memory.remove(node_id)
276
348
 
277
349
  update_robot_activity
278
350
  true
279
351
  end
280
352
 
353
+ # Restore a soft-deleted memory node
354
+ #
355
+ # @param node_id [Integer] ID of the soft-deleted node to restore
356
+ # @return [Boolean] true if restored
357
+ # @raise [HTM::NotFoundError] if node doesn't exist or isn't deleted
358
+ #
359
+ # @example
360
+ # htm.forget(node_id) # Soft delete
361
+ # htm.restore(node_id) # Bring it back
362
+ #
363
+ def restore(node_id)
364
+ raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
365
+
366
+ # Find including soft-deleted nodes
367
+ node = HTM::Models::Node.with_deleted.find_by(id: node_id)
368
+ raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
369
+
370
+ unless node.deleted?
371
+ raise ArgumentError, "Node #{node_id} is not soft-deleted"
372
+ end
373
+
374
+ node.restore!
375
+ HTM.logger.info "Node #{node_id} restored"
376
+
377
+ update_robot_activity
378
+ true
379
+ end
380
+
381
+ # Permanently delete all soft-deleted nodes older than specified time
382
+ #
383
+ # @param older_than [Time, ActiveSupport::Duration] Purge nodes soft-deleted before this time
384
+ # @param confirm [Symbol] Must be :confirmed to proceed
385
+ # @return [Integer] Number of nodes permanently deleted
386
+ # @raise [ArgumentError] if confirmation not provided
387
+ #
388
+ # @example Purge nodes deleted more than 30 days ago
389
+ # htm.purge_deleted(older_than: 30.days.ago, confirm: :confirmed)
390
+ #
391
+ # @example Purge nodes deleted before a specific date
392
+ # htm.purge_deleted(older_than: Time.new(2024, 1, 1), confirm: :confirmed)
393
+ #
394
+ def purge_deleted(older_than:, confirm: false)
395
+ raise ArgumentError, "Purge requires confirm: :confirmed" unless confirm == :confirmed
396
+
397
+ count = HTM::Models::Node.purge_deleted(older_than: older_than)
398
+ HTM.logger.info "Purged #{count} soft-deleted nodes older than #{older_than}"
399
+
400
+ count
401
+ end
402
+
403
+ # Clear all nodes from working memory
404
+ #
405
+ # Marks all nodes as evicted from working memory (in database) and clears
406
+ # the in-memory cache. Nodes remain in long-term memory.
407
+ #
408
+ # @return [Integer] Number of nodes cleared from working memory
409
+ #
410
+ # @example
411
+ # htm.clear_working_memory # => 5
412
+ #
413
+ def clear_working_memory
414
+ # Clear in-memory cache
415
+ @working_memory.clear
416
+
417
+ # Update database: mark all as evicted from working memory
418
+ count = HTM::Models::RobotNode
419
+ .where(robot_id: @robot_id, working_memory: true)
420
+ .update_all(working_memory: false)
421
+
422
+ HTM.logger.info "Cleared #{count} nodes from working memory"
423
+ count
424
+ end
425
+
426
+ # Load a single file into long-term memory
427
+ #
428
+ # Reads a text-based file (starting with markdown), chunks it by paragraph,
429
+ # and stores each chunk as a node. YAML frontmatter is preserved as metadata
430
+ # on the first chunk.
431
+ #
432
+ # @param path [String] Path to file
433
+ # @param force [Boolean] Force re-sync even if mtime unchanged (default: false)
434
+ # @return [Hash] Result with keys:
435
+ # - :file_path [String] Absolute path to file
436
+ # - :chunks_created [Integer] Number of new chunks created
437
+ # - :chunks_updated [Integer] Number of existing chunks updated
438
+ # - :chunks_deleted [Integer] Number of chunks soft-deleted
439
+ # - :skipped [Boolean] True if file was unchanged and skipped
440
+ #
441
+ # @example Load a markdown file
442
+ # result = htm.load_file('/path/to/doc.md')
443
+ # # => { file_path: '/path/to/doc.md', chunks_created: 5, ... }
444
+ #
445
+ # @example Force re-sync even if unchanged
446
+ # result = htm.load_file('/path/to/doc.md', force: true)
447
+ #
448
+ def load_file(path, force: false)
449
+ loader = HTM::Loaders::MarkdownLoader.new(self)
450
+ result = loader.load_file(path, force: force)
451
+
452
+ update_robot_activity unless result[:skipped]
453
+ result
454
+ end
455
+
456
+ # Load all matching files from a directory into long-term memory
457
+ #
458
+ # @param path [String] Directory path
459
+ # @param pattern [String] Glob pattern (default: '**/*.md')
460
+ # @param force [Boolean] Force re-sync even if unchanged (default: false)
461
+ # @return [Array<Hash>] Results for each file
462
+ #
463
+ # @example Load all markdown files recursively
464
+ # results = htm.load_directory('/path/to/docs')
465
+ #
466
+ # @example Load only top-level markdown files
467
+ # results = htm.load_directory('/path/to/docs', pattern: '*.md')
468
+ #
469
+ def load_directory(path, pattern: '**/*.md', force: false)
470
+ loader = HTM::Loaders::MarkdownLoader.new(self)
471
+ results = loader.load_directory(path, pattern: pattern, force: force)
472
+
473
+ # Update activity if any files were processed
474
+ if results.any? { |r| !r[:skipped] && !r[:error] }
475
+ update_robot_activity
476
+ end
477
+
478
+ results
479
+ end
480
+
481
+ # Get all nodes loaded from a specific file
482
+ #
483
+ # @param file_path [String] Path to the source file
484
+ # @return [ActiveRecord::Relation] Nodes from this file, ordered by chunk_position
485
+ #
486
+ # @example
487
+ # nodes = htm.nodes_from_file('/path/to/doc.md')
488
+ # nodes.each { |node| puts node.content }
489
+ #
490
+ def nodes_from_file(file_path)
491
+ source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
492
+ return HTM::Models::Node.none unless source
493
+
494
+ HTM::Models::Node.from_source(source.id)
495
+ end
496
+
497
+ # Unload a file (soft-delete all its chunks and remove source record)
498
+ #
499
+ # @param file_path [String] Path to the source file
500
+ # @return [Integer] Number of nodes soft-deleted
501
+ #
502
+ # @example
503
+ # count = htm.unload_file('/path/to/doc.md')
504
+ # puts "Unloaded #{count} chunks"
505
+ #
506
+ def unload_file(file_path)
507
+ source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
508
+ return 0 unless source
509
+
510
+ count = source.soft_delete_chunks!
511
+ @long_term_memory.clear_cache!
512
+ source.destroy
513
+
514
+ update_robot_activity
515
+ count
516
+ end
517
+
281
518
  private
282
519
 
283
520
  def register_robot
@@ -318,35 +555,30 @@ class HTM
318
555
  token_count = node['token_count'].to_i
319
556
  access_count = (node['access_count'] || 0).to_i
320
557
  last_accessed = node['last_accessed'] ? Time.parse(node['last_accessed'].to_s) : nil
558
+ node_id = node['id']
321
559
 
322
- if @working_memory.has_space?(token_count)
323
- @working_memory.add(
324
- node['id'],
325
- node['content'],
326
- token_count: token_count,
327
- access_count: access_count,
328
- last_accessed: last_accessed,
329
- from_recall: true
330
- )
331
- else
560
+ unless @working_memory.has_space?(token_count)
332
561
  # Evict to make space
333
562
  evicted = @working_memory.evict_to_make_space(token_count)
334
563
  evicted_keys = evicted.map { |n| n[:key] }
335
- @long_term_memory.mark_evicted(evicted_keys) if evicted_keys.any?
336
-
337
- # Now add the recalled node
338
- @working_memory.add(
339
- node['id'],
340
- node['content'],
341
- token_count: token_count,
342
- access_count: access_count,
343
- last_accessed: last_accessed,
344
- from_recall: true
345
- )
564
+ @long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
346
565
  end
347
- end
348
566
 
349
- private
567
+ # Add to in-memory working memory
568
+ @working_memory.add(
569
+ node_id,
570
+ node['content'],
571
+ token_count: token_count,
572
+ access_count: access_count,
573
+ last_accessed: last_accessed,
574
+ from_recall: true
575
+ )
576
+
577
+ # Mark node as in working memory in the robot_nodes join table
578
+ HTM::Models::RobotNode
579
+ .find_by(robot_id: @robot_id, node_id: node_id)
580
+ &.update!(working_memory: true)
581
+ end
350
582
 
351
583
  # Validation helper methods
352
584
 
@@ -364,58 +596,24 @@ class HTM
364
596
 
365
597
 
366
598
  def validate_timeframe!(timeframe)
367
- return if timeframe.is_a?(Range) || timeframe.is_a?(String)
368
- raise ValidationError, "Timeframe must be a Range or String, got #{timeframe.class}"
599
+ return if HTM::Timeframe.valid?(timeframe)
600
+ raise ValidationError, "Invalid timeframe type: #{timeframe.class}. " \
601
+ "Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
369
602
  end
370
603
 
371
604
  def validate_positive_integer!(value, name)
372
605
  raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value > 0
373
606
  end
374
607
 
375
- # Timeframe parsing methods
608
+ def validate_metadata!(metadata)
609
+ raise ValidationError, "Metadata must be a Hash" unless metadata.is_a?(Hash)
376
610
 
377
- def parse_timeframe(timeframe)
378
- case timeframe
379
- when Range
380
- timeframe
381
- when String
382
- parse_natural_timeframe(timeframe)
383
- else
384
- raise ArgumentError, "Invalid timeframe: #{timeframe}"
611
+ # Ensure all keys are strings or symbols (will be converted to strings in JSON)
612
+ metadata.each_key do |key|
613
+ unless key.is_a?(String) || key.is_a?(Symbol)
614
+ raise ValidationError, "Metadata keys must be strings or symbols"
615
+ end
385
616
  end
386
617
  end
387
618
 
388
- def parse_natural_timeframe(text)
389
- now = Time.now
390
-
391
- case text.downcase
392
- when /last week/
393
- (now - 7 * 24 * 3600)..now
394
- when /yesterday/
395
- start_of_yesterday = Time.new(now.year, now.month, now.day - 1)
396
- start_of_yesterday..(start_of_yesterday + 24 * 3600)
397
- when /last (\d+) days?/
398
- days = $1.to_i
399
- (now - days * 24 * 3600)..now
400
- when /last (\d+) seconds?/
401
- seconds = $1.to_i
402
- (now - seconds)..now
403
- when /last (\d+) minutes?/
404
- minutes = $1.to_i
405
- (now - minutes * 60)..now
406
- when /last (\d+) hours?/
407
- hours = $1.to_i
408
- (now - hours * 3600)..now
409
- when /this month/
410
- start_of_month = Time.new(now.year, now.month, 1)
411
- start_of_month..now
412
- when /last month/
413
- start_of_last_month = Time.new(now.year, now.month - 1, 1)
414
- end_of_last_month = Time.new(now.year, now.month, 1) - 1
415
- start_of_last_month..end_of_last_month
416
- else
417
- # Default to last 24 hours
418
- (now - 24 * 3600)..now
419
- end
420
- end
421
619
  end