htm 0.0.1 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.envrc +1 -0
  6. data/.irbrc +283 -80
  7. data/.tbls.yml +31 -0
  8. data/CHANGELOG.md +314 -16
  9. data/CLAUDE.md +603 -0
  10. data/README.md +76 -5
  11. data/Rakefile +5 -0
  12. data/SETUP.md +132 -101
  13. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  14. data/db/migrate/00002_create_robots.rb +11 -0
  15. data/db/migrate/00003_create_file_sources.rb +20 -0
  16. data/db/migrate/00004_create_nodes.rb +65 -0
  17. data/db/migrate/00005_create_tags.rb +13 -0
  18. data/db/migrate/00006_create_node_tags.rb +18 -0
  19. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  20. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  21. data/db/schema.sql +390 -36
  22. data/docs/api/database.md +19 -232
  23. data/docs/api/embedding-service.md +1 -7
  24. data/docs/api/htm.md +305 -364
  25. data/docs/api/index.md +1 -7
  26. data/docs/api/long-term-memory.md +342 -590
  27. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  28. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  29. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  30. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  31. data/docs/api/yard/HTM/Configuration.md +175 -0
  32. data/docs/api/yard/HTM/Database.md +99 -0
  33. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  34. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  35. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  36. data/docs/api/yard/HTM/Error.md +11 -0
  37. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  38. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  39. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  40. data/docs/api/yard/HTM/Observability.md +107 -0
  41. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  42. data/docs/api/yard/HTM/Railtie.md +27 -0
  43. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  44. data/docs/api/yard/HTM/TagError.md +18 -0
  45. data/docs/api/yard/HTM/TagService.md +67 -0
  46. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  47. data/docs/api/yard/HTM/Timeframe.md +40 -0
  48. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  49. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  50. data/docs/api/yard/HTM/ValidationError.md +20 -0
  51. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  52. data/docs/api/yard/HTM.md +80 -0
  53. data/docs/api/yard/index.csv +179 -0
  54. data/docs/api/yard-reference.md +51 -0
  55. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  56. data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
  57. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
  58. data/docs/architecture/adrs/index.md +2 -13
  59. data/docs/architecture/hive-mind.md +165 -166
  60. data/docs/architecture/index.md +2 -2
  61. data/docs/architecture/overview.md +5 -171
  62. data/docs/architecture/two-tier-memory.md +1 -35
  63. data/docs/assets/images/adr-010-current-architecture.svg +37 -0
  64. data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
  65. data/docs/assets/images/adr-dependency-tree.svg +93 -0
  66. data/docs/assets/images/class-hierarchy.svg +55 -0
  67. data/docs/assets/images/exception-hierarchy.svg +45 -0
  68. data/docs/assets/images/htm-architecture-overview.svg +83 -0
  69. data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
  70. data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
  71. data/docs/assets/images/htm-eviction-process.svg +141 -0
  72. data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
  73. data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
  74. data/docs/assets/images/htm-node-states.svg +123 -0
  75. data/docs/assets/images/project-structure.svg +78 -0
  76. data/docs/assets/images/test-directory-structure.svg +38 -0
  77. data/{dbdoc → docs/database}/README.md +127 -125
  78. data/docs/database/public.file_sources.md +42 -0
  79. data/docs/database/public.file_sources.svg +211 -0
  80. data/{dbdoc → docs/database}/public.node_tags.md +7 -8
  81. data/docs/database/public.node_tags.svg +239 -0
  82. data/{dbdoc → docs/database}/public.nodes.md +22 -17
  83. data/docs/database/public.nodes.svg +271 -0
  84. data/docs/database/public.robot_nodes.md +46 -0
  85. data/docs/database/public.robot_nodes.svg +243 -0
  86. data/{dbdoc → docs/database}/public.robots.md +2 -3
  87. data/docs/database/public.robots.svg +161 -0
  88. data/docs/database/public.tags.svg +139 -0
  89. data/{dbdoc → docs/database}/schema.json +941 -630
  90. data/docs/database/schema.svg +282 -0
  91. data/docs/development/index.md +1 -29
  92. data/docs/development/schema.md +134 -309
  93. data/docs/development/testing.md +1 -9
  94. data/docs/getting-started/index.md +47 -0
  95. data/docs/{installation.md → getting-started/installation.md} +2 -2
  96. data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
  97. data/docs/guides/adding-memories.md +295 -643
  98. data/docs/guides/recalling-memories.md +36 -1
  99. data/docs/guides/search-strategies.md +85 -51
  100. data/docs/images/htm-er-diagram.svg +156 -0
  101. data/docs/index.md +16 -31
  102. data/docs/multi_framework_support.md +4 -4
  103. data/examples/README.md +280 -0
  104. data/examples/basic_usage.rb +18 -16
  105. data/examples/cli_app/htm_cli.rb +146 -8
  106. data/examples/cli_app/temp.log +93 -0
  107. data/examples/custom_llm_configuration.rb +1 -2
  108. data/examples/example_app/app.rb +11 -14
  109. data/examples/file_loader_usage.rb +177 -0
  110. data/examples/robot_groups/lib/robot_group.rb +419 -0
  111. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  112. data/examples/robot_groups/multi_process.rb +286 -0
  113. data/examples/robot_groups/robot_worker.rb +136 -0
  114. data/examples/robot_groups/same_process.rb +229 -0
  115. data/examples/sinatra_app/Gemfile +1 -0
  116. data/examples/sinatra_app/Gemfile.lock +166 -0
  117. data/examples/sinatra_app/app.rb +219 -24
  118. data/examples/timeframe_demo.rb +276 -0
  119. data/lib/htm/active_record_config.rb +10 -3
  120. data/lib/htm/circuit_breaker.rb +202 -0
  121. data/lib/htm/configuration.rb +313 -80
  122. data/lib/htm/database.rb +67 -36
  123. data/lib/htm/embedding_service.rb +39 -2
  124. data/lib/htm/errors.rb +131 -11
  125. data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
  126. data/lib/htm/job_adapter.rb +10 -3
  127. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  128. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  129. data/lib/htm/loaders/markdown_loader.rb +263 -0
  130. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  131. data/lib/htm/long_term_memory.rb +601 -321
  132. data/lib/htm/models/file_source.rb +99 -0
  133. data/lib/htm/models/node.rb +116 -12
  134. data/lib/htm/models/robot.rb +53 -4
  135. data/lib/htm/models/robot_node.rb +51 -0
  136. data/lib/htm/models/tag.rb +302 -0
  137. data/lib/htm/observability.rb +395 -0
  138. data/lib/htm/tag_service.rb +60 -3
  139. data/lib/htm/tasks.rb +29 -0
  140. data/lib/htm/timeframe.rb +194 -0
  141. data/lib/htm/timeframe_extractor.rb +307 -0
  142. data/lib/htm/version.rb +1 -1
  143. data/lib/htm/working_memory.rb +165 -70
  144. data/lib/htm.rb +352 -133
  145. data/lib/tasks/doc.rake +300 -0
  146. data/lib/tasks/files.rake +299 -0
  147. data/lib/tasks/htm.rake +188 -2
  148. data/lib/tasks/jobs.rake +10 -12
  149. data/lib/tasks/tags.rake +194 -0
  150. data/mkdocs.yml +91 -9
  151. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  152. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  153. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  154. data/notes/next_steps.md +100 -0
  155. data/notes/plan.md +627 -0
  156. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  157. data/notes/timescaledb_removal_summary.md +200 -0
  158. metadata +177 -37
  159. data/db/migrate/20250101000002_create_robots.rb +0 -14
  160. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  161. data/db/migrate/20250101000005_create_tags.rb +0 -38
  162. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  163. data/dbdoc/public.node_tags.svg +0 -112
  164. data/dbdoc/public.nodes.svg +0 -118
  165. data/dbdoc/public.robots.svg +0 -90
  166. data/dbdoc/public.tags.svg +0 -60
  167. data/dbdoc/schema.svg +0 -154
  168. data/{dbdoc → docs/database}/public.node_stats.md +0 -0
  169. data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
  170. data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
  171. data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
  172. data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
  173. data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
  174. data/{dbdoc → docs/database}/public.operations_log.md +0 -0
  175. data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
  176. data/{dbdoc → docs/database}/public.relationships.md +0 -0
  177. data/{dbdoc → docs/database}/public.relationships.svg +0 -0
  178. data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
  179. data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
  180. data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
  181. data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
  182. data/{dbdoc → docs/database}/public.tags.md +3 -3
  183. data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
  184. data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
data/lib/htm.rb CHANGED
@@ -3,15 +3,21 @@
3
3
  require_relative "htm/version"
4
4
  require_relative "htm/errors"
5
5
  require_relative "htm/configuration"
6
+ require_relative "htm/circuit_breaker"
6
7
  require_relative "htm/active_record_config"
7
8
  require_relative "htm/database"
8
9
  require_relative "htm/long_term_memory"
9
10
  require_relative "htm/working_memory"
10
11
  require_relative "htm/embedding_service"
11
12
  require_relative "htm/tag_service"
13
+ require_relative "htm/timeframe_extractor"
14
+ require_relative "htm/timeframe"
12
15
  require_relative "htm/job_adapter"
13
16
  require_relative "htm/jobs/generate_embedding_job"
14
17
  require_relative "htm/jobs/generate_tags_job"
18
+ require_relative "htm/loaders/paragraph_chunker"
19
+ require_relative "htm/loaders/markdown_loader"
20
+ require_relative "htm/observability"
15
21
 
16
22
  require "pg"
17
23
  require "securerandom"
@@ -24,14 +30,14 @@ require_relative "htm/railtie" if defined?(Rails::Railtie)
24
30
  #
25
31
  # HTM implements a two-tier memory system:
26
32
  # - Working Memory: Token-limited, active context for immediate LLM use
27
- # - Long-term Memory: Durable PostgreSQL/TimescaleDB storage for permanent knowledge
33
+ # - Long-term Memory: Durable PostgreSQL storage with pgvector for permanent knowledge
28
34
  #
29
35
  # Key Features:
30
- # - Never forgets unless explicitly told
31
- # - RAG-based retrieval (temporal + semantic search)
36
+ # - Never forgets unless explicitly told (soft delete by default)
37
+ # - RAG-based retrieval (temporal + semantic search via pgvector)
32
38
  # - Multi-robot "hive mind" - all robots share global memory
33
- # - Relationship graphs for knowledge connections
34
- # - Time-series optimized with TimescaleDB
39
+ # - Hierarchical tagging system for knowledge organization
40
+ # - Async background processing for embeddings and tags
35
41
  #
36
42
  # @example Basic usage
37
43
  # htm = HTM.new(robot_name: "Code Helper")
@@ -98,54 +104,87 @@ class HTM
98
104
  # Stores content in long-term memory and adds it to working memory.
99
105
  # Embeddings and hierarchical tags are automatically extracted by LLM in the background.
100
106
  #
101
- # If content is empty, returns the ID of the most recent node without creating a duplicate.
102
- # Nil values for content or source are converted to empty strings.
103
- #
104
- # @param content [String, nil] The information to remember
105
- # @param source [String, nil] Where this content came from (defaults to empty string if not provided)
107
+ # @param content [String] The information to remember (required, cannot be nil or empty)
106
108
  # @param tags [Array<String>] Manual tags to assign (optional, in addition to auto-extracted tags)
107
109
  # @return [Integer] Database ID of the memory node
110
+ # @raise [ValidationError] If content is nil, empty, or exceeds maximum size
108
111
  #
109
112
  # @example Remember with source
110
- # node_id = htm.remember("PostgreSQL is great for HTM", source: "user")
113
+ # node_id = htm.remember("PostgreSQL is great for HTM")
111
114
  #
112
115
  # @example Remember with manual tags
113
- # node_id = htm.remember("Time-series data", source: "user", tags: ["database:timescaledb"])
116
+ # node_id = htm.remember("Time-series data", tags: ["database:timescaledb"])
117
+ #
118
+ # @example Remember with metadata
119
+ # node_id = htm.remember("User prefers dark mode", metadata: { source: "user", confidence: 0.95 })
114
120
  #
115
- def remember(content, source: "", tags: [])
116
- # Convert nil to empty string
117
- content = content.to_s
118
- source = source.to_s
121
+ def remember(content, tags: [], metadata: {})
122
+ # Validate inputs
123
+ raise ValidationError, "Content cannot be nil" if content.nil?
124
+
125
+ content_str = content.to_s.strip
126
+ raise ValidationError, "Content cannot be empty" if content_str.empty?
119
127
 
120
- # If content is empty, return the last node ID without creating a new entry
121
- if content.empty?
122
- last_node = HTM::Models::Node.order(created_at: :desc).first
123
- return last_node&.id || 0
128
+ if content_str.bytesize > MAX_VALUE_LENGTH
129
+ raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
130
+ end
131
+
132
+ validate_array!(tags, "tags")
133
+ tags.each do |tag|
134
+ unless tag.is_a?(String) && tag.match?(/\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/)
135
+ raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
136
+ end
124
137
  end
125
138
 
139
+ validate_metadata!(metadata)
140
+
141
+ content = content_str
142
+
126
143
  # Calculate token count using configured counter
127
144
  token_count = HTM.count_tokens(content)
128
145
 
129
- # Store in long-term memory immediately (without embedding)
130
- # Embedding and tags will be generated asynchronously
131
- node_id = @long_term_memory.add(
146
+ # Store in long-term memory (with deduplication)
147
+ # Returns { node_id:, is_new:, robot_node: }
148
+ result = @long_term_memory.add(
132
149
  content: content,
133
- source: source,
134
150
  token_count: token_count,
135
151
  robot_id: @robot_id,
136
- embedding: nil # Will be generated in background
152
+ embedding: nil, # Will be generated in background
153
+ metadata: metadata
137
154
  )
138
155
 
139
- HTM.logger.info "Node #{node_id} created for robot #{@robot_name} (#{token_count} tokens)"
156
+ node_id = result[:node_id]
157
+ is_new = result[:is_new]
140
158
 
141
- # Enqueue background jobs for embedding and tag generation
142
- # Both jobs run in parallel with equal priority
143
- enqueue_embedding_job(node_id)
144
- enqueue_tags_job(node_id, manual_tags: tags)
159
+ if is_new
160
+ HTM.logger.info "Node #{node_id} created for robot #{@robot_name} (#{token_count} tokens)"
161
+
162
+ # Enqueue background jobs for embedding and tag generation
163
+ # Only for NEW nodes - existing nodes already have embeddings/tags
164
+ enqueue_embedding_job(node_id)
165
+ enqueue_tags_job(node_id, manual_tags: tags)
166
+ else
167
+ HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{result[:robot_node].remember_count})"
168
+
169
+ # For existing nodes, only add manual tags if provided
170
+ if tags.any?
171
+ node = HTM::Models::Node.find(node_id)
172
+ node.add_tags(tags)
173
+ HTM.logger.info "Added #{tags.length} manual tags to existing node #{node_id}"
174
+ end
175
+ end
145
176
 
146
- # Add to working memory (access_count starts at 0)
177
+ # Add to working memory (evict if needed, access_count starts at 0)
178
+ unless @working_memory.has_space?(token_count)
179
+ evicted = @working_memory.evict_to_make_space(token_count)
180
+ evicted_keys = evicted.map { |n| n[:key] }
181
+ @long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
182
+ end
147
183
  @working_memory.add(node_id, content, token_count: token_count, access_count: 0)
148
184
 
185
+ # Mark node as in working memory in the robot_nodes join table
186
+ result[:robot_node].update!(working_memory: true)
187
+
149
188
  update_robot_activity
150
189
  node_id
151
190
  end
@@ -153,53 +192,70 @@ class HTM
153
192
  # Recall memories from a timeframe and topic
154
193
  #
155
194
  # @param topic [String] Topic to search for (required)
156
- # @param timeframe [String, Range, nil] Time range (default: last 7 days). Examples: "last week", 7.days.ago..Time.now
195
+ # @param timeframe [nil, Range, Array<Range>, Date, DateTime, Time, String, Symbol] Time filter
196
+ # - nil: No time filter (search all memories)
197
+ # - Range: Time range (e.g., 7.days.ago..Time.now)
198
+ # - Array<Range>: Multiple time windows (OR'd together)
199
+ # - Date: Entire day
200
+ # - DateTime/Time: Entire day containing that moment
201
+ # - String: Natural language (e.g., "last week", "few days ago")
202
+ # - :auto: Extract timeframe from topic query automatically
157
203
  # @param limit [Integer] Maximum number of nodes to retrieve (default: 20)
158
204
  # @param strategy [Symbol] Search strategy (:vector, :fulltext, :hybrid) (default: :vector)
159
205
  # @param with_relevance [Boolean] Include dynamic relevance scores (default: false)
160
206
  # @param query_tags [Array<String>] Tags to boost relevance (default: [])
161
207
  # @param raw [Boolean] Return full node hashes (true) or just content strings (false) (default: false)
208
+ # @param metadata [Hash] Filter by metadata fields using JSONB containment (default: {})
162
209
  # @return [Array<String>, Array<Hash>] Content strings (raw: false) or full node hashes (raw: true)
163
210
  #
164
- # @example Basic usage (returns content strings)
211
+ # @example Basic usage - no time filter (returns content strings)
165
212
  # memories = htm.recall("PostgreSQL")
166
213
  # # => ["PostgreSQL is great for time-series data", "PostgreSQL with TimescaleDB..."]
167
214
  #
168
- # @example Get full node hashes
169
- # nodes = htm.recall("PostgreSQL", raw: true)
170
- # # => [{"id" => 1, "content" => "...", "created_at" => "...", ...}, ...]
171
- #
172
- # @example With timeframe
215
+ # @example With explicit timeframe
173
216
  # memories = htm.recall("PostgreSQL", timeframe: "last week")
217
+ # memories = htm.recall("PostgreSQL", timeframe: Date.today)
218
+ # memories = htm.recall("PostgreSQL", timeframe: 7.days.ago..Time.now)
174
219
  #
175
- # @example With all options
176
- # memories = htm.recall("PostgreSQL",
177
- # timeframe: "last month",
178
- # limit: 50,
179
- # strategy: :hybrid,
180
- # with_relevance: true,
181
- # query_tags: ["database", "timeseries"])
220
+ # @example Auto-extract timeframe from query
221
+ # memories = htm.recall("what did we discuss last week about PostgreSQL", timeframe: :auto)
222
+ # # Extracts "last week" as timeframe, searches for "what did we discuss about PostgreSQL"
182
223
  #
183
- def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false)
184
- # Use default timeframe if not provided (last 7 days)
185
- timeframe ||= "last 7 days"
186
-
224
+ # @example Multiple time windows
225
+ # memories = htm.recall("meetings", timeframe: [last_monday, last_friday])
226
+ #
227
+ # @example Filter by metadata
228
+ # memories = htm.recall("preferences", metadata: { source: "user" })
229
+ # memories = htm.recall("decisions", metadata: { confidence: 0.9, type: "architectural" })
230
+ #
231
+ def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false, metadata: {})
187
232
  # Validate inputs
188
233
  validate_timeframe!(timeframe)
189
234
  validate_positive_integer!(limit, "limit")
190
235
  validate_recall_strategy!(strategy)
191
236
  validate_array!(query_tags, "query_tags")
192
-
193
- parsed_timeframe = parse_timeframe(timeframe)
237
+ validate_metadata!(metadata)
238
+
239
+ # Normalize timeframe and potentially extract from query
240
+ search_query = topic
241
+ normalized_timeframe = if timeframe == :auto
242
+ result = HTM::Timeframe.normalize(:auto, query: topic)
243
+ search_query = result.query # Use cleaned query for search
244
+ HTM.logger.debug "Auto-extracted timeframe: #{result.extracted.inspect}" if result.extracted
245
+ result.timeframe
246
+ else
247
+ HTM::Timeframe.normalize(timeframe)
248
+ end
194
249
 
195
250
  # Use relevance-based search if requested
196
251
  if with_relevance
197
252
  nodes = @long_term_memory.search_with_relevance(
198
- timeframe: parsed_timeframe,
199
- query: topic,
253
+ timeframe: normalized_timeframe,
254
+ query: search_query,
200
255
  query_tags: query_tags,
201
256
  limit: limit,
202
- embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil
257
+ embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil,
258
+ metadata: metadata
203
259
  )
204
260
  else
205
261
  # Perform standard RAG-based retrieval
@@ -207,24 +263,27 @@ class HTM
207
263
  when :vector
208
264
  # Vector search using query embedding
209
265
  @long_term_memory.search(
210
- timeframe: parsed_timeframe,
211
- query: topic,
266
+ timeframe: normalized_timeframe,
267
+ query: search_query,
212
268
  limit: limit,
213
- embedding_service: HTM
269
+ embedding_service: HTM,
270
+ metadata: metadata
214
271
  )
215
272
  when :fulltext
216
273
  @long_term_memory.search_fulltext(
217
- timeframe: parsed_timeframe,
218
- query: topic,
219
- limit: limit
274
+ timeframe: normalized_timeframe,
275
+ query: search_query,
276
+ limit: limit,
277
+ metadata: metadata
220
278
  )
221
279
  when :hybrid
222
280
  # Hybrid search combining vector + fulltext
223
281
  @long_term_memory.search_hybrid(
224
- timeframe: parsed_timeframe,
225
- query: topic,
282
+ timeframe: normalized_timeframe,
283
+ query: search_query,
226
284
  limit: limit,
227
- embedding_service: HTM
285
+ embedding_service: HTM,
286
+ metadata: metadata
228
287
  )
229
288
  end
230
289
  end
@@ -240,32 +299,222 @@ class HTM
240
299
  raw ? nodes : nodes.map { |node| node['content'] }
241
300
  end
242
301
 
243
- # Forget a memory node (explicit deletion)
302
+ # Forget a memory node (soft delete by default, permanent delete requires confirmation)
244
303
  #
245
- # @param key [String] Key of the node to delete
246
- # @param confirm [Symbol] Must be :confirmed to proceed
304
+ # By default, performs a soft delete (sets deleted_at timestamp). The node
305
+ # remains in the database but is excluded from queries. Use soft: false
306
+ # with confirm: :confirmed for permanent deletion.
307
+ #
308
+ # @param node_id [Integer] ID of the node to delete
309
+ # @param soft [Boolean] If true (default), soft delete; if false, permanent delete
310
+ # @param confirm [Symbol] Must be :confirmed to proceed with permanent deletion
247
311
  # @return [Boolean] true if deleted
248
- # @raise [ArgumentError] if confirmation not provided
312
+ # @raise [ArgumentError] if permanent deletion requested without confirmation
249
313
  # @raise [HTM::NotFoundError] if node doesn't exist
250
314
  #
251
- def forget(node_id, confirm: false)
315
+ # @example Soft delete (recoverable)
316
+ # htm.forget(node_id)
317
+ # htm.forget(node_id, soft: true)
318
+ #
319
+ # @example Permanent delete (requires confirmation)
320
+ # htm.forget(node_id, soft: false, confirm: :confirmed)
321
+ #
322
+ def forget(node_id, soft: true, confirm: false)
252
323
  # Validate inputs
253
- raise ArgumentError, "node_id cannot be nil" if node_id.nil?
254
- raise ArgumentError, "Must pass confirm: :confirmed to delete" unless confirm == :confirmed
324
+ raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
255
325
 
256
- # Verify node exists
257
- unless @long_term_memory.exists?(node_id)
258
- raise HTM::NotFoundError, "Node not found: #{node_id}"
326
+ # Permanent delete requires confirmation
327
+ if !soft && confirm != :confirmed
328
+ raise ArgumentError, "Permanent deletion requires confirm: :confirmed"
329
+ end
330
+
331
+ # Verify node exists (including soft-deleted for restore scenarios)
332
+ node = HTM::Models::Node.with_deleted.find_by(id: node_id)
333
+ raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
334
+
335
+ if soft
336
+ # Soft delete - mark as deleted but keep in database
337
+ node.soft_delete!
338
+ @long_term_memory.clear_cache! # Invalidate cache since node is no longer visible
339
+ HTM.logger.info "Node #{node_id} soft deleted"
340
+ else
341
+ # Permanent delete (also invalidates cache internally)
342
+ @long_term_memory.delete(node_id)
343
+ HTM.logger.info "Node #{node_id} permanently deleted"
259
344
  end
260
345
 
261
- # Delete the node and remove from working memory
262
- @long_term_memory.delete(node_id)
346
+ # Remove from working memory either way
263
347
  @working_memory.remove(node_id)
264
348
 
265
349
  update_robot_activity
266
350
  true
267
351
  end
268
352
 
353
+ # Restore a soft-deleted memory node
354
+ #
355
+ # @param node_id [Integer] ID of the soft-deleted node to restore
356
+ # @return [Boolean] true if restored
357
+ # @raise [HTM::NotFoundError] if node doesn't exist or isn't deleted
358
+ #
359
+ # @example
360
+ # htm.forget(node_id) # Soft delete
361
+ # htm.restore(node_id) # Bring it back
362
+ #
363
+ def restore(node_id)
364
+ raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
365
+
366
+ # Find including soft-deleted nodes
367
+ node = HTM::Models::Node.with_deleted.find_by(id: node_id)
368
+ raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
369
+
370
+ unless node.deleted?
371
+ raise ArgumentError, "Node #{node_id} is not soft-deleted"
372
+ end
373
+
374
+ node.restore!
375
+ HTM.logger.info "Node #{node_id} restored"
376
+
377
+ update_robot_activity
378
+ true
379
+ end
380
+
381
+ # Permanently delete all soft-deleted nodes older than specified time
382
+ #
383
+ # @param older_than [Time, ActiveSupport::Duration] Purge nodes soft-deleted before this time
384
+ # @param confirm [Symbol] Must be :confirmed to proceed
385
+ # @return [Integer] Number of nodes permanently deleted
386
+ # @raise [ArgumentError] if confirmation not provided
387
+ #
388
+ # @example Purge nodes deleted more than 30 days ago
389
+ # htm.purge_deleted(older_than: 30.days.ago, confirm: :confirmed)
390
+ #
391
+ # @example Purge nodes deleted before a specific date
392
+ # htm.purge_deleted(older_than: Time.new(2024, 1, 1), confirm: :confirmed)
393
+ #
394
+ def purge_deleted(older_than:, confirm: false)
395
+ raise ArgumentError, "Purge requires confirm: :confirmed" unless confirm == :confirmed
396
+
397
+ count = HTM::Models::Node.purge_deleted(older_than: older_than)
398
+ HTM.logger.info "Purged #{count} soft-deleted nodes older than #{older_than}"
399
+
400
+ count
401
+ end
402
+
403
+ # Clear all nodes from working memory
404
+ #
405
+ # Marks all nodes as evicted from working memory (in database) and clears
406
+ # the in-memory cache. Nodes remain in long-term memory.
407
+ #
408
+ # @return [Integer] Number of nodes cleared from working memory
409
+ #
410
+ # @example
411
+ # htm.clear_working_memory # => 5
412
+ #
413
+ def clear_working_memory
414
+ # Clear in-memory cache
415
+ @working_memory.clear
416
+
417
+ # Update database: mark all as evicted from working memory
418
+ count = HTM::Models::RobotNode
419
+ .where(robot_id: @robot_id, working_memory: true)
420
+ .update_all(working_memory: false)
421
+
422
+ HTM.logger.info "Cleared #{count} nodes from working memory"
423
+ count
424
+ end
425
+
426
+ # Load a single file into long-term memory
427
+ #
428
+ # Reads a text-based file (starting with markdown), chunks it by paragraph,
429
+ # and stores each chunk as a node. YAML frontmatter is preserved as metadata
430
+ # on the first chunk.
431
+ #
432
+ # @param path [String] Path to file
433
+ # @param force [Boolean] Force re-sync even if mtime unchanged (default: false)
434
+ # @return [Hash] Result with keys:
435
+ # - :file_path [String] Absolute path to file
436
+ # - :chunks_created [Integer] Number of new chunks created
437
+ # - :chunks_updated [Integer] Number of existing chunks updated
438
+ # - :chunks_deleted [Integer] Number of chunks soft-deleted
439
+ # - :skipped [Boolean] True if file was unchanged and skipped
440
+ #
441
+ # @example Load a markdown file
442
+ # result = htm.load_file('/path/to/doc.md')
443
+ # # => { file_path: '/path/to/doc.md', chunks_created: 5, ... }
444
+ #
445
+ # @example Force re-sync even if unchanged
446
+ # result = htm.load_file('/path/to/doc.md', force: true)
447
+ #
448
+ def load_file(path, force: false)
449
+ loader = HTM::Loaders::MarkdownLoader.new(self)
450
+ result = loader.load_file(path, force: force)
451
+
452
+ update_robot_activity unless result[:skipped]
453
+ result
454
+ end
455
+
456
+ # Load all matching files from a directory into long-term memory
457
+ #
458
+ # @param path [String] Directory path
459
+ # @param pattern [String] Glob pattern (default: '**/*.md')
460
+ # @param force [Boolean] Force re-sync even if unchanged (default: false)
461
+ # @return [Array<Hash>] Results for each file
462
+ #
463
+ # @example Load all markdown files recursively
464
+ # results = htm.load_directory('/path/to/docs')
465
+ #
466
+ # @example Load only top-level markdown files
467
+ # results = htm.load_directory('/path/to/docs', pattern: '*.md')
468
+ #
469
+ def load_directory(path, pattern: '**/*.md', force: false)
470
+ loader = HTM::Loaders::MarkdownLoader.new(self)
471
+ results = loader.load_directory(path, pattern: pattern, force: force)
472
+
473
+ # Update activity if any files were processed
474
+ if results.any? { |r| !r[:skipped] && !r[:error] }
475
+ update_robot_activity
476
+ end
477
+
478
+ results
479
+ end
480
+
481
+ # Get all nodes loaded from a specific file
482
+ #
483
+ # @param file_path [String] Path to the source file
484
+ # @return [ActiveRecord::Relation] Nodes from this file, ordered by chunk_position
485
+ #
486
+ # @example
487
+ # nodes = htm.nodes_from_file('/path/to/doc.md')
488
+ # nodes.each { |node| puts node.content }
489
+ #
490
+ def nodes_from_file(file_path)
491
+ source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
492
+ return HTM::Models::Node.none unless source
493
+
494
+ HTM::Models::Node.from_source(source.id)
495
+ end
496
+
497
+ # Unload a file (soft-delete all its chunks and remove source record)
498
+ #
499
+ # @param file_path [String] Path to the source file
500
+ # @return [Integer] Number of nodes soft-deleted
501
+ #
502
+ # @example
503
+ # count = htm.unload_file('/path/to/doc.md')
504
+ # puts "Unloaded #{count} chunks"
505
+ #
506
+ def unload_file(file_path)
507
+ source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
508
+ return 0 unless source
509
+
510
+ count = source.soft_delete_chunks!
511
+ @long_term_memory.clear_cache!
512
+ source.destroy
513
+
514
+ update_robot_activity
515
+ count
516
+ end
517
+
269
518
  private
270
519
 
271
520
  def register_robot
@@ -306,35 +555,30 @@ class HTM
306
555
  token_count = node['token_count'].to_i
307
556
  access_count = (node['access_count'] || 0).to_i
308
557
  last_accessed = node['last_accessed'] ? Time.parse(node['last_accessed'].to_s) : nil
558
+ node_id = node['id']
309
559
 
310
- if @working_memory.has_space?(token_count)
311
- @working_memory.add(
312
- node['id'],
313
- node['content'],
314
- token_count: token_count,
315
- access_count: access_count,
316
- last_accessed: last_accessed,
317
- from_recall: true
318
- )
319
- else
560
+ unless @working_memory.has_space?(token_count)
320
561
  # Evict to make space
321
562
  evicted = @working_memory.evict_to_make_space(token_count)
322
563
  evicted_keys = evicted.map { |n| n[:key] }
323
- @long_term_memory.mark_evicted(evicted_keys) if evicted_keys.any?
324
-
325
- # Now add the recalled node
326
- @working_memory.add(
327
- node['id'],
328
- node['content'],
329
- token_count: token_count,
330
- access_count: access_count,
331
- last_accessed: last_accessed,
332
- from_recall: true
333
- )
564
+ @long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
334
565
  end
335
- end
336
566
 
337
- private
567
+ # Add to in-memory working memory
568
+ @working_memory.add(
569
+ node_id,
570
+ node['content'],
571
+ token_count: token_count,
572
+ access_count: access_count,
573
+ last_accessed: last_accessed,
574
+ from_recall: true
575
+ )
576
+
577
+ # Mark node as in working memory in the robot_nodes join table
578
+ HTM::Models::RobotNode
579
+ .find_by(robot_id: @robot_id, node_id: node_id)
580
+ &.update!(working_memory: true)
581
+ end
338
582
 
339
583
  # Validation helper methods
340
584
 
@@ -352,49 +596,24 @@ class HTM
352
596
 
353
597
 
354
598
  def validate_timeframe!(timeframe)
355
- return if timeframe.is_a?(Range) || timeframe.is_a?(String)
356
- raise ValidationError, "Timeframe must be a Range or String, got #{timeframe.class}"
599
+ return if HTM::Timeframe.valid?(timeframe)
600
+ raise ValidationError, "Invalid timeframe type: #{timeframe.class}. " \
601
+ "Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
357
602
  end
358
603
 
359
604
  def validate_positive_integer!(value, name)
360
605
  raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value > 0
361
606
  end
362
607
 
363
- # Timeframe parsing methods
608
+ def validate_metadata!(metadata)
609
+ raise ValidationError, "Metadata must be a Hash" unless metadata.is_a?(Hash)
364
610
 
365
- def parse_timeframe(timeframe)
366
- case timeframe
367
- when Range
368
- timeframe
369
- when String
370
- parse_natural_timeframe(timeframe)
371
- else
372
- raise ArgumentError, "Invalid timeframe: #{timeframe}"
611
+ # Ensure all keys are strings or symbols (will be converted to strings in JSON)
612
+ metadata.each_key do |key|
613
+ unless key.is_a?(String) || key.is_a?(Symbol)
614
+ raise ValidationError, "Metadata keys must be strings or symbols"
615
+ end
373
616
  end
374
617
  end
375
618
 
376
- def parse_natural_timeframe(text)
377
- now = Time.now
378
-
379
- case text.downcase
380
- when /last week/
381
- (now - 7 * 24 * 3600)..now
382
- when /yesterday/
383
- start_of_yesterday = Time.new(now.year, now.month, now.day - 1)
384
- start_of_yesterday..(start_of_yesterday + 24 * 3600)
385
- when /last (\d+) days?/
386
- days = $1.to_i
387
- (now - days * 24 * 3600)..now
388
- when /this month/
389
- start_of_month = Time.new(now.year, now.month, 1)
390
- start_of_month..now
391
- when /last month/
392
- start_of_last_month = Time.new(now.year, now.month - 1, 1)
393
- end_of_last_month = Time.new(now.year, now.month, 1) - 1
394
- start_of_last_month..end_of_last_month
395
- else
396
- # Default to last 24 hours
397
- (now - 24 * 3600)..now
398
- end
399
- end
400
619
  end