htm 0.0.2 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +294 -26
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +280 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +125 -15
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
data/lib/htm.rb
CHANGED
|
@@ -3,15 +3,21 @@
|
|
|
3
3
|
require_relative "htm/version"
|
|
4
4
|
require_relative "htm/errors"
|
|
5
5
|
require_relative "htm/configuration"
|
|
6
|
+
require_relative "htm/circuit_breaker"
|
|
6
7
|
require_relative "htm/active_record_config"
|
|
7
8
|
require_relative "htm/database"
|
|
8
9
|
require_relative "htm/long_term_memory"
|
|
9
10
|
require_relative "htm/working_memory"
|
|
10
11
|
require_relative "htm/embedding_service"
|
|
11
12
|
require_relative "htm/tag_service"
|
|
13
|
+
require_relative "htm/timeframe_extractor"
|
|
14
|
+
require_relative "htm/timeframe"
|
|
12
15
|
require_relative "htm/job_adapter"
|
|
13
16
|
require_relative "htm/jobs/generate_embedding_job"
|
|
14
17
|
require_relative "htm/jobs/generate_tags_job"
|
|
18
|
+
require_relative "htm/loaders/paragraph_chunker"
|
|
19
|
+
require_relative "htm/loaders/markdown_loader"
|
|
20
|
+
require_relative "htm/observability"
|
|
15
21
|
|
|
16
22
|
require "pg"
|
|
17
23
|
require "securerandom"
|
|
@@ -24,14 +30,14 @@ require_relative "htm/railtie" if defined?(Rails::Railtie)
|
|
|
24
30
|
#
|
|
25
31
|
# HTM implements a two-tier memory system:
|
|
26
32
|
# - Working Memory: Token-limited, active context for immediate LLM use
|
|
27
|
-
# - Long-term Memory: Durable PostgreSQL
|
|
33
|
+
# - Long-term Memory: Durable PostgreSQL storage with pgvector for permanent knowledge
|
|
28
34
|
#
|
|
29
35
|
# Key Features:
|
|
30
|
-
# - Never forgets unless explicitly told
|
|
31
|
-
# - RAG-based retrieval (temporal + semantic search)
|
|
36
|
+
# - Never forgets unless explicitly told (soft delete by default)
|
|
37
|
+
# - RAG-based retrieval (temporal + semantic search via pgvector)
|
|
32
38
|
# - Multi-robot "hive mind" - all robots share global memory
|
|
33
|
-
# -
|
|
34
|
-
# -
|
|
39
|
+
# - Hierarchical tagging system for knowledge organization
|
|
40
|
+
# - Async background processing for embeddings and tags
|
|
35
41
|
#
|
|
36
42
|
# @example Basic usage
|
|
37
43
|
# htm = HTM.new(robot_name: "Code Helper")
|
|
@@ -98,13 +104,10 @@ class HTM
|
|
|
98
104
|
# Stores content in long-term memory and adds it to working memory.
|
|
99
105
|
# Embeddings and hierarchical tags are automatically extracted by LLM in the background.
|
|
100
106
|
#
|
|
101
|
-
#
|
|
102
|
-
# Nil values for content or source are converted to empty strings.
|
|
103
|
-
#
|
|
104
|
-
# @param content [String, nil] The information to remember
|
|
105
|
-
# @param source [String, nil] Where this content came from (defaults to empty string if not provided)
|
|
107
|
+
# @param content [String] The information to remember (required, cannot be nil or empty)
|
|
106
108
|
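# @param tags [Array<String>] Manual tags to assign (optional, in addition to auto-extracted tags)
# @param metadata [Hash] Optional key/value metadata passed along with the node; keys must be Strings or Symbols (default: {})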
|
|
107
109
|
# @return [Integer] Database ID of the memory node
|
|
110
|
+
# @raise [ValidationError] If content is nil, empty, or exceeds maximum size, or if a tag or the metadata is invalid
|
|
108
111
|
#
|
|
109
112
|
# @example Basic usage
|
|
110
113
|
# node_id = htm.remember("PostgreSQL is great for HTM")
|
|
@@ -112,16 +115,31 @@ class HTM
|
|
|
112
115
|
# @example Remember with manual tags
|
|
113
116
|
# node_id = htm.remember("Time-series data", tags: ["database:timescaledb"])
|
|
114
117
|
#
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
+
# @example Remember with metadata
|
|
119
|
+
# node_id = htm.remember("User prefers dark mode", metadata: { source: "user", confidence: 0.95 })
|
|
120
|
+
#
|
|
121
|
+
def remember(content, tags: [], metadata: {})
|
|
122
|
+
# Validate inputs
|
|
123
|
+
raise ValidationError, "Content cannot be nil" if content.nil?
|
|
124
|
+
|
|
125
|
+
content_str = content.to_s.strip
|
|
126
|
+
raise ValidationError, "Content cannot be empty" if content_str.empty?
|
|
118
127
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
128
|
+
if content_str.bytesize > MAX_VALUE_LENGTH
|
|
129
|
+
raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
validate_array!(tags, "tags")
|
|
133
|
+
tags.each do |tag|
|
|
134
|
+
unless tag.is_a?(String) && tag.match?(/\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/)
|
|
135
|
+
raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
|
|
136
|
+
end
|
|
123
137
|
end
|
|
124
138
|
|
|
139
|
+
validate_metadata!(metadata)
|
|
140
|
+
|
|
141
|
+
content = content_str
|
|
142
|
+
|
|
125
143
|
# Calculate token count using configured counter
|
|
126
144
|
token_count = HTM.count_tokens(content)
|
|
127
145
|
|
|
@@ -131,7 +149,8 @@ class HTM
|
|
|
131
149
|
content: content,
|
|
132
150
|
token_count: token_count,
|
|
133
151
|
robot_id: @robot_id,
|
|
134
|
-
embedding: nil # Will be generated in background
|
|
152
|
+
embedding: nil, # Will be generated in background
|
|
153
|
+
metadata: metadata
|
|
135
154
|
)
|
|
136
155
|
|
|
137
156
|
node_id = result[:node_id]
|
|
@@ -155,9 +174,17 @@ class HTM
|
|
|
155
174
|
end
|
|
156
175
|
end
|
|
157
176
|
|
|
158
|
-
# Add to working memory (access_count starts at 0)
|
|
177
|
+
# Add to working memory (evict if needed, access_count starts at 0)
|
|
178
|
+
unless @working_memory.has_space?(token_count)
|
|
179
|
+
evicted = @working_memory.evict_to_make_space(token_count)
|
|
180
|
+
evicted_keys = evicted.map { |n| n[:key] }
|
|
181
|
+
@long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
|
|
182
|
+
end
|
|
159
183
|
@working_memory.add(node_id, content, token_count: token_count, access_count: 0)
|
|
160
184
|
|
|
185
|
+
# Mark node as in working memory in the robot_nodes join table
|
|
186
|
+
result[:robot_node].update!(working_memory: true)
|
|
187
|
+
|
|
161
188
|
update_robot_activity
|
|
162
189
|
node_id
|
|
163
190
|
end
|
|
@@ -165,53 +192,70 @@ class HTM
|
|
|
165
192
|
# Recall memories from a timeframe and topic
|
|
166
193
|
#
|
|
167
194
|
# @param topic [String] Topic to search for (required)
|
|
168
|
-
# @param timeframe [
|
|
195
|
+
# @param timeframe [nil, Range, Array<Range>, Date, DateTime, Time, String, Symbol] Time filter
|
|
196
|
+
# - nil: No time filter (search all memories)
|
|
197
|
+
# - Range: Time range (e.g., 7.days.ago..Time.now)
|
|
198
|
+
# - Array<Range>: Multiple time windows (OR'd together)
|
|
199
|
+
# - Date: Entire day
|
|
200
|
+
# - DateTime/Time: Entire day containing that moment
|
|
201
|
+
# - String: Natural language (e.g., "last week", "few days ago")
|
|
202
|
+
# - :auto: Extract timeframe from topic query automatically
|
|
169
203
|
# @param limit [Integer] Maximum number of nodes to retrieve (default: 20)
|
|
170
204
|
# @param strategy [Symbol] Search strategy (:vector, :fulltext, :hybrid) (default: :vector)
|
|
171
205
|
# @param with_relevance [Boolean] Include dynamic relevance scores (default: false)
|
|
172
206
|
# @param query_tags [Array<String>] Tags to boost relevance (default: [])
|
|
173
207
|
# @param raw [Boolean] Return full node hashes (true) or just content strings (false) (default: false)
|
|
208
|
+
# @param metadata [Hash] Filter by metadata fields using JSONB containment (default: {})
|
|
174
209
|
# @return [Array<String>, Array<Hash>] Content strings (raw: false) or full node hashes (raw: true)
|
|
175
210
|
#
|
|
176
|
-
# @example Basic usage (returns content strings)
|
|
211
|
+
# @example Basic usage - no time filter (returns content strings)
|
|
177
212
|
# memories = htm.recall("PostgreSQL")
|
|
178
213
|
# # => ["PostgreSQL is great for time-series data", "PostgreSQL with TimescaleDB..."]
|
|
179
214
|
#
|
|
180
|
-
# @example
|
|
181
|
-
# nodes = htm.recall("PostgreSQL", raw: true)
|
|
182
|
-
# # => [{"id" => 1, "content" => "...", "created_at" => "...", ...}, ...]
|
|
183
|
-
#
|
|
184
|
-
# @example With timeframe
|
|
215
|
+
# @example With explicit timeframe
|
|
185
216
|
# memories = htm.recall("PostgreSQL", timeframe: "last week")
|
|
217
|
+
# memories = htm.recall("PostgreSQL", timeframe: Date.today)
|
|
218
|
+
# memories = htm.recall("PostgreSQL", timeframe: 7.days.ago..Time.now)
|
|
186
219
|
#
|
|
187
|
-
# @example
|
|
188
|
-
# memories = htm.recall("PostgreSQL",
|
|
189
|
-
#
|
|
190
|
-
# limit: 50,
|
|
191
|
-
# strategy: :hybrid,
|
|
192
|
-
# with_relevance: true,
|
|
193
|
-
# query_tags: ["database", "timeseries"])
|
|
220
|
+
# @example Auto-extract timeframe from query
|
|
221
|
+
# memories = htm.recall("what did we discuss last week about PostgreSQL", timeframe: :auto)
|
|
222
|
+
# # Extracts "last week" as timeframe, searches for "what did we discuss about PostgreSQL"
|
|
194
223
|
#
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
224
|
+
# @example Multiple time windows
|
|
225
|
+
# memories = htm.recall("meetings", timeframe: [last_monday, last_friday])
|
|
226
|
+
#
|
|
227
|
+
# @example Filter by metadata
|
|
228
|
+
# memories = htm.recall("preferences", metadata: { source: "user" })
|
|
229
|
+
# memories = htm.recall("decisions", metadata: { confidence: 0.9, type: "architectural" })
|
|
230
|
+
#
|
|
231
|
+
def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false, metadata: {})
|
|
199
232
|
# Validate inputs
|
|
200
233
|
validate_timeframe!(timeframe)
|
|
201
234
|
validate_positive_integer!(limit, "limit")
|
|
202
235
|
validate_recall_strategy!(strategy)
|
|
203
236
|
validate_array!(query_tags, "query_tags")
|
|
204
|
-
|
|
205
|
-
|
|
237
|
+
validate_metadata!(metadata)
|
|
238
|
+
|
|
239
|
+
# Normalize timeframe and potentially extract from query
|
|
240
|
+
search_query = topic
|
|
241
|
+
normalized_timeframe = if timeframe == :auto
|
|
242
|
+
result = HTM::Timeframe.normalize(:auto, query: topic)
|
|
243
|
+
search_query = result.query # Use cleaned query for search
|
|
244
|
+
HTM.logger.debug "Auto-extracted timeframe: #{result.extracted.inspect}" if result.extracted
|
|
245
|
+
result.timeframe
|
|
246
|
+
else
|
|
247
|
+
HTM::Timeframe.normalize(timeframe)
|
|
248
|
+
end
|
|
206
249
|
|
|
207
250
|
# Use relevance-based search if requested
|
|
208
251
|
if with_relevance
|
|
209
252
|
nodes = @long_term_memory.search_with_relevance(
|
|
210
|
-
timeframe:
|
|
211
|
-
query:
|
|
253
|
+
timeframe: normalized_timeframe,
|
|
254
|
+
query: search_query,
|
|
212
255
|
query_tags: query_tags,
|
|
213
256
|
limit: limit,
|
|
214
|
-
embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil
|
|
257
|
+
embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil,
|
|
258
|
+
metadata: metadata
|
|
215
259
|
)
|
|
216
260
|
else
|
|
217
261
|
# Perform standard RAG-based retrieval
|
|
@@ -219,24 +263,27 @@ class HTM
|
|
|
219
263
|
when :vector
|
|
220
264
|
# Vector search using query embedding
|
|
221
265
|
@long_term_memory.search(
|
|
222
|
-
timeframe:
|
|
223
|
-
query:
|
|
266
|
+
timeframe: normalized_timeframe,
|
|
267
|
+
query: search_query,
|
|
224
268
|
limit: limit,
|
|
225
|
-
embedding_service: HTM
|
|
269
|
+
embedding_service: HTM,
|
|
270
|
+
metadata: metadata
|
|
226
271
|
)
|
|
227
272
|
when :fulltext
|
|
228
273
|
@long_term_memory.search_fulltext(
|
|
229
|
-
timeframe:
|
|
230
|
-
query:
|
|
231
|
-
limit: limit
|
|
274
|
+
timeframe: normalized_timeframe,
|
|
275
|
+
query: search_query,
|
|
276
|
+
limit: limit,
|
|
277
|
+
metadata: metadata
|
|
232
278
|
)
|
|
233
279
|
when :hybrid
|
|
234
280
|
# Hybrid search combining vector + fulltext
|
|
235
281
|
@long_term_memory.search_hybrid(
|
|
236
|
-
timeframe:
|
|
237
|
-
query:
|
|
282
|
+
timeframe: normalized_timeframe,
|
|
283
|
+
query: search_query,
|
|
238
284
|
limit: limit,
|
|
239
|
-
embedding_service: HTM
|
|
285
|
+
embedding_service: HTM,
|
|
286
|
+
metadata: metadata
|
|
240
287
|
)
|
|
241
288
|
end
|
|
242
289
|
end
|
|
@@ -252,32 +299,222 @@ class HTM
|
|
|
252
299
|
raw ? nodes : nodes.map { |node| node['content'] }
|
|
253
300
|
end
|
|
254
301
|
|
|
255
|
-
# Forget a memory node (
|
|
302
|
+
# Forget a memory node (soft delete by default, permanent delete requires confirmation)
|
|
256
303
|
#
|
|
257
|
-
#
|
|
258
|
-
#
|
|
304
|
+
# By default, performs a soft delete (sets deleted_at timestamp). The node
|
|
305
|
+
# remains in the database but is excluded from queries. Use soft: false
|
|
306
|
+
# with confirm: :confirmed for permanent deletion.
|
|
307
|
+
#
|
|
308
|
+
# @param node_id [Integer] ID of the node to delete
|
|
309
|
+
# @param soft [Boolean] If true (default), soft delete; if false, permanent delete
|
|
310
|
+
# @param confirm [Symbol] Must be :confirmed to proceed with permanent deletion
|
|
259
311
|
# @return [Boolean] true if deleted
|
|
260
|
-
# @raise [ArgumentError] if
|
|
312
|
+
# @raise [ArgumentError] if node_id is nil, or if permanent deletion is requested without confirm: :confirmed
|
|
261
313
|
# @raise [HTM::NotFoundError] if node doesn't exist
|
|
262
314
|
#
|
|
263
|
-
|
|
315
|
+
# @example Soft delete (recoverable)
|
|
316
|
+
# htm.forget(node_id)
|
|
317
|
+
# htm.forget(node_id, soft: true)
|
|
318
|
+
#
|
|
319
|
+
# @example Permanent delete (requires confirmation)
|
|
320
|
+
# htm.forget(node_id, soft: false, confirm: :confirmed)
|
|
321
|
+
#
|
|
322
|
+
def forget(node_id, soft: true, confirm: false)
|
|
264
323
|
# Validate inputs
|
|
265
|
-
raise ArgumentError, "
|
|
266
|
-
raise ArgumentError, "Must pass confirm: :confirmed to delete" unless confirm == :confirmed
|
|
324
|
+
raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
|
|
267
325
|
|
|
268
|
-
#
|
|
269
|
-
|
|
270
|
-
raise
|
|
326
|
+
# Permanent delete requires confirmation
|
|
327
|
+
if !soft && confirm != :confirmed
|
|
328
|
+
raise ArgumentError, "Permanent deletion requires confirm: :confirmed"
|
|
271
329
|
end
|
|
272
330
|
|
|
273
|
-
#
|
|
274
|
-
|
|
331
|
+
# Verify node exists (including soft-deleted for restore scenarios)
|
|
332
|
+
node = HTM::Models::Node.with_deleted.find_by(id: node_id)
|
|
333
|
+
raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
|
|
334
|
+
|
|
335
|
+
if soft
|
|
336
|
+
# Soft delete - mark as deleted but keep in database
|
|
337
|
+
node.soft_delete!
|
|
338
|
+
@long_term_memory.clear_cache! # Invalidate cache since node is no longer visible
|
|
339
|
+
HTM.logger.info "Node #{node_id} soft deleted"
|
|
340
|
+
else
|
|
341
|
+
# Permanent delete (also invalidates cache internally)
|
|
342
|
+
@long_term_memory.delete(node_id)
|
|
343
|
+
HTM.logger.info "Node #{node_id} permanently deleted"
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
# Remove from working memory either way
|
|
275
347
|
@working_memory.remove(node_id)
|
|
276
348
|
|
|
277
349
|
update_robot_activity
|
|
278
350
|
true
|
|
279
351
|
end
|
|
280
352
|
|
|
353
|
+
# Restore a soft-deleted memory node
|
|
354
|
+
#
|
|
355
|
+
# @param node_id [Integer] ID of the soft-deleted node to restore
|
|
356
|
+
# @return [Boolean] true if restored
|
|
357
|
+
# @raise [HTM::NotFoundError] if node doesn't exist
# @raise [ArgumentError] if node_id is nil or the node is not soft-deleted
|
|
358
|
+
#
|
|
359
|
+
# @example
|
|
360
|
+
# htm.forget(node_id) # Soft delete
|
|
361
|
+
# htm.restore(node_id) # Bring it back
|
|
362
|
+
#
|
|
363
|
+
def restore(node_id)
|
|
364
|
+
raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
|
|
365
|
+
|
|
366
|
+
# Find including soft-deleted nodes
|
|
367
|
+
node = HTM::Models::Node.with_deleted.find_by(id: node_id)
|
|
368
|
+
raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
|
|
369
|
+
|
|
370
|
+
unless node.deleted?
|
|
371
|
+
raise ArgumentError, "Node #{node_id} is not soft-deleted"
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
node.restore!
|
|
375
|
+
HTM.logger.info "Node #{node_id} restored"
|
|
376
|
+
|
|
377
|
+
update_robot_activity
|
|
378
|
+
true
|
|
379
|
+
end
|
|
380
|
+
|
|
381
|
+
# Permanently delete all soft-deleted nodes older than specified time
|
|
382
|
+
#
|
|
383
|
+
# @param older_than [Time, ActiveSupport::Duration] Purge nodes soft-deleted before this time
|
|
384
|
+
# @param confirm [Symbol] Must be :confirmed to proceed
|
|
385
|
+
# @return [Integer] Number of nodes permanently deleted
|
|
386
|
+
# @raise [ArgumentError] if confirmation not provided
|
|
387
|
+
#
|
|
388
|
+
# @example Purge nodes deleted more than 30 days ago
|
|
389
|
+
# htm.purge_deleted(older_than: 30.days.ago, confirm: :confirmed)
|
|
390
|
+
#
|
|
391
|
+
# @example Purge nodes deleted before a specific date
|
|
392
|
+
# htm.purge_deleted(older_than: Time.new(2024, 1, 1), confirm: :confirmed)
|
|
393
|
+
#
|
|
394
|
+
def purge_deleted(older_than:, confirm: false)
|
|
395
|
+
raise ArgumentError, "Purge requires confirm: :confirmed" unless confirm == :confirmed
|
|
396
|
+
|
|
397
|
+
count = HTM::Models::Node.purge_deleted(older_than: older_than)
|
|
398
|
+
HTM.logger.info "Purged #{count} soft-deleted nodes older than #{older_than}"
|
|
399
|
+
|
|
400
|
+
count
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
# Clear all nodes from working memory
|
|
404
|
+
#
|
|
405
|
+
# Marks all nodes as evicted from working memory (in database) and clears
|
|
406
|
+
# the in-memory cache. Nodes remain in long-term memory.
|
|
407
|
+
#
|
|
408
|
+
# @return [Integer] Number of nodes cleared from working memory
|
|
409
|
+
#
|
|
410
|
+
# @example
|
|
411
|
+
# htm.clear_working_memory # => 5
|
|
412
|
+
#
|
|
413
|
+
def clear_working_memory
|
|
414
|
+
# Clear in-memory cache
|
|
415
|
+
@working_memory.clear
|
|
416
|
+
|
|
417
|
+
# Update database: mark all as evicted from working memory
|
|
418
|
+
count = HTM::Models::RobotNode
|
|
419
|
+
.where(robot_id: @robot_id, working_memory: true)
|
|
420
|
+
.update_all(working_memory: false)
|
|
421
|
+
|
|
422
|
+
HTM.logger.info "Cleared #{count} nodes from working memory"
|
|
423
|
+
count
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
# Load a single file into long-term memory
|
|
427
|
+
#
|
|
428
|
+
# Reads a text-based file (currently markdown only), chunks it by paragraph,
|
|
429
|
+
# and stores each chunk as a node. YAML frontmatter is preserved as metadata
|
|
430
|
+
# on the first chunk.
|
|
431
|
+
#
|
|
432
|
+
# @param path [String] Path to file
|
|
433
|
+
# @param force [Boolean] Force re-sync even if mtime unchanged (default: false)
|
|
434
|
+
# @return [Hash] Result with keys:
|
|
435
|
+
# - :file_path [String] Absolute path to file
|
|
436
|
+
# - :chunks_created [Integer] Number of new chunks created
|
|
437
|
+
# - :chunks_updated [Integer] Number of existing chunks updated
|
|
438
|
+
# - :chunks_deleted [Integer] Number of chunks soft-deleted
|
|
439
|
+
# - :skipped [Boolean] True if file was unchanged and skipped
|
|
440
|
+
#
|
|
441
|
+
# @example Load a markdown file
|
|
442
|
+
# result = htm.load_file('/path/to/doc.md')
|
|
443
|
+
# # => { file_path: '/path/to/doc.md', chunks_created: 5, ... }
|
|
444
|
+
#
|
|
445
|
+
# @example Force re-sync even if unchanged
|
|
446
|
+
# result = htm.load_file('/path/to/doc.md', force: true)
|
|
447
|
+
#
|
|
448
|
+
def load_file(path, force: false)
|
|
449
|
+
loader = HTM::Loaders::MarkdownLoader.new(self)
|
|
450
|
+
result = loader.load_file(path, force: force)
|
|
451
|
+
|
|
452
|
+
update_robot_activity unless result[:skipped]
|
|
453
|
+
result
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
# Load all matching files from a directory into long-term memory
|
|
457
|
+
#
|
|
458
|
+
# @param path [String] Directory path
|
|
459
|
+
# @param pattern [String] Glob pattern (default: '**/*.md')
|
|
460
|
+
# @param force [Boolean] Force re-sync even if unchanged (default: false)
|
|
461
|
+
# @return [Array<Hash>] Results for each file
|
|
462
|
+
#
|
|
463
|
+
# @example Load all markdown files recursively
|
|
464
|
+
# results = htm.load_directory('/path/to/docs')
|
|
465
|
+
#
|
|
466
|
+
# @example Load only top-level markdown files
|
|
467
|
+
# results = htm.load_directory('/path/to/docs', pattern: '*.md')
|
|
468
|
+
#
|
|
469
|
+
def load_directory(path, pattern: '**/*.md', force: false)
|
|
470
|
+
loader = HTM::Loaders::MarkdownLoader.new(self)
|
|
471
|
+
results = loader.load_directory(path, pattern: pattern, force: force)
|
|
472
|
+
|
|
473
|
+
# Update activity if any files were processed
|
|
474
|
+
if results.any? { |r| !r[:skipped] && !r[:error] }
|
|
475
|
+
update_robot_activity
|
|
476
|
+
end
|
|
477
|
+
|
|
478
|
+
results
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
# Get all nodes loaded from a specific file
|
|
482
|
+
#
|
|
483
|
+
# @param file_path [String] Path to the source file
|
|
484
|
+
# @return [ActiveRecord::Relation] Nodes from this file, ordered by chunk_position
|
|
485
|
+
#
|
|
486
|
+
# @example
|
|
487
|
+
# nodes = htm.nodes_from_file('/path/to/doc.md')
|
|
488
|
+
# nodes.each { |node| puts node.content }
|
|
489
|
+
#
|
|
490
|
+
def nodes_from_file(file_path)
|
|
491
|
+
source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
|
|
492
|
+
return HTM::Models::Node.none unless source
|
|
493
|
+
|
|
494
|
+
HTM::Models::Node.from_source(source.id)
|
|
495
|
+
end
|
|
496
|
+
|
|
497
|
+
# Unload a file (soft-delete all its chunks and remove source record)
|
|
498
|
+
#
|
|
499
|
+
# @param file_path [String] Path to the source file
|
|
500
|
+
# @return [Integer] Number of nodes soft-deleted
|
|
501
|
+
#
|
|
502
|
+
# @example
|
|
503
|
+
# count = htm.unload_file('/path/to/doc.md')
|
|
504
|
+
# puts "Unloaded #{count} chunks"
|
|
505
|
+
#
|
|
506
|
+
def unload_file(file_path)
|
|
507
|
+
source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
|
|
508
|
+
return 0 unless source
|
|
509
|
+
|
|
510
|
+
count = source.soft_delete_chunks!
|
|
511
|
+
@long_term_memory.clear_cache!
|
|
512
|
+
source.destroy
|
|
513
|
+
|
|
514
|
+
update_robot_activity
|
|
515
|
+
count
|
|
516
|
+
end
|
|
517
|
+
|
|
281
518
|
private
|
|
282
519
|
|
|
283
520
|
def register_robot
|
|
@@ -318,35 +555,30 @@ class HTM
|
|
|
318
555
|
token_count = node['token_count'].to_i
|
|
319
556
|
access_count = (node['access_count'] || 0).to_i
|
|
320
557
|
last_accessed = node['last_accessed'] ? Time.parse(node['last_accessed'].to_s) : nil
|
|
558
|
+
node_id = node['id']
|
|
321
559
|
|
|
322
|
-
|
|
323
|
-
@working_memory.add(
|
|
324
|
-
node['id'],
|
|
325
|
-
node['content'],
|
|
326
|
-
token_count: token_count,
|
|
327
|
-
access_count: access_count,
|
|
328
|
-
last_accessed: last_accessed,
|
|
329
|
-
from_recall: true
|
|
330
|
-
)
|
|
331
|
-
else
|
|
560
|
+
unless @working_memory.has_space?(token_count)
|
|
332
561
|
# Evict to make space
|
|
333
562
|
evicted = @working_memory.evict_to_make_space(token_count)
|
|
334
563
|
evicted_keys = evicted.map { |n| n[:key] }
|
|
335
|
-
@long_term_memory.mark_evicted(evicted_keys) if evicted_keys.any?
|
|
336
|
-
|
|
337
|
-
# Now add the recalled node
|
|
338
|
-
@working_memory.add(
|
|
339
|
-
node['id'],
|
|
340
|
-
node['content'],
|
|
341
|
-
token_count: token_count,
|
|
342
|
-
access_count: access_count,
|
|
343
|
-
last_accessed: last_accessed,
|
|
344
|
-
from_recall: true
|
|
345
|
-
)
|
|
564
|
+
@long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
|
|
346
565
|
end
|
|
347
|
-
end
|
|
348
566
|
|
|
349
|
-
|
|
567
|
+
# Add to in-memory working memory
|
|
568
|
+
@working_memory.add(
|
|
569
|
+
node_id,
|
|
570
|
+
node['content'],
|
|
571
|
+
token_count: token_count,
|
|
572
|
+
access_count: access_count,
|
|
573
|
+
last_accessed: last_accessed,
|
|
574
|
+
from_recall: true
|
|
575
|
+
)
|
|
576
|
+
|
|
577
|
+
# Mark node as in working memory in the robot_nodes join table
|
|
578
|
+
HTM::Models::RobotNode
|
|
579
|
+
.find_by(robot_id: @robot_id, node_id: node_id)
|
|
580
|
+
&.update!(working_memory: true)
|
|
581
|
+
end
|
|
350
582
|
|
|
351
583
|
# Validation helper methods
|
|
352
584
|
|
|
@@ -364,58 +596,24 @@ class HTM
|
|
|
364
596
|
|
|
365
597
|
|
|
366
598
|
def validate_timeframe!(timeframe)
|
|
367
|
-
return if
|
|
368
|
-
raise ValidationError, "
|
|
599
|
+
return if HTM::Timeframe.valid?(timeframe)
|
|
600
|
+
raise ValidationError, "Invalid timeframe type: #{timeframe.class}. " \
|
|
601
|
+
"Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
|
|
369
602
|
end
|
|
370
603
|
|
|
371
604
|
def validate_positive_integer!(value, name)
|
|
372
605
|
raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value > 0
|
|
373
606
|
end
|
|
374
607
|
|
|
375
|
-
|
|
608
|
+
def validate_metadata!(metadata)
|
|
609
|
+
raise ValidationError, "Metadata must be a Hash" unless metadata.is_a?(Hash)
|
|
376
610
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
parse_natural_timeframe(timeframe)
|
|
383
|
-
else
|
|
384
|
-
raise ArgumentError, "Invalid timeframe: #{timeframe}"
|
|
611
|
+
# Ensure all keys are strings or symbols (will be converted to strings in JSON)
|
|
612
|
+
metadata.each_key do |key|
|
|
613
|
+
unless key.is_a?(String) || key.is_a?(Symbol)
|
|
614
|
+
raise ValidationError, "Metadata keys must be strings or symbols"
|
|
615
|
+
end
|
|
385
616
|
end
|
|
386
617
|
end
|
|
387
618
|
|
|
388
|
-
def parse_natural_timeframe(text)
|
|
389
|
-
now = Time.now
|
|
390
|
-
|
|
391
|
-
case text.downcase
|
|
392
|
-
when /last week/
|
|
393
|
-
(now - 7 * 24 * 3600)..now
|
|
394
|
-
when /yesterday/
|
|
395
|
-
start_of_yesterday = Time.new(now.year, now.month, now.day - 1)
|
|
396
|
-
start_of_yesterday..(start_of_yesterday + 24 * 3600)
|
|
397
|
-
when /last (\d+) days?/
|
|
398
|
-
days = $1.to_i
|
|
399
|
-
(now - days * 24 * 3600)..now
|
|
400
|
-
when /last (\d+) seconds?/
|
|
401
|
-
seconds = $1.to_i
|
|
402
|
-
(now - seconds)..now
|
|
403
|
-
when /last (\d+) minutes?/
|
|
404
|
-
minutes = $1.to_i
|
|
405
|
-
(now - minutes * 60)..now
|
|
406
|
-
when /last (\d+) hours?/
|
|
407
|
-
hours = $1.to_i
|
|
408
|
-
(now - hours * 3600)..now
|
|
409
|
-
when /this month/
|
|
410
|
-
start_of_month = Time.new(now.year, now.month, 1)
|
|
411
|
-
start_of_month..now
|
|
412
|
-
when /last month/
|
|
413
|
-
start_of_last_month = Time.new(now.year, now.month - 1, 1)
|
|
414
|
-
end_of_last_month = Time.new(now.year, now.month, 1) - 1
|
|
415
|
-
start_of_last_month..end_of_last_month
|
|
416
|
-
else
|
|
417
|
-
# Default to last 24 hours
|
|
418
|
-
(now - 24 * 3600)..now
|
|
419
|
-
end
|
|
420
|
-
end
|
|
421
619
|
end
|