htm 0.0.1 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.envrc +1 -0
- data/.irbrc +283 -80
- data/.tbls.yml +31 -0
- data/CHANGELOG.md +314 -16
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/SETUP.md +132 -101
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +390 -36
- data/docs/api/database.md +19 -232
- data/docs/api/embedding-service.md +1 -7
- data/docs/api/htm.md +305 -364
- data/docs/api/index.md +1 -7
- data/docs/api/long-term-memory.md +342 -590
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
- data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
- data/docs/architecture/adrs/index.md +2 -13
- data/docs/architecture/hive-mind.md +165 -166
- data/docs/architecture/index.md +2 -2
- data/docs/architecture/overview.md +5 -171
- data/docs/architecture/two-tier-memory.md +1 -35
- data/docs/assets/images/adr-010-current-architecture.svg +37 -0
- data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
- data/docs/assets/images/adr-dependency-tree.svg +93 -0
- data/docs/assets/images/class-hierarchy.svg +55 -0
- data/docs/assets/images/exception-hierarchy.svg +45 -0
- data/docs/assets/images/htm-architecture-overview.svg +83 -0
- data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
- data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
- data/docs/assets/images/htm-eviction-process.svg +141 -0
- data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
- data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
- data/docs/assets/images/htm-node-states.svg +123 -0
- data/docs/assets/images/project-structure.svg +78 -0
- data/docs/assets/images/test-directory-structure.svg +38 -0
- data/{dbdoc → docs/database}/README.md +127 -125
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/{dbdoc → docs/database}/public.node_tags.md +7 -8
- data/docs/database/public.node_tags.svg +239 -0
- data/{dbdoc → docs/database}/public.nodes.md +22 -17
- data/docs/database/public.nodes.svg +271 -0
- data/docs/database/public.robot_nodes.md +46 -0
- data/docs/database/public.robot_nodes.svg +243 -0
- data/{dbdoc → docs/database}/public.robots.md +2 -3
- data/docs/database/public.robots.svg +161 -0
- data/docs/database/public.tags.svg +139 -0
- data/{dbdoc → docs/database}/schema.json +941 -630
- data/docs/database/schema.svg +282 -0
- data/docs/development/index.md +1 -29
- data/docs/development/schema.md +134 -309
- data/docs/development/testing.md +1 -9
- data/docs/getting-started/index.md +47 -0
- data/docs/{installation.md → getting-started/installation.md} +2 -2
- data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
- data/docs/guides/adding-memories.md +295 -643
- data/docs/guides/recalling-memories.md +36 -1
- data/docs/guides/search-strategies.md +85 -51
- data/docs/images/htm-er-diagram.svg +156 -0
- data/docs/index.md +16 -31
- data/docs/multi_framework_support.md +4 -4
- data/examples/README.md +280 -0
- data/examples/basic_usage.rb +18 -16
- data/examples/cli_app/htm_cli.rb +146 -8
- data/examples/cli_app/temp.log +93 -0
- data/examples/custom_llm_configuration.rb +1 -2
- data/examples/example_app/app.rb +11 -14
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/sinatra_app/Gemfile +1 -0
- data/examples/sinatra_app/Gemfile.lock +166 -0
- data/examples/sinatra_app/app.rb +219 -24
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +10 -3
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +313 -80
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
- data/lib/htm/job_adapter.rb +10 -3
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +601 -321
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +116 -12
- data/lib/htm/models/robot.rb +53 -4
- data/lib/htm/models/robot_node.rb +51 -0
- data/lib/htm/models/tag.rb +302 -0
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +29 -0
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +352 -133
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +188 -2
- data/lib/tasks/jobs.rake +10 -12
- data/lib/tasks/tags.rake +194 -0
- data/mkdocs.yml +91 -9
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +177 -37
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/dbdoc/public.node_tags.svg +0 -112
- data/dbdoc/public.nodes.svg +0 -118
- data/dbdoc/public.robots.svg +0 -90
- data/dbdoc/public.tags.svg +0 -60
- data/dbdoc/schema.svg +0 -154
- data/{dbdoc → docs/database}/public.node_stats.md +0 -0
- data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
- data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
- data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
- data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
- data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
- data/{dbdoc → docs/database}/public.operations_log.md +0 -0
- data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
- data/{dbdoc → docs/database}/public.relationships.md +0 -0
- data/{dbdoc → docs/database}/public.relationships.svg +0 -0
- data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
- data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
- data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
- data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
- data/{dbdoc → docs/database}/public.tags.md +3 -3
- data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
- data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
data/lib/htm.rb
CHANGED
|
@@ -3,15 +3,21 @@
|
|
|
3
3
|
require_relative "htm/version"
|
|
4
4
|
require_relative "htm/errors"
|
|
5
5
|
require_relative "htm/configuration"
|
|
6
|
+
require_relative "htm/circuit_breaker"
|
|
6
7
|
require_relative "htm/active_record_config"
|
|
7
8
|
require_relative "htm/database"
|
|
8
9
|
require_relative "htm/long_term_memory"
|
|
9
10
|
require_relative "htm/working_memory"
|
|
10
11
|
require_relative "htm/embedding_service"
|
|
11
12
|
require_relative "htm/tag_service"
|
|
13
|
+
require_relative "htm/timeframe_extractor"
|
|
14
|
+
require_relative "htm/timeframe"
|
|
12
15
|
require_relative "htm/job_adapter"
|
|
13
16
|
require_relative "htm/jobs/generate_embedding_job"
|
|
14
17
|
require_relative "htm/jobs/generate_tags_job"
|
|
18
|
+
require_relative "htm/loaders/paragraph_chunker"
|
|
19
|
+
require_relative "htm/loaders/markdown_loader"
|
|
20
|
+
require_relative "htm/observability"
|
|
15
21
|
|
|
16
22
|
require "pg"
|
|
17
23
|
require "securerandom"
|
|
@@ -24,14 +30,14 @@ require_relative "htm/railtie" if defined?(Rails::Railtie)
|
|
|
24
30
|
#
|
|
25
31
|
# HTM implements a two-tier memory system:
|
|
26
32
|
# - Working Memory: Token-limited, active context for immediate LLM use
|
|
27
|
-
# - Long-term Memory: Durable PostgreSQL
|
|
33
|
+
# - Long-term Memory: Durable PostgreSQL storage with pgvector for permanent knowledge
|
|
28
34
|
#
|
|
29
35
|
# Key Features:
|
|
30
|
-
# - Never forgets unless explicitly told
|
|
31
|
-
# - RAG-based retrieval (temporal + semantic search)
|
|
36
|
+
# - Never forgets unless explicitly told (soft delete by default)
|
|
37
|
+
# - RAG-based retrieval (temporal + semantic search via pgvector)
|
|
32
38
|
# - Multi-robot "hive mind" - all robots share global memory
|
|
33
|
-
# -
|
|
34
|
-
# -
|
|
39
|
+
# - Hierarchical tagging system for knowledge organization
|
|
40
|
+
# - Async background processing for embeddings and tags
|
|
35
41
|
#
|
|
36
42
|
# @example Basic usage
|
|
37
43
|
# htm = HTM.new(robot_name: "Code Helper")
|
|
@@ -98,54 +104,87 @@ class HTM
|
|
|
98
104
|
# Stores content in long-term memory and adds it to working memory.
|
|
99
105
|
# Embeddings and hierarchical tags are automatically extracted by LLM in the background.
|
|
100
106
|
#
|
|
101
|
-
#
|
|
102
|
-
# Nil values for content or source are converted to empty strings.
|
|
103
|
-
#
|
|
104
|
-
# @param content [String, nil] The information to remember
|
|
105
|
-
# @param source [String, nil] Where this content came from (defaults to empty string if not provided)
|
|
107
|
+
# @param content [String] The information to remember (required, cannot be nil or empty)
|
|
106
108
|
# @param tags [Array<String>] Manual tags to assign (optional, in addition to auto-extracted tags)
|
|
107
109
|
# @return [Integer] Database ID of the memory node
|
|
110
|
+
# @raise [ValidationError] If content is nil, empty, or exceeds maximum size
|
|
108
111
|
#
|
|
109
112
|
# @example Remember with source
|
|
110
|
-
# node_id = htm.remember("PostgreSQL is great for HTM"
|
|
113
|
+
# node_id = htm.remember("PostgreSQL is great for HTM")
|
|
111
114
|
#
|
|
112
115
|
# @example Remember with manual tags
|
|
113
|
-
# node_id = htm.remember("Time-series data",
|
|
116
|
+
# node_id = htm.remember("Time-series data", tags: ["database:timescaledb"])
|
|
117
|
+
#
|
|
118
|
+
# @example Remember with metadata
|
|
119
|
+
# node_id = htm.remember("User prefers dark mode", metadata: { source: "user", confidence: 0.95 })
|
|
114
120
|
#
|
|
115
|
-
def remember(content,
|
|
116
|
-
#
|
|
117
|
-
|
|
118
|
-
|
|
121
|
+
def remember(content, tags: [], metadata: {})
|
|
122
|
+
# Validate inputs
|
|
123
|
+
raise ValidationError, "Content cannot be nil" if content.nil?
|
|
124
|
+
|
|
125
|
+
content_str = content.to_s.strip
|
|
126
|
+
raise ValidationError, "Content cannot be empty" if content_str.empty?
|
|
119
127
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
128
|
+
if content_str.bytesize > MAX_VALUE_LENGTH
|
|
129
|
+
raise ValidationError, "Content exceeds maximum size (#{MAX_VALUE_LENGTH} bytes)"
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
validate_array!(tags, "tags")
|
|
133
|
+
tags.each do |tag|
|
|
134
|
+
unless tag.is_a?(String) && tag.match?(/\A[a-z0-9\-]+(:[a-z0-9\-]+)*\z/)
|
|
135
|
+
raise ValidationError, "Invalid tag format: #{tag.inspect}. Tags must be lowercase alphanumeric with hyphens, separated by colons."
|
|
136
|
+
end
|
|
124
137
|
end
|
|
125
138
|
|
|
139
|
+
validate_metadata!(metadata)
|
|
140
|
+
|
|
141
|
+
content = content_str
|
|
142
|
+
|
|
126
143
|
# Calculate token count using configured counter
|
|
127
144
|
token_count = HTM.count_tokens(content)
|
|
128
145
|
|
|
129
|
-
# Store in long-term memory
|
|
130
|
-
#
|
|
131
|
-
|
|
146
|
+
# Store in long-term memory (with deduplication)
|
|
147
|
+
# Returns { node_id:, is_new:, robot_node: }
|
|
148
|
+
result = @long_term_memory.add(
|
|
132
149
|
content: content,
|
|
133
|
-
source: source,
|
|
134
150
|
token_count: token_count,
|
|
135
151
|
robot_id: @robot_id,
|
|
136
|
-
embedding: nil # Will be generated in background
|
|
152
|
+
embedding: nil, # Will be generated in background
|
|
153
|
+
metadata: metadata
|
|
137
154
|
)
|
|
138
155
|
|
|
139
|
-
|
|
156
|
+
node_id = result[:node_id]
|
|
157
|
+
is_new = result[:is_new]
|
|
140
158
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
159
|
+
if is_new
|
|
160
|
+
HTM.logger.info "Node #{node_id} created for robot #{@robot_name} (#{token_count} tokens)"
|
|
161
|
+
|
|
162
|
+
# Enqueue background jobs for embedding and tag generation
|
|
163
|
+
# Only for NEW nodes - existing nodes already have embeddings/tags
|
|
164
|
+
enqueue_embedding_job(node_id)
|
|
165
|
+
enqueue_tags_job(node_id, manual_tags: tags)
|
|
166
|
+
else
|
|
167
|
+
HTM.logger.info "Node #{node_id} already exists, linked to robot #{@robot_name} (remember_count: #{result[:robot_node].remember_count})"
|
|
168
|
+
|
|
169
|
+
# For existing nodes, only add manual tags if provided
|
|
170
|
+
if tags.any?
|
|
171
|
+
node = HTM::Models::Node.find(node_id)
|
|
172
|
+
node.add_tags(tags)
|
|
173
|
+
HTM.logger.info "Added #{tags.length} manual tags to existing node #{node_id}"
|
|
174
|
+
end
|
|
175
|
+
end
|
|
145
176
|
|
|
146
|
-
# Add to working memory (access_count starts at 0)
|
|
177
|
+
# Add to working memory (evict if needed, access_count starts at 0)
|
|
178
|
+
unless @working_memory.has_space?(token_count)
|
|
179
|
+
evicted = @working_memory.evict_to_make_space(token_count)
|
|
180
|
+
evicted_keys = evicted.map { |n| n[:key] }
|
|
181
|
+
@long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
|
|
182
|
+
end
|
|
147
183
|
@working_memory.add(node_id, content, token_count: token_count, access_count: 0)
|
|
148
184
|
|
|
185
|
+
# Mark node as in working memory in the robot_nodes join table
|
|
186
|
+
result[:robot_node].update!(working_memory: true)
|
|
187
|
+
|
|
149
188
|
update_robot_activity
|
|
150
189
|
node_id
|
|
151
190
|
end
|
|
@@ -153,53 +192,70 @@ class HTM
|
|
|
153
192
|
# Recall memories from a timeframe and topic
|
|
154
193
|
#
|
|
155
194
|
# @param topic [String] Topic to search for (required)
|
|
156
|
-
# @param timeframe [
|
|
195
|
+
# @param timeframe [nil, Range, Array<Range>, Date, DateTime, Time, String, Symbol] Time filter
|
|
196
|
+
# - nil: No time filter (search all memories)
|
|
197
|
+
# - Range: Time range (e.g., 7.days.ago..Time.now)
|
|
198
|
+
# - Array<Range>: Multiple time windows (OR'd together)
|
|
199
|
+
# - Date: Entire day
|
|
200
|
+
# - DateTime/Time: Entire day containing that moment
|
|
201
|
+
# - String: Natural language (e.g., "last week", "few days ago")
|
|
202
|
+
# - :auto: Extract timeframe from topic query automatically
|
|
157
203
|
# @param limit [Integer] Maximum number of nodes to retrieve (default: 20)
|
|
158
204
|
# @param strategy [Symbol] Search strategy (:vector, :fulltext, :hybrid) (default: :vector)
|
|
159
205
|
# @param with_relevance [Boolean] Include dynamic relevance scores (default: false)
|
|
160
206
|
# @param query_tags [Array<String>] Tags to boost relevance (default: [])
|
|
161
207
|
# @param raw [Boolean] Return full node hashes (true) or just content strings (false) (default: false)
|
|
208
|
+
# @param metadata [Hash] Filter by metadata fields using JSONB containment (default: {})
|
|
162
209
|
# @return [Array<String>, Array<Hash>] Content strings (raw: false) or full node hashes (raw: true)
|
|
163
210
|
#
|
|
164
|
-
# @example Basic usage (returns content strings)
|
|
211
|
+
# @example Basic usage - no time filter (returns content strings)
|
|
165
212
|
# memories = htm.recall("PostgreSQL")
|
|
166
213
|
# # => ["PostgreSQL is great for time-series data", "PostgreSQL with TimescaleDB..."]
|
|
167
214
|
#
|
|
168
|
-
# @example
|
|
169
|
-
# nodes = htm.recall("PostgreSQL", raw: true)
|
|
170
|
-
# # => [{"id" => 1, "content" => "...", "created_at" => "...", ...}, ...]
|
|
171
|
-
#
|
|
172
|
-
# @example With timeframe
|
|
215
|
+
# @example With explicit timeframe
|
|
173
216
|
# memories = htm.recall("PostgreSQL", timeframe: "last week")
|
|
217
|
+
# memories = htm.recall("PostgreSQL", timeframe: Date.today)
|
|
218
|
+
# memories = htm.recall("PostgreSQL", timeframe: 7.days.ago..Time.now)
|
|
174
219
|
#
|
|
175
|
-
# @example
|
|
176
|
-
# memories = htm.recall("PostgreSQL",
|
|
177
|
-
#
|
|
178
|
-
# limit: 50,
|
|
179
|
-
# strategy: :hybrid,
|
|
180
|
-
# with_relevance: true,
|
|
181
|
-
# query_tags: ["database", "timeseries"])
|
|
220
|
+
# @example Auto-extract timeframe from query
|
|
221
|
+
# memories = htm.recall("what did we discuss last week about PostgreSQL", timeframe: :auto)
|
|
222
|
+
# # Extracts "last week" as timeframe, searches for "what did we discuss about PostgreSQL"
|
|
182
223
|
#
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
224
|
+
# @example Multiple time windows
|
|
225
|
+
# memories = htm.recall("meetings", timeframe: [last_monday, last_friday])
|
|
226
|
+
#
|
|
227
|
+
# @example Filter by metadata
|
|
228
|
+
# memories = htm.recall("preferences", metadata: { source: "user" })
|
|
229
|
+
# memories = htm.recall("decisions", metadata: { confidence: 0.9, type: "architectural" })
|
|
230
|
+
#
|
|
231
|
+
def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false, metadata: {})
|
|
187
232
|
# Validate inputs
|
|
188
233
|
validate_timeframe!(timeframe)
|
|
189
234
|
validate_positive_integer!(limit, "limit")
|
|
190
235
|
validate_recall_strategy!(strategy)
|
|
191
236
|
validate_array!(query_tags, "query_tags")
|
|
192
|
-
|
|
193
|
-
|
|
237
|
+
validate_metadata!(metadata)
|
|
238
|
+
|
|
239
|
+
# Normalize timeframe and potentially extract from query
|
|
240
|
+
search_query = topic
|
|
241
|
+
normalized_timeframe = if timeframe == :auto
|
|
242
|
+
result = HTM::Timeframe.normalize(:auto, query: topic)
|
|
243
|
+
search_query = result.query # Use cleaned query for search
|
|
244
|
+
HTM.logger.debug "Auto-extracted timeframe: #{result.extracted.inspect}" if result.extracted
|
|
245
|
+
result.timeframe
|
|
246
|
+
else
|
|
247
|
+
HTM::Timeframe.normalize(timeframe)
|
|
248
|
+
end
|
|
194
249
|
|
|
195
250
|
# Use relevance-based search if requested
|
|
196
251
|
if with_relevance
|
|
197
252
|
nodes = @long_term_memory.search_with_relevance(
|
|
198
|
-
timeframe:
|
|
199
|
-
query:
|
|
253
|
+
timeframe: normalized_timeframe,
|
|
254
|
+
query: search_query,
|
|
200
255
|
query_tags: query_tags,
|
|
201
256
|
limit: limit,
|
|
202
|
-
embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil
|
|
257
|
+
embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil,
|
|
258
|
+
metadata: metadata
|
|
203
259
|
)
|
|
204
260
|
else
|
|
205
261
|
# Perform standard RAG-based retrieval
|
|
@@ -207,24 +263,27 @@ class HTM
|
|
|
207
263
|
when :vector
|
|
208
264
|
# Vector search using query embedding
|
|
209
265
|
@long_term_memory.search(
|
|
210
|
-
timeframe:
|
|
211
|
-
query:
|
|
266
|
+
timeframe: normalized_timeframe,
|
|
267
|
+
query: search_query,
|
|
212
268
|
limit: limit,
|
|
213
|
-
embedding_service: HTM
|
|
269
|
+
embedding_service: HTM,
|
|
270
|
+
metadata: metadata
|
|
214
271
|
)
|
|
215
272
|
when :fulltext
|
|
216
273
|
@long_term_memory.search_fulltext(
|
|
217
|
-
timeframe:
|
|
218
|
-
query:
|
|
219
|
-
limit: limit
|
|
274
|
+
timeframe: normalized_timeframe,
|
|
275
|
+
query: search_query,
|
|
276
|
+
limit: limit,
|
|
277
|
+
metadata: metadata
|
|
220
278
|
)
|
|
221
279
|
when :hybrid
|
|
222
280
|
# Hybrid search combining vector + fulltext
|
|
223
281
|
@long_term_memory.search_hybrid(
|
|
224
|
-
timeframe:
|
|
225
|
-
query:
|
|
282
|
+
timeframe: normalized_timeframe,
|
|
283
|
+
query: search_query,
|
|
226
284
|
limit: limit,
|
|
227
|
-
embedding_service: HTM
|
|
285
|
+
embedding_service: HTM,
|
|
286
|
+
metadata: metadata
|
|
228
287
|
)
|
|
229
288
|
end
|
|
230
289
|
end
|
|
@@ -240,32 +299,222 @@ class HTM
|
|
|
240
299
|
raw ? nodes : nodes.map { |node| node['content'] }
|
|
241
300
|
end
|
|
242
301
|
|
|
243
|
-
# Forget a memory node (
|
|
302
|
+
# Forget a memory node (soft delete by default, permanent delete requires confirmation)
|
|
244
303
|
#
|
|
245
|
-
#
|
|
246
|
-
#
|
|
304
|
+
# By default, performs a soft delete (sets deleted_at timestamp). The node
|
|
305
|
+
# remains in the database but is excluded from queries. Use soft: false
|
|
306
|
+
# with confirm: :confirmed for permanent deletion.
|
|
307
|
+
#
|
|
308
|
+
# @param node_id [Integer] ID of the node to delete
|
|
309
|
+
# @param soft [Boolean] If true (default), soft delete; if false, permanent delete
|
|
310
|
+
# @param confirm [Symbol] Must be :confirmed to proceed with permanent deletion
|
|
247
311
|
# @return [Boolean] true if deleted
|
|
248
|
-
# @raise [ArgumentError] if
|
|
312
|
+
# @raise [ArgumentError] if permanent deletion requested without confirmation
|
|
249
313
|
# @raise [HTM::NotFoundError] if node doesn't exist
|
|
250
314
|
#
|
|
251
|
-
|
|
315
|
+
# @example Soft delete (recoverable)
|
|
316
|
+
# htm.forget(node_id)
|
|
317
|
+
# htm.forget(node_id, soft: true)
|
|
318
|
+
#
|
|
319
|
+
# @example Permanent delete (requires confirmation)
|
|
320
|
+
# htm.forget(node_id, soft: false, confirm: :confirmed)
|
|
321
|
+
#
|
|
322
|
+
def forget(node_id, soft: true, confirm: false)
|
|
252
323
|
# Validate inputs
|
|
253
|
-
raise ArgumentError, "
|
|
254
|
-
raise ArgumentError, "Must pass confirm: :confirmed to delete" unless confirm == :confirmed
|
|
324
|
+
raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
|
|
255
325
|
|
|
256
|
-
#
|
|
257
|
-
|
|
258
|
-
raise
|
|
326
|
+
# Permanent delete requires confirmation
|
|
327
|
+
if !soft && confirm != :confirmed
|
|
328
|
+
raise ArgumentError, "Permanent deletion requires confirm: :confirmed"
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
# Verify node exists (including soft-deleted for restore scenarios)
|
|
332
|
+
node = HTM::Models::Node.with_deleted.find_by(id: node_id)
|
|
333
|
+
raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
|
|
334
|
+
|
|
335
|
+
if soft
|
|
336
|
+
# Soft delete - mark as deleted but keep in database
|
|
337
|
+
node.soft_delete!
|
|
338
|
+
@long_term_memory.clear_cache! # Invalidate cache since node is no longer visible
|
|
339
|
+
HTM.logger.info "Node #{node_id} soft deleted"
|
|
340
|
+
else
|
|
341
|
+
# Permanent delete (also invalidates cache internally)
|
|
342
|
+
@long_term_memory.delete(node_id)
|
|
343
|
+
HTM.logger.info "Node #{node_id} permanently deleted"
|
|
259
344
|
end
|
|
260
345
|
|
|
261
|
-
#
|
|
262
|
-
@long_term_memory.delete(node_id)
|
|
346
|
+
# Remove from working memory either way
|
|
263
347
|
@working_memory.remove(node_id)
|
|
264
348
|
|
|
265
349
|
update_robot_activity
|
|
266
350
|
true
|
|
267
351
|
end
|
|
268
352
|
|
|
353
|
+
# Restore a soft-deleted memory node
|
|
354
|
+
#
|
|
355
|
+
# @param node_id [Integer] ID of the soft-deleted node to restore
|
|
356
|
+
# @return [Boolean] true if restored
|
|
357
|
+
# @raise [HTM::NotFoundError] if node doesn't exist or isn't deleted
|
|
358
|
+
#
|
|
359
|
+
# @example
|
|
360
|
+
# htm.forget(node_id) # Soft delete
|
|
361
|
+
# htm.restore(node_id) # Bring it back
|
|
362
|
+
#
|
|
363
|
+
def restore(node_id)
|
|
364
|
+
raise ArgumentError, "Node ID cannot be nil" if node_id.nil?
|
|
365
|
+
|
|
366
|
+
# Find including soft-deleted nodes
|
|
367
|
+
node = HTM::Models::Node.with_deleted.find_by(id: node_id)
|
|
368
|
+
raise HTM::NotFoundError, "Node not found: #{node_id}" unless node
|
|
369
|
+
|
|
370
|
+
unless node.deleted?
|
|
371
|
+
raise ArgumentError, "Node #{node_id} is not soft-deleted"
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
node.restore!
|
|
375
|
+
HTM.logger.info "Node #{node_id} restored"
|
|
376
|
+
|
|
377
|
+
update_robot_activity
|
|
378
|
+
true
|
|
379
|
+
end
|
|
380
|
+
|
|
381
|
+
# Permanently delete all soft-deleted nodes older than specified time
|
|
382
|
+
#
|
|
383
|
+
# @param older_than [Time, ActiveSupport::Duration] Purge nodes soft-deleted before this time
|
|
384
|
+
# @param confirm [Symbol] Must be :confirmed to proceed
|
|
385
|
+
# @return [Integer] Number of nodes permanently deleted
|
|
386
|
+
# @raise [ArgumentError] if confirmation not provided
|
|
387
|
+
#
|
|
388
|
+
# @example Purge nodes deleted more than 30 days ago
|
|
389
|
+
# htm.purge_deleted(older_than: 30.days.ago, confirm: :confirmed)
|
|
390
|
+
#
|
|
391
|
+
# @example Purge nodes deleted before a specific date
|
|
392
|
+
# htm.purge_deleted(older_than: Time.new(2024, 1, 1), confirm: :confirmed)
|
|
393
|
+
#
|
|
394
|
+
def purge_deleted(older_than:, confirm: false)
|
|
395
|
+
raise ArgumentError, "Purge requires confirm: :confirmed" unless confirm == :confirmed
|
|
396
|
+
|
|
397
|
+
count = HTM::Models::Node.purge_deleted(older_than: older_than)
|
|
398
|
+
HTM.logger.info "Purged #{count} soft-deleted nodes older than #{older_than}"
|
|
399
|
+
|
|
400
|
+
count
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
# Clear all nodes from working memory
|
|
404
|
+
#
|
|
405
|
+
# Marks all nodes as evicted from working memory (in database) and clears
|
|
406
|
+
# the in-memory cache. Nodes remain in long-term memory.
|
|
407
|
+
#
|
|
408
|
+
# @return [Integer] Number of nodes cleared from working memory
|
|
409
|
+
#
|
|
410
|
+
# @example
|
|
411
|
+
# htm.clear_working_memory # => 5
|
|
412
|
+
#
|
|
413
|
+
def clear_working_memory
|
|
414
|
+
# Clear in-memory cache
|
|
415
|
+
@working_memory.clear
|
|
416
|
+
|
|
417
|
+
# Update database: mark all as evicted from working memory
|
|
418
|
+
count = HTM::Models::RobotNode
|
|
419
|
+
.where(robot_id: @robot_id, working_memory: true)
|
|
420
|
+
.update_all(working_memory: false)
|
|
421
|
+
|
|
422
|
+
HTM.logger.info "Cleared #{count} nodes from working memory"
|
|
423
|
+
count
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
# Load a single file into long-term memory
|
|
427
|
+
#
|
|
428
|
+
# Reads a text-based file (starting with markdown), chunks it by paragraph,
|
|
429
|
+
# and stores each chunk as a node. YAML frontmatter is preserved as metadata
|
|
430
|
+
# on the first chunk.
|
|
431
|
+
#
|
|
432
|
+
# @param path [String] Path to file
|
|
433
|
+
# @param force [Boolean] Force re-sync even if mtime unchanged (default: false)
|
|
434
|
+
# @return [Hash] Result with keys:
|
|
435
|
+
# - :file_path [String] Absolute path to file
|
|
436
|
+
# - :chunks_created [Integer] Number of new chunks created
|
|
437
|
+
# - :chunks_updated [Integer] Number of existing chunks updated
|
|
438
|
+
# - :chunks_deleted [Integer] Number of chunks soft-deleted
|
|
439
|
+
# - :skipped [Boolean] True if file was unchanged and skipped
|
|
440
|
+
#
|
|
441
|
+
# @example Load a markdown file
|
|
442
|
+
# result = htm.load_file('/path/to/doc.md')
|
|
443
|
+
# # => { file_path: '/path/to/doc.md', chunks_created: 5, ... }
|
|
444
|
+
#
|
|
445
|
+
# @example Force re-sync even if unchanged
|
|
446
|
+
# result = htm.load_file('/path/to/doc.md', force: true)
|
|
447
|
+
#
|
|
448
|
+
def load_file(path, force: false)
|
|
449
|
+
loader = HTM::Loaders::MarkdownLoader.new(self)
|
|
450
|
+
result = loader.load_file(path, force: force)
|
|
451
|
+
|
|
452
|
+
update_robot_activity unless result[:skipped]
|
|
453
|
+
result
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
# Load all matching files from a directory into long-term memory
|
|
457
|
+
#
|
|
458
|
+
# @param path [String] Directory path
|
|
459
|
+
# @param pattern [String] Glob pattern (default: '**/*.md')
|
|
460
|
+
# @param force [Boolean] Force re-sync even if unchanged (default: false)
|
|
461
|
+
# @return [Array<Hash>] Results for each file
|
|
462
|
+
#
|
|
463
|
+
# @example Load all markdown files recursively
|
|
464
|
+
# results = htm.load_directory('/path/to/docs')
|
|
465
|
+
#
|
|
466
|
+
# @example Load only top-level markdown files
|
|
467
|
+
# results = htm.load_directory('/path/to/docs', pattern: '*.md')
|
|
468
|
+
#
|
|
469
|
+
def load_directory(path, pattern: '**/*.md', force: false)
|
|
470
|
+
loader = HTM::Loaders::MarkdownLoader.new(self)
|
|
471
|
+
results = loader.load_directory(path, pattern: pattern, force: force)
|
|
472
|
+
|
|
473
|
+
# Update activity if any files were processed
|
|
474
|
+
if results.any? { |r| !r[:skipped] && !r[:error] }
|
|
475
|
+
update_robot_activity
|
|
476
|
+
end
|
|
477
|
+
|
|
478
|
+
results
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
# Get all nodes loaded from a specific file
|
|
482
|
+
#
|
|
483
|
+
# @param file_path [String] Path to the source file
|
|
484
|
+
# @return [ActiveRecord::Relation] Nodes from this file, ordered by chunk_position
|
|
485
|
+
#
|
|
486
|
+
# @example
|
|
487
|
+
# nodes = htm.nodes_from_file('/path/to/doc.md')
|
|
488
|
+
# nodes.each { |node| puts node.content }
|
|
489
|
+
#
|
|
490
|
+
def nodes_from_file(file_path)
|
|
491
|
+
source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
|
|
492
|
+
return HTM::Models::Node.none unless source
|
|
493
|
+
|
|
494
|
+
HTM::Models::Node.from_source(source.id)
|
|
495
|
+
end
|
|
496
|
+
|
|
497
|
+
# Unload a file: soft-delete its chunks, then remove the source record
#
# Does nothing and returns 0 when no FileSource matches the expanded path.
# Otherwise the steps run in this order: soft-delete the chunks, clear the
# long-term memory cache, destroy the FileSource, record robot activity.
#
# @param file_path [String] Path to the source file
# @return [Integer] Number of nodes soft-deleted (the value returned by
#   FileSource#soft_delete_chunks!), or 0 if the file was never loaded
#
# @example
#   count = htm.unload_file('/path/to/doc.md')
#   puts "Unloaded #{count} chunks"
#
def unload_file(file_path)
  file_source = HTM::Models::FileSource.find_by(file_path: File.expand_path(file_path))
  return 0 unless file_source

  # tap hands back the soft-delete count after the follow-up steps have run.
  file_source.soft_delete_chunks!.tap do
    @long_term_memory.clear_cache!
    file_source.destroy
    update_robot_activity
  end
end
|
|
517
|
+
|
|
269
518
|
private
|
|
270
519
|
|
|
271
520
|
def register_robot
|
|
@@ -306,35 +555,30 @@ class HTM
|
|
|
306
555
|
token_count = node['token_count'].to_i
|
|
307
556
|
access_count = (node['access_count'] || 0).to_i
|
|
308
557
|
last_accessed = node['last_accessed'] ? Time.parse(node['last_accessed'].to_s) : nil
|
|
558
|
+
node_id = node['id']
|
|
309
559
|
|
|
310
|
-
|
|
311
|
-
@working_memory.add(
|
|
312
|
-
node['id'],
|
|
313
|
-
node['content'],
|
|
314
|
-
token_count: token_count,
|
|
315
|
-
access_count: access_count,
|
|
316
|
-
last_accessed: last_accessed,
|
|
317
|
-
from_recall: true
|
|
318
|
-
)
|
|
319
|
-
else
|
|
560
|
+
unless @working_memory.has_space?(token_count)
|
|
320
561
|
# Evict to make space
|
|
321
562
|
evicted = @working_memory.evict_to_make_space(token_count)
|
|
322
563
|
evicted_keys = evicted.map { |n| n[:key] }
|
|
323
|
-
@long_term_memory.mark_evicted(evicted_keys) if evicted_keys.any?
|
|
324
|
-
|
|
325
|
-
# Now add the recalled node
|
|
326
|
-
@working_memory.add(
|
|
327
|
-
node['id'],
|
|
328
|
-
node['content'],
|
|
329
|
-
token_count: token_count,
|
|
330
|
-
access_count: access_count,
|
|
331
|
-
last_accessed: last_accessed,
|
|
332
|
-
from_recall: true
|
|
333
|
-
)
|
|
564
|
+
@long_term_memory.mark_evicted(robot_id: @robot_id, node_ids: evicted_keys) if evicted_keys.any?
|
|
334
565
|
end
|
|
335
|
-
end
|
|
336
566
|
|
|
337
|
-
|
|
567
|
+
# Add to in-memory working memory
|
|
568
|
+
@working_memory.add(
|
|
569
|
+
node_id,
|
|
570
|
+
node['content'],
|
|
571
|
+
token_count: token_count,
|
|
572
|
+
access_count: access_count,
|
|
573
|
+
last_accessed: last_accessed,
|
|
574
|
+
from_recall: true
|
|
575
|
+
)
|
|
576
|
+
|
|
577
|
+
# Mark node as in working memory in the robot_nodes join table
|
|
578
|
+
HTM::Models::RobotNode
|
|
579
|
+
.find_by(robot_id: @robot_id, node_id: node_id)
|
|
580
|
+
&.update!(working_memory: true)
|
|
581
|
+
end
|
|
338
582
|
|
|
339
583
|
# Validation helper methods
|
|
340
584
|
|
|
@@ -352,49 +596,24 @@ class HTM
|
|
|
352
596
|
|
|
353
597
|
|
|
354
598
|
# Ensure +timeframe+ is a value HTM::Timeframe accepts.
#
# @param timeframe [Object] Candidate timeframe value
# @return [nil] when the value is accepted
# @raise [ValidationError] when HTM::Timeframe.valid? rejects the value
def validate_timeframe!(timeframe)
  accepted = HTM::Timeframe.valid?(timeframe)
  return if accepted

  message = "Invalid timeframe type: #{timeframe.class}. " \
            "Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
  raise ValidationError, message
end
|
|
358
603
|
|
|
359
604
|
# Ensure +value+ is an Integer strictly greater than zero.
#
# @param value [Object] Value to check
# @param name [String] Label used at the start of the error message
# @return [nil] when the value is valid
# @raise [ValidationError] when value is not an Integer or is <= 0
def validate_positive_integer!(value, name)
  return if value.is_a?(Integer) && value.positive?

  raise ValidationError, "#{name} must be a positive Integer"
end
|
|
362
607
|
|
|
363
|
-
|
|
608
|
+
# Ensure +metadata+ is a Hash whose keys are all Strings or Symbols.
#
# @param metadata [Object] Candidate metadata
# @return [Hash] the same metadata hash when it is valid
# @raise [ValidationError] when metadata is not a Hash, or when any key is
#   neither a String nor a Symbol
def validate_metadata!(metadata)
  unless metadata.is_a?(Hash)
    raise ValidationError, "Metadata must be a Hash"
  end

  # Keys must be strings or symbols (they will be converted to strings in JSON).
  invalid_key_present = metadata.each_key.any? do |key|
    !key.is_a?(String) && !key.is_a?(Symbol)
  end
  raise ValidationError, "Metadata keys must be strings or symbols" if invalid_key_present

  metadata
end
|
|
375
618
|
|
|
376
|
-
# Convert a natural-language phrase into a Range of Time values.
#
# Recognised phrases (matched case-insensitively, first match wins):
# "last week", "yesterday", "last N day(s)", "this month", "last month".
# Any other text falls back to the last 24 hours.
#
# Calendar boundaries are derived by stepping back from a known-valid
# instant rather than subtracting 1 from a day or month number. Subtracting
# produced day 0 on the 1st of a month and month 0 in January, both of which
# make Time.new raise ArgumentError.
#
# @param text [String] Natural-language timeframe description
# @return [Range<Time>] Start and end of the described period
def parse_natural_timeframe(text)
  now = Time.now
  day_seconds = 24 * 3600

  case text.downcase
  when /last week/
    (now - 7 * day_seconds)..now
  when /yesterday/
    start_of_today = Time.new(now.year, now.month, now.day)
    # Step back 12h to land safely inside yesterday even across a DST shift,
    # then snap to that day's midnight.
    inside_yesterday = start_of_today - 12 * 3600
    start_of_yesterday = Time.new(inside_yesterday.year, inside_yesterday.month, inside_yesterday.day)
    start_of_yesterday..start_of_today
  when /last (\d+) days?/
    days = Regexp.last_match(1).to_i
    (now - days * day_seconds)..now
  when /this month/
    Time.new(now.year, now.month, 1)..now
  when /last month/
    # The last second of the previous month always exists; derive the
    # previous month's year and month from it so January rolls back to December.
    end_of_last_month = Time.new(now.year, now.month, 1) - 1
    start_of_last_month = Time.new(end_of_last_month.year, end_of_last_month.month, 1)
    start_of_last_month..end_of_last_month
  else
    # Default to last 24 hours
    (now - day_seconds)..now
  end
end
|
|
400
619
|
end
|