htm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +227 -0
  3. data/.architecture/decisions/adrs/002-two-tier-memory-architecture.md +322 -0
  4. data/.architecture/decisions/adrs/003-ollama-default-embedding-provider.md +339 -0
  5. data/.architecture/decisions/adrs/004-multi-robot-shared-memory-hive-mind.md +374 -0
  6. data/.architecture/decisions/adrs/005-rag-based-retrieval-with-hybrid-search.md +443 -0
  7. data/.architecture/decisions/adrs/006-context-assembly-strategies.md +444 -0
  8. data/.architecture/decisions/adrs/007-working-memory-eviction-strategy.md +461 -0
  9. data/.architecture/decisions/adrs/008-robot-identification-system.md +550 -0
  10. data/.architecture/decisions/adrs/009-never-forget-explicit-deletion-only.md +570 -0
  11. data/.architecture/decisions/adrs/010-redis-working-memory-rejected.md +323 -0
  12. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +585 -0
  13. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +583 -0
  14. data/.architecture/decisions/adrs/013-activerecord-orm-and-many-to-many-tagging.md +299 -0
  15. data/.architecture/decisions/adrs/014-client-side-embedding-generation-workflow.md +569 -0
  16. data/.architecture/decisions/adrs/015-hierarchical-tag-ontology-and-llm-extraction.md +701 -0
  17. data/.architecture/decisions/adrs/016-async-embedding-and-tag-generation.md +694 -0
  18. data/.architecture/members.yml +144 -0
  19. data/.architecture/reviews/2025-10-29-llm-configuration-and-async-processing-review.md +1137 -0
  20. data/.architecture/reviews/initial-system-analysis.md +330 -0
  21. data/.envrc +32 -0
  22. data/.irbrc +145 -0
  23. data/CHANGELOG.md +150 -0
  24. data/COMMITS.md +196 -0
  25. data/LICENSE +21 -0
  26. data/README.md +1347 -0
  27. data/Rakefile +51 -0
  28. data/SETUP.md +268 -0
  29. data/config/database.yml +67 -0
  30. data/db/migrate/20250101000001_enable_extensions.rb +14 -0
  31. data/db/migrate/20250101000002_create_robots.rb +14 -0
  32. data/db/migrate/20250101000003_create_nodes.rb +42 -0
  33. data/db/migrate/20250101000005_create_tags.rb +38 -0
  34. data/db/migrate/20250101000007_add_node_vector_indexes.rb +30 -0
  35. data/db/schema.sql +473 -0
  36. data/db/seed_data/README.md +100 -0
  37. data/db/seed_data/presidents.md +136 -0
  38. data/db/seed_data/states.md +151 -0
  39. data/db/seeds.rb +208 -0
  40. data/dbdoc/README.md +173 -0
  41. data/dbdoc/public.node_stats.md +48 -0
  42. data/dbdoc/public.node_stats.svg +41 -0
  43. data/dbdoc/public.node_tags.md +40 -0
  44. data/dbdoc/public.node_tags.svg +112 -0
  45. data/dbdoc/public.nodes.md +54 -0
  46. data/dbdoc/public.nodes.svg +118 -0
  47. data/dbdoc/public.nodes_tags.md +39 -0
  48. data/dbdoc/public.nodes_tags.svg +112 -0
  49. data/dbdoc/public.ontology_structure.md +48 -0
  50. data/dbdoc/public.ontology_structure.svg +38 -0
  51. data/dbdoc/public.operations_log.md +42 -0
  52. data/dbdoc/public.operations_log.svg +130 -0
  53. data/dbdoc/public.relationships.md +39 -0
  54. data/dbdoc/public.relationships.svg +41 -0
  55. data/dbdoc/public.robot_activity.md +46 -0
  56. data/dbdoc/public.robot_activity.svg +35 -0
  57. data/dbdoc/public.robots.md +35 -0
  58. data/dbdoc/public.robots.svg +90 -0
  59. data/dbdoc/public.schema_migrations.md +29 -0
  60. data/dbdoc/public.schema_migrations.svg +26 -0
  61. data/dbdoc/public.tags.md +35 -0
  62. data/dbdoc/public.tags.svg +60 -0
  63. data/dbdoc/public.topic_relationships.md +45 -0
  64. data/dbdoc/public.topic_relationships.svg +32 -0
  65. data/dbdoc/schema.json +1437 -0
  66. data/dbdoc/schema.svg +154 -0
  67. data/docs/api/database.md +806 -0
  68. data/docs/api/embedding-service.md +532 -0
  69. data/docs/api/htm.md +797 -0
  70. data/docs/api/index.md +259 -0
  71. data/docs/api/long-term-memory.md +1096 -0
  72. data/docs/api/working-memory.md +665 -0
  73. data/docs/architecture/adrs/001-postgresql-timescaledb.md +314 -0
  74. data/docs/architecture/adrs/002-two-tier-memory.md +411 -0
  75. data/docs/architecture/adrs/003-ollama-embeddings.md +421 -0
  76. data/docs/architecture/adrs/004-hive-mind.md +437 -0
  77. data/docs/architecture/adrs/005-rag-retrieval.md +531 -0
  78. data/docs/architecture/adrs/006-context-assembly.md +496 -0
  79. data/docs/architecture/adrs/007-eviction-strategy.md +645 -0
  80. data/docs/architecture/adrs/008-robot-identification.md +625 -0
  81. data/docs/architecture/adrs/009-never-forget.md +648 -0
  82. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +323 -0
  83. data/docs/architecture/adrs/011-pgai-integration.md +494 -0
  84. data/docs/architecture/adrs/index.md +215 -0
  85. data/docs/architecture/hive-mind.md +736 -0
  86. data/docs/architecture/index.md +351 -0
  87. data/docs/architecture/overview.md +538 -0
  88. data/docs/architecture/two-tier-memory.md +873 -0
  89. data/docs/assets/css/custom.css +83 -0
  90. data/docs/assets/images/htm-core-components.svg +63 -0
  91. data/docs/assets/images/htm-database-schema.svg +93 -0
  92. data/docs/assets/images/htm-hive-mind-architecture.svg +125 -0
  93. data/docs/assets/images/htm-importance-scoring-framework.svg +83 -0
  94. data/docs/assets/images/htm-layered-architecture.svg +71 -0
  95. data/docs/assets/images/htm-long-term-memory-architecture.svg +115 -0
  96. data/docs/assets/images/htm-working-memory-architecture.svg +120 -0
  97. data/docs/assets/images/htm.jpg +0 -0
  98. data/docs/assets/images/htm_demo.gif +0 -0
  99. data/docs/assets/js/mathjax.js +18 -0
  100. data/docs/assets/videos/htm_video.mp4 +0 -0
  101. data/docs/database_rake_tasks.md +322 -0
  102. data/docs/development/contributing.md +787 -0
  103. data/docs/development/index.md +336 -0
  104. data/docs/development/schema.md +596 -0
  105. data/docs/development/setup.md +719 -0
  106. data/docs/development/testing.md +819 -0
  107. data/docs/guides/adding-memories.md +824 -0
  108. data/docs/guides/context-assembly.md +1009 -0
  109. data/docs/guides/getting-started.md +577 -0
  110. data/docs/guides/index.md +118 -0
  111. data/docs/guides/long-term-memory.md +941 -0
  112. data/docs/guides/multi-robot.md +866 -0
  113. data/docs/guides/recalling-memories.md +927 -0
  114. data/docs/guides/search-strategies.md +953 -0
  115. data/docs/guides/working-memory.md +717 -0
  116. data/docs/index.md +214 -0
  117. data/docs/installation.md +477 -0
  118. data/docs/multi_framework_support.md +519 -0
  119. data/docs/quick-start.md +655 -0
  120. data/docs/setup_local_database.md +302 -0
  121. data/docs/using_rake_tasks_in_your_app.md +383 -0
  122. data/examples/basic_usage.rb +93 -0
  123. data/examples/cli_app/README.md +317 -0
  124. data/examples/cli_app/htm_cli.rb +270 -0
  125. data/examples/custom_llm_configuration.rb +183 -0
  126. data/examples/example_app/Rakefile +71 -0
  127. data/examples/example_app/app.rb +206 -0
  128. data/examples/sinatra_app/Gemfile +21 -0
  129. data/examples/sinatra_app/app.rb +335 -0
  130. data/lib/htm/active_record_config.rb +113 -0
  131. data/lib/htm/configuration.rb +342 -0
  132. data/lib/htm/database.rb +594 -0
  133. data/lib/htm/embedding_service.rb +115 -0
  134. data/lib/htm/errors.rb +34 -0
  135. data/lib/htm/job_adapter.rb +154 -0
  136. data/lib/htm/jobs/generate_embedding_job.rb +65 -0
  137. data/lib/htm/jobs/generate_tags_job.rb +82 -0
  138. data/lib/htm/long_term_memory.rb +965 -0
  139. data/lib/htm/models/node.rb +109 -0
  140. data/lib/htm/models/node_tag.rb +33 -0
  141. data/lib/htm/models/robot.rb +52 -0
  142. data/lib/htm/models/tag.rb +76 -0
  143. data/lib/htm/railtie.rb +76 -0
  144. data/lib/htm/sinatra.rb +157 -0
  145. data/lib/htm/tag_service.rb +135 -0
  146. data/lib/htm/tasks.rb +38 -0
  147. data/lib/htm/version.rb +5 -0
  148. data/lib/htm/working_memory.rb +182 -0
  149. data/lib/htm.rb +400 -0
  150. data/lib/tasks/db.rake +19 -0
  151. data/lib/tasks/htm.rake +147 -0
  152. data/lib/tasks/jobs.rake +312 -0
  153. data/mkdocs.yml +190 -0
  154. data/scripts/install_local_database.sh +309 -0
  155. metadata +341 -0
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+
3
class HTM
  # Working Memory - Token-limited active context for immediate LLM use
  #
  # WorkingMemory holds the nodes that make up the active conversation context,
  # together with the bookkeeping (token counts, access counts, timestamps)
  # used to decide what to evict and in which order to assemble context.
  #
  # Note that #add never enforces the token limit by itself: callers are
  # expected to check #has_space? and call #evict_to_make_space beforehand.
  #
  class WorkingMemory
    # Number of seconds over which the recency score decays to one half.
    RECENCY_WINDOW_SECONDS = 3600.0

    attr_reader :max_tokens

    # Initialize working memory
    #
    # @param max_tokens [Integer] Maximum tokens allowed in working memory
    #
    def initialize(max_tokens:)
      @max_tokens = max_tokens
      @nodes = {}         # key => node metadata hash
      @access_order = []  # keys ordered from least to most recently added
    end

    # Add a node to working memory
    #
    # Re-adding an existing key replaces its entry and moves the key to the
    # most-recent end of the access order.
    #
    # @param key [String] Node identifier
    # @param value [String] Node content
    # @param token_count [Integer] Number of tokens in this node
    # @param access_count [Integer] Access count from long-term memory (default: 0)
    # @param last_accessed [Time, nil] Last access time from long-term memory
    #   (defaults to the current time)
    # @param from_recall [Boolean] Whether this node was recalled from long-term memory
    # @return [void]
    #
    def add(key, value, token_count:, access_count: 0, last_accessed: nil, from_recall: false)
      now = Time.now

      @nodes[key] = {
        value: value,
        token_count: token_count,
        access_count: access_count,
        last_accessed: last_accessed || now,
        added_at: now,
        from_recall: from_recall
      }
      update_access(key)
    end

    # Remove a node from working memory
    #
    # @param key [String] Node identifier
    # @return [void]
    #
    def remove(key)
      @nodes.delete(key)
      @access_order.delete(key)
    end

    # Check if there's space for a node
    #
    # @param token_count [Integer] Number of tokens needed
    # @return [Boolean] true if space available
    #
    def has_space?(token_count)
      current_tokens + token_count <= @max_tokens
    end

    # Evict nodes to make space
    #
    # Uses LFU + LRU strategy: Least Frequently Used + Least Recently Used.
    # Every node gets a "keep score" (log of its access count plus a recency
    # term); nodes with the LOWEST score - rarely used and not touched for a
    # long time - are evicted first, until at least +needed_tokens+ tokens
    # have been freed or working memory is empty.
    #
    # @param needed_tokens [Integer] Number of tokens needed
    # @return [Array<Hash>] Evicted nodes as { key:, value: } hashes
    #
    def evict_to_make_space(needed_tokens)
      now = Time.now # one reference instant so all nodes are scored consistently

      # Ascending sort on the keep score puts the most evictable nodes first.
      candidates = @nodes.sort_by do |_key, node|
        Math.log(1 + (node[:access_count] || 0)) + recency_score(node, now)
      end

      evicted = []
      tokens_freed = 0

      candidates.each do |key, node|
        break if tokens_freed >= needed_tokens

        evicted << { key: key, value: node[:value] }
        tokens_freed += node[:token_count]
        remove(key)
      end

      evicted
    end

    # Assemble context string for LLM
    #
    # Nodes are ordered according to +strategy+ and appended until the next
    # node would exceed the token limit; assembly stops at that node.
    #
    # @param strategy [Symbol] Assembly strategy (:recent, :frequent, :balanced)
    #   - :recent - Most recently accessed (LRU)
    #   - :frequent - Most frequently accessed (LFU)
    #   - :balanced - Combines frequency × recency
    # @param max_tokens [Integer, nil] Optional token limit (defaults to the
    #   working memory's own limit)
    # @return [String] Assembled context, node values joined by blank lines
    # @raise [ArgumentError] if the strategy is unknown
    #
    def assemble_context(strategy:, max_tokens: nil)
      limit = max_tokens || @max_tokens
      parts = []
      used_tokens = 0

      ordered_nodes(strategy).each do |node|
        break if used_tokens + node[:token_count] > limit

        parts << node[:value]
        used_tokens += node[:token_count]
      end

      parts.join("\n\n")
    end

    # Get current token count
    #
    # @return [Integer] Total tokens in working memory
    #
    def token_count
      @nodes.values.sum { |node| node[:token_count] }
    end

    # Get utilization percentage
    #
    # @return [Float] Percentage of working memory used
    #
    def utilization_percentage
      (token_count.to_f / @max_tokens * 100).round(2)
    end

    # Get node count
    #
    # @return [Integer] Number of nodes in working memory
    #
    def node_count
      @nodes.size
    end

    private

    def current_tokens
      token_count
    end

    # Move +key+ to the most-recent end of the access order.
    def update_access(key)
      @access_order.delete(key)
      @access_order << key
    end

    # Recency term in (0, 1]: 1.0 for a node accessed right now, 0.5 after
    # RECENCY_WINDOW_SECONDS, approaching 0 as the node ages.
    def recency_score(node, now)
      seconds_idle = now - (node[:last_accessed] || node[:added_at])
      1.0 / (1 + seconds_idle / RECENCY_WINDOW_SECONDS)
    end

    # Return node hashes ordered from most to least relevant for +strategy+.
    def ordered_nodes(strategy)
      case strategy
      when :recent
        # Most recently accessed (LRU)
        @access_order.reverse_each.map { |key| @nodes[key] }
      when :frequent
        # Most frequently accessed (LFU)
        @nodes.values.sort_by { |node| -(node[:access_count] || 0) }
      when :balanced
        # Combined frequency × recency; higher product = more relevant
        now = Time.now
        @nodes.values.sort_by do |node|
          -(Math.log(1 + (node[:access_count] || 0)) * recency_score(node, now))
        end
      else
        raise ArgumentError, "Unknown strategy: #{strategy}. Use :recent, :frequent, or :balanced"
      end
    end
  end
end
data/lib/htm.rb ADDED
@@ -0,0 +1,400 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "htm/version"
4
+ require_relative "htm/errors"
5
+ require_relative "htm/configuration"
6
+ require_relative "htm/active_record_config"
7
+ require_relative "htm/database"
8
+ require_relative "htm/long_term_memory"
9
+ require_relative "htm/working_memory"
10
+ require_relative "htm/embedding_service"
11
+ require_relative "htm/tag_service"
12
+ require_relative "htm/job_adapter"
13
+ require_relative "htm/jobs/generate_embedding_job"
14
+ require_relative "htm/jobs/generate_tags_job"
15
+
16
+ require "pg"
17
+ require "securerandom"
18
+ require "uri"
19
+
20
+ # Load Rails integration if Rails is defined
21
+ require_relative "htm/railtie" if defined?(Rails::Railtie)
22
+
23
+ # HTM (Hierarchical Temporary Memory) - Intelligent memory management for LLM robots
24
+ #
25
+ # HTM implements a two-tier memory system:
26
+ # - Working Memory: Token-limited, active context for immediate LLM use
27
+ # - Long-term Memory: Durable PostgreSQL/TimescaleDB storage for permanent knowledge
28
+ #
29
+ # Key Features:
30
+ # - Never forgets unless explicitly told
31
+ # - RAG-based retrieval (temporal + semantic search)
32
+ # - Multi-robot "hive mind" - all robots share global memory
33
+ # - Relationship graphs for knowledge connections
34
+ # - Time-series optimized with TimescaleDB
35
+ #
36
+ # @example Basic usage
37
+ # htm = HTM.new(robot_name: "Code Helper")
38
+ #
39
+ # # Remember information
40
+ # htm.remember("We decided to use PostgreSQL for HTM", source: "architect")
41
+ #
42
+ # # Recall from the past
43
+ # memories = htm.recall("PostgreSQL", timeframe: "last week")
44
+ #
45
+ # # Create context for LLM
46
+ # context = htm.create_context(strategy: :balanced)
47
+ #
48
class HTM
  attr_reader :robot_id, :robot_name, :working_memory, :long_term_memory

  # Validation constants
  MAX_KEY_LENGTH = 255
  MAX_VALUE_LENGTH = 1_000_000 # 1MB
  MAX_ARRAY_SIZE = 1000

  VALID_RECALL_STRATEGIES = [:vector, :fulltext, :hybrid].freeze

  # Length of a (non-DST) day in seconds, used for relative timeframes.
  SECONDS_PER_DAY = 24 * 3600

  # Initialize a new HTM instance
  #
  # Establishes the ActiveRecord connection when none exists yet, builds the
  # working and long-term memory components and registers the robot.
  #
  # @param working_memory_size [Integer] Maximum tokens for working memory (default: 128,000)
  # @param robot_name [String] Human-readable name for this robot (auto-generated if not provided)
  # @param db_config [Hash] Database configuration (falls back to HTM::Database.default_config)
  # @param db_pool_size [Integer] Database connection pool size (default: 5)
  # @param db_query_timeout [Integer] Database query timeout in milliseconds (default: 30000)
  # @param db_cache_size [Integer] Number of database query results to cache (default: 1000, use 0 to disable)
  # @param db_cache_ttl [Integer] Database cache TTL in seconds (default: 300)
  #
  def initialize(
    working_memory_size: 128_000,
    robot_name: nil,
    db_config: nil,
    db_pool_size: 5,
    db_query_timeout: 30_000,
    db_cache_size: 1000,
    db_cache_ttl: 300
  )
    # Establish ActiveRecord connection if not already connected
    HTM::ActiveRecordConfig.establish_connection! unless HTM::ActiveRecordConfig.connected?

    @robot_name = robot_name || "robot_#{SecureRandom.uuid[0..7]}"

    # Initialize components
    @working_memory = HTM::WorkingMemory.new(max_tokens: working_memory_size)
    @long_term_memory = HTM::LongTermMemory.new(
      db_config || HTM::Database.default_config,
      pool_size: db_pool_size,
      query_timeout: db_query_timeout,
      cache_size: db_cache_size,
      cache_ttl: db_cache_ttl
    )

    # Register this robot in the database and keep the ID it returns
    @robot_id = register_robot
  end

  # Remember new information
  #
  # Stores content in long-term memory and adds it to working memory, evicting
  # older working-memory nodes first when the token limit would be exceeded.
  # Embedding and tag generation are enqueued as background jobs.
  #
  # If content is empty, returns the ID of the most recent node (or 0 when
  # there is none) without creating a new entry.
  # Nil values for content or source are converted to empty strings.
  #
  # @param content [String, nil] The information to remember
  # @param source [String, nil] Where this content came from (defaults to empty string if not provided)
  # @param tags [Array<String>] Manual tags to assign (optional, in addition to auto-extracted tags)
  # @return [Integer] Database ID of the memory node
  #
  # @example Remember with source
  #   node_id = htm.remember("PostgreSQL is great for HTM", source: "user")
  #
  # @example Remember with manual tags
  #   node_id = htm.remember("Time-series data", source: "user", tags: ["database:timescaledb"])
  #
  def remember(content, source: "", tags: [])
    # Convert nil to empty string
    content = content.to_s
    source = source.to_s

    # If content is empty, return the last node ID without creating a new entry
    if content.empty?
      last_node = HTM::Models::Node.order(created_at: :desc).first
      return last_node&.id || 0
    end

    # Calculate token count using configured counter
    token_count = HTM.count_tokens(content)

    # Store in long-term memory immediately (without embedding)
    # Embedding and tags will be generated asynchronously
    node_id = @long_term_memory.add(
      content: content,
      source: source,
      token_count: token_count,
      robot_id: @robot_id,
      embedding: nil # Will be generated in background
    )

    HTM.logger.info "Node #{node_id} created for robot #{@robot_name} (#{token_count} tokens)"

    # Enqueue background jobs for embedding and tag generation
    enqueue_embedding_job(node_id)
    enqueue_tags_job(node_id, manual_tags: tags)

    # Add to working memory (access_count starts at 0), honoring the token
    # limit exactly like the recall path does.
    make_room_in_working_memory(token_count)
    @working_memory.add(node_id, content, token_count: token_count, access_count: 0)

    update_robot_activity
    node_id
  end

  # Recall memories from a timeframe and topic
  #
  # Every recalled node is also placed into working memory (evicting other
  # nodes if needed).
  #
  # @param topic [String] Topic to search for (required)
  # @param timeframe [String, Range, nil] Time range (default: last 7 days). Examples: "last week", 7.days.ago..Time.now
  # @param limit [Integer] Maximum number of nodes to retrieve (default: 20)
  # @param strategy [Symbol] Search strategy (:vector, :fulltext, :hybrid) (default: :vector)
  # @param with_relevance [Boolean] Include dynamic relevance scores (default: false)
  # @param query_tags [Array<String>] Tags to boost relevance (default: [])
  # @param raw [Boolean] Return full node hashes (true) or just content strings (false) (default: false)
  # @return [Array<String>, Array<Hash>] Content strings (raw: false) or full node hashes (raw: true)
  # @raise [ValidationError] if timeframe, limit, strategy or query_tags are invalid
  #
  # @example Basic usage (returns content strings)
  #   memories = htm.recall("PostgreSQL")
  #   # => ["PostgreSQL is great for time-series data", "PostgreSQL with TimescaleDB..."]
  #
  # @example Get full node hashes
  #   nodes = htm.recall("PostgreSQL", raw: true)
  #   # => [{"id" => 1, "content" => "...", "created_at" => "...", ...}, ...]
  #
  # @example With timeframe
  #   memories = htm.recall("PostgreSQL", timeframe: "last week")
  #
  # @example With all options
  #   memories = htm.recall("PostgreSQL",
  #     timeframe: "last month",
  #     limit: 50,
  #     strategy: :hybrid,
  #     with_relevance: true,
  #     query_tags: ["database", "timeseries"])
  #
  def recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], raw: false)
    # Use default timeframe if not provided (last 7 days)
    timeframe ||= "last 7 days"

    # Validate inputs
    validate_timeframe!(timeframe)
    validate_positive_integer!(limit, "limit")
    validate_recall_strategy!(strategy)
    validate_array!(query_tags, "query_tags")

    parsed_timeframe = parse_timeframe(timeframe)

    nodes =
      if with_relevance
        # Relevance-based search; only the embedding-backed strategies get
        # an embedding service.
        @long_term_memory.search_with_relevance(
          timeframe: parsed_timeframe,
          query: topic,
          query_tags: query_tags,
          limit: limit,
          embedding_service: (strategy == :vector || strategy == :hybrid) ? HTM : nil
        )
      else
        # Standard RAG-based retrieval
        case strategy
        when :vector
          # Vector search using query embedding
          @long_term_memory.search(
            timeframe: parsed_timeframe,
            query: topic,
            limit: limit,
            embedding_service: HTM
          )
        when :fulltext
          @long_term_memory.search_fulltext(
            timeframe: parsed_timeframe,
            query: topic,
            limit: limit
          )
        when :hybrid
          # Hybrid search combining vector + fulltext
          @long_term_memory.search_hybrid(
            timeframe: parsed_timeframe,
            query: topic,
            limit: limit,
            embedding_service: HTM
          )
        end
      end

    # Add to working memory (evict if needed)
    nodes.each { |node| add_to_working_memory(node) }

    update_robot_activity

    # Return full nodes or just content based on raw parameter
    raw ? nodes : nodes.map { |node| node['content'] }
  end

  # Forget a memory node (explicit deletion)
  #
  # @param node_id [Integer] ID of the node to delete
  # @param confirm [Symbol] Must be :confirmed to proceed
  # @return [Boolean] true if deleted
  # @raise [ArgumentError] if node_id is nil or confirmation not provided
  # @raise [HTM::NotFoundError] if node doesn't exist
  #
  def forget(node_id, confirm: false)
    # Validate inputs
    raise ArgumentError, "node_id cannot be nil" if node_id.nil?
    raise ArgumentError, "Must pass confirm: :confirmed to delete" unless confirm == :confirmed

    # Verify node exists
    unless @long_term_memory.exists?(node_id)
      raise HTM::NotFoundError, "Node not found: #{node_id}"
    end

    # Delete the node and remove from working memory
    @long_term_memory.delete(node_id)
    @working_memory.remove(node_id)

    update_robot_activity
    true
  end

  private

  def register_robot
    @long_term_memory.register_robot(@robot_name)
  end

  def update_robot_activity
    @long_term_memory.update_robot_activity(@robot_id)
  end

  # Enqueue embedding generation for a node using the configured job backend.
  # Best effort: failures are logged, never raised.
  def enqueue_embedding_job(node_id)
    HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: node_id)
  rescue StandardError => e
    HTM.logger.error "Failed to enqueue embedding job for node #{node_id}: #{e.message}"
  end

  # Attach manual tags immediately, then enqueue tag generation for the node.
  # Best effort: failures are logged, never raised, and a failure while
  # attaching manual tags does not prevent the job from being enqueued.
  def enqueue_tags_job(node_id, manual_tags: [])
    add_manual_tags(node_id, Array(manual_tags))

    HTM::JobAdapter.enqueue(HTM::Jobs::GenerateTagsJob, node_id: node_id)
  rescue StandardError => e
    HTM.logger.error "Failed to enqueue tags job for node #{node_id}: #{e.message}"
  end

  # Create (or reuse) each tag and link it to the node.
  def add_manual_tags(node_id, tag_names)
    return if tag_names.empty?

    tag_names.each do |tag_name|
      tag = HTM::Models::Tag.find_or_create_by!(name: tag_name)
      HTM::Models::NodeTag.find_or_create_by!(node_id: node_id, tag_id: tag.id)
    end
    HTM.logger.debug "Added #{tag_names.length} manual tags to node #{node_id}"
  rescue StandardError => e
    HTM.logger.error "Failed to add manual tags to node #{node_id}: #{e.message}"
  end

  # Evict working-memory nodes (and flag them as evicted in long-term memory)
  # when there is no room for +token_count+ more tokens.
  def make_room_in_working_memory(token_count)
    return if @working_memory.has_space?(token_count)

    evicted = @working_memory.evict_to_make_space(token_count)
    evicted_keys = evicted.map { |n| n[:key] }
    @long_term_memory.mark_evicted(evicted_keys) if evicted_keys.any?
  end

  # Place a recalled node (a String-keyed hash) into working memory.
  def add_to_working_memory(node)
    # Values may arrive as Strings from the database/cache, hence the coercions
    token_count = node['token_count'].to_i
    access_count = (node['access_count'] || 0).to_i
    last_accessed = node['last_accessed'] ? Time.parse(node['last_accessed'].to_s) : nil

    make_room_in_working_memory(token_count)

    @working_memory.add(
      node['id'],
      node['content'],
      token_count: token_count,
      access_count: access_count,
      last_accessed: last_accessed,
      from_recall: true
    )
  end

  # Validation helper methods

  def validate_array!(array, name, max_size: MAX_ARRAY_SIZE)
    raise ValidationError, "#{name} must be an Array" unless array.is_a?(Array)
    raise ValidationError, "#{name} too large (max #{max_size} items)" if array.size > max_size
  end

  def validate_recall_strategy!(strategy)
    raise ValidationError, "Strategy must be a Symbol" unless strategy.is_a?(Symbol)
    unless VALID_RECALL_STRATEGIES.include?(strategy)
      raise ValidationError, "Invalid strategy: #{strategy}. Must be one of #{VALID_RECALL_STRATEGIES.join(', ')}"
    end
  end

  def validate_timeframe!(timeframe)
    return if timeframe.is_a?(Range) || timeframe.is_a?(String)
    raise ValidationError, "Timeframe must be a Range or String, got #{timeframe.class}"
  end

  def validate_positive_integer!(value, name)
    raise ValidationError, "#{name} must be a positive Integer" unless value.is_a?(Integer) && value > 0
  end

  # Timeframe parsing methods

  # Turn a Range (returned as is) or a natural-language String into a Range.
  #
  # @raise [ArgumentError] for any other type
  def parse_timeframe(timeframe)
    case timeframe
    when Range
      timeframe
    when String
      parse_natural_timeframe(timeframe)
    else
      raise ArgumentError, "Invalid timeframe: #{timeframe}"
    end
  end

  # Translate phrases such as "last week", "yesterday", "last 3 days",
  # "this month" or "last month" into a Range of Time values. Anything else
  # falls back to the last 24 hours.
  #
  # @param text [String] Natural-language timeframe (case-insensitive)
  # @param now [Time] Reference instant (defaults to the current time)
  # @return [Range<Time>]
  def parse_natural_timeframe(text, now: Time.now)
    case text.downcase
    when /last week/
      (now - 7 * SECONDS_PER_DAY)..now
    when /yesterday/
      start_of_today = Time.new(now.year, now.month, now.day)
      # Step back to around the middle of yesterday and truncate to midnight;
      # unlike `now.day - 1` this works on the first day of a month.
      midday_yesterday = start_of_today - SECONDS_PER_DAY / 2
      start_of_yesterday = Time.new(midday_yesterday.year, midday_yesterday.month, midday_yesterday.day)
      start_of_yesterday..start_of_today
    when /last (\d+) days?/
      days = Regexp.last_match(1).to_i
      (now - days * SECONDS_PER_DAY)..now
    when /this month/
      Time.new(now.year, now.month, 1)..now
    when /last month/
      # The last second of the previous month tells us its year and month,
      # which also handles the January -> December rollover.
      end_of_last_month = Time.new(now.year, now.month, 1) - 1
      start_of_last_month = Time.new(end_of_last_month.year, end_of_last_month.month, 1)
      start_of_last_month..end_of_last_month
    else
      # Default to last 24 hours
      (now - SECONDS_PER_DAY)..now
    end
  end
end
data/lib/tasks/db.rake ADDED
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
namespace :db do
  # Each entry maps a task name (which is also the HTM::Database method it
  # invokes) to its rake description and the message printed on success.
  {
    migrate: ["Run database migrations", "Database migrations completed successfully"],
    setup: ["Setup database schema (includes migrations)", "Database setup completed successfully"]
  }.each do |action, (description, success_message)|
    desc description
    task action do
      # The library is required inside the task body, i.e. only when the
      # task actually runs.
      require_relative '../htm'

      HTM::Database.public_send(action)
      puts success_message
    end
  end
end