htm 0.0.30 → 0.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.irbrc +2 -3
- data/.rubocop.yml +184 -0
- data/CHANGELOG.md +46 -0
- data/README.md +2 -0
- data/Rakefile +93 -12
- data/db/migrate/00008_create_node_relationships.rb +54 -0
- data/db/migrate/00009_fix_node_relationships_column_types.rb +17 -0
- data/db/schema.sql +124 -1
- data/docs/api/database.md +35 -57
- data/docs/api/embedding-service.md +1 -1
- data/docs/api/index.md +26 -15
- data/docs/api/working-memory.md +8 -8
- data/docs/architecture/index.md +5 -7
- data/docs/architecture/overview.md +5 -8
- data/docs/assets/images/htm-architecture-overview.svg +1 -1
- data/docs/assets/images/htm-context-assembly-flow.svg +2 -2
- data/docs/assets/images/htm-layered-architecture.svg +3 -3
- data/docs/assets/images/two-tier-memory-architecture.svg +1 -1
- data/docs/database/README.md +1 -0
- data/docs/database_rake_tasks.md +20 -28
- data/docs/development/contributing.md +5 -5
- data/docs/development/index.md +4 -7
- data/docs/development/schema.md +71 -1
- data/docs/development/setup.md +40 -82
- data/docs/development/testing.md +1 -1
- data/docs/examples/file-loading.md +4 -4
- data/docs/examples/mcp-client.md +1 -1
- data/docs/getting-started/quick-start.md +4 -4
- data/docs/guides/adding-memories.md +14 -1
- data/docs/guides/configuration.md +5 -5
- data/docs/guides/context-assembly.md +4 -4
- data/docs/guides/file-loading.md +12 -12
- data/docs/guides/getting-started.md +2 -2
- data/docs/guides/long-term-memory.md +7 -27
- data/docs/guides/propositions.md +20 -19
- data/docs/guides/recalling-memories.md +5 -5
- data/docs/guides/tags.md +18 -13
- data/docs/multi_framework_support.md +1 -1
- data/docs/robots/hive-mind.md +1 -1
- data/docs/robots/multi-robot.md +2 -2
- data/docs/robots/robot-groups.md +1 -1
- data/docs/robots/two-tier-memory.md +72 -94
- data/docs/setup_local_database.md +8 -54
- data/docs/using_rake_tasks_in_your_app.md +6 -6
- data/examples/01_basic_usage.rb +1 -0
- data/examples/03_custom_llm_configuration.rb +1 -0
- data/examples/04_file_loader_usage.rb +1 -0
- data/examples/05_timeframe_demo.rb +1 -0
- data/examples/06_example_app/app.rb +1 -0
- data/examples/07_cli_app/htm_cli.rb +1 -0
- data/examples/09_mcp_client.rb +1 -0
- data/examples/10_telemetry/demo.rb +1 -0
- data/examples/11_robot_groups/multi_process.rb +1 -0
- data/examples/11_robot_groups/same_process.rb +1 -0
- data/examples/12_rails_app/.envrc +12 -0
- data/examples/12_rails_app/Gemfile +8 -3
- data/examples/12_rails_app/Gemfile.lock +94 -89
- data/examples/12_rails_app/README.md +70 -19
- data/examples/12_rails_app/app/controllers/application_controller.rb +6 -0
- data/examples/12_rails_app/app/controllers/chats_controller.rb +305 -0
- data/examples/12_rails_app/app/controllers/dashboard_controller.rb +3 -0
- data/examples/12_rails_app/app/controllers/files_controller.rb +17 -2
- data/examples/12_rails_app/app/controllers/home_controller.rb +8 -0
- data/examples/12_rails_app/app/controllers/memories_controller.rb +9 -4
- data/examples/12_rails_app/app/controllers/messages_controller.rb +214 -0
- data/examples/12_rails_app/app/controllers/robots_controller.rb +11 -1
- data/examples/12_rails_app/app/controllers/tags_controller.rb +14 -1
- data/examples/12_rails_app/app/javascript/application.js +1 -1
- data/examples/12_rails_app/app/models/application_record.rb +5 -0
- data/examples/12_rails_app/app/models/chat.rb +36 -0
- data/examples/12_rails_app/app/models/message.rb +5 -0
- data/examples/12_rails_app/app/models/model.rb +5 -0
- data/examples/12_rails_app/app/models/tool_call.rb +5 -0
- data/examples/12_rails_app/app/views/chats/index.html.erb +61 -0
- data/examples/12_rails_app/app/views/chats/show.html.erb +213 -0
- data/examples/12_rails_app/app/views/dashboard/index.html.erb +3 -0
- data/examples/12_rails_app/app/views/files/index.html.erb +10 -5
- data/examples/12_rails_app/app/views/files/new.html.erb +4 -2
- data/examples/12_rails_app/app/views/files/show.html.erb +19 -3
- data/examples/12_rails_app/app/views/home/index.html.erb +45 -0
- data/examples/12_rails_app/app/views/layouts/application.html.erb +20 -18
- data/examples/12_rails_app/app/views/memories/_memory_card.html.erb +1 -1
- data/examples/12_rails_app/app/views/memories/deleted.html.erb +3 -1
- data/examples/12_rails_app/app/views/memories/edit.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/memories/show.html.erb +4 -2
- data/examples/12_rails_app/app/views/messages/_message.html.erb +20 -0
- data/examples/12_rails_app/app/views/robots/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/new.html.erb +2 -0
- data/examples/12_rails_app/app/views/robots/show.html.erb +2 -0
- data/examples/12_rails_app/app/views/search/index.html.erb +59 -8
- data/examples/12_rails_app/app/views/shared/_navbar.html.erb +75 -29
- data/examples/12_rails_app/app/views/tags/index.html.erb +2 -0
- data/examples/12_rails_app/app/views/tags/show.html.erb +3 -1
- data/examples/12_rails_app/config/application.rb +1 -1
- data/examples/12_rails_app/config/database.yml +9 -5
- data/examples/12_rails_app/config/importmap.rb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +9 -2
- data/examples/12_rails_app/config/initializers/ruby_llm.rb +33 -0
- data/examples/12_rails_app/config/routes.rb +39 -23
- data/examples/12_rails_app/db/migrate/20250124000001_create_ruby_llm_tables.rb +34 -0
- data/examples/12_rails_app/db/migrate/20250124000002_create_models_table.rb +28 -0
- data/examples/12_rails_app/db/schema.rb +67 -0
- data/examples/examples_helper.rb +25 -0
- data/lib/htm/circuit_breaker.rb +5 -6
- data/lib/htm/config/builder.rb +12 -12
- data/lib/htm/config/database.rb +21 -27
- data/lib/htm/config/defaults.yml +25 -13
- data/lib/htm/config/validator.rb +12 -18
- data/lib/htm/config.rb +93 -173
- data/lib/htm/database.rb +193 -199
- data/lib/htm/embedding_service.rb +4 -9
- data/lib/htm/integrations/sinatra.rb +7 -7
- data/lib/htm/job_adapter.rb +14 -21
- data/lib/htm/jobs/generate_embedding_job.rb +28 -44
- data/lib/htm/jobs/generate_propositions_job.rb +29 -55
- data/lib/htm/jobs/generate_relationships_job.rb +137 -0
- data/lib/htm/jobs/generate_tags_job.rb +45 -67
- data/lib/htm/loaders/markdown_loader.rb +65 -112
- data/lib/htm/long_term_memory/fulltext_search.rb +1 -1
- data/lib/htm/long_term_memory/hybrid_search.rb +300 -128
- data/lib/htm/long_term_memory/node_operations.rb +2 -2
- data/lib/htm/long_term_memory/relevance_scorer.rb +100 -68
- data/lib/htm/long_term_memory/tag_operations.rb +87 -120
- data/lib/htm/long_term_memory/vector_search.rb +1 -1
- data/lib/htm/long_term_memory.rb +2 -1
- data/lib/htm/mcp/cli.rb +59 -58
- data/lib/htm/mcp/server.rb +5 -6
- data/lib/htm/mcp/tools.rb +30 -36
- data/lib/htm/migration.rb +10 -10
- data/lib/htm/models/node.rb +2 -3
- data/lib/htm/models/node_relationship.rb +72 -0
- data/lib/htm/models/node_tag.rb +2 -2
- data/lib/htm/models/robot_node.rb +2 -2
- data/lib/htm/models/tag.rb +41 -28
- data/lib/htm/observability.rb +45 -51
- data/lib/htm/proposition_service.rb +3 -7
- data/lib/htm/query_cache.rb +13 -15
- data/lib/htm/railtie.rb +1 -2
- data/lib/htm/robot_group.rb +9 -9
- data/lib/htm/sequel_config.rb +1 -0
- data/lib/htm/sql_builder.rb +1 -1
- data/lib/htm/tag_service.rb +2 -6
- data/lib/htm/timeframe.rb +4 -5
- data/lib/htm/timeframe_extractor.rb +42 -83
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +112 -115
- data/lib/htm/working_memory.rb +21 -26
- data/lib/htm.rb +103 -116
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +14 -13
- data/lib/tasks/files.rake +5 -12
- data/lib/tasks/htm.rake +70 -71
- data/lib/tasks/jobs.rake +41 -47
- data/lib/tasks/tags.rake +3 -8
- metadata +28 -106
- data/lib/htm/config/section.rb +0 -74
- data/lib/htm/loaders/defaults_loader.rb +0 -166
- data/lib/htm/loaders/xdg_config_loader.rb +0 -116
|
@@ -48,83 +48,28 @@ class HTM
|
|
|
48
48
|
# - :skipped [Boolean] True if file was unchanged and skipped
|
|
49
49
|
#
|
|
50
50
|
def load_file(path, force: false)
|
|
51
|
-
expanded_path =
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
unless File.file?(expanded_path)
|
|
58
|
-
raise ArgumentError, "Not a file: #{path}"
|
|
59
|
-
end
|
|
51
|
+
expanded_path = validate_file_path!(path)
|
|
52
|
+
content = read_file_content(expanded_path, path)
|
|
53
|
+
stat = File.stat(expanded_path)
|
|
54
|
+
file_hash = Digest::SHA256.hexdigest(content)
|
|
60
55
|
|
|
61
|
-
# Validate file size before reading
|
|
62
|
-
file_size = File.size(expanded_path)
|
|
63
|
-
if file_size > MAX_FILE_SIZE
|
|
64
|
-
raise ArgumentError, "File too large: #{path} (#{file_size} bytes). Maximum size is #{MAX_FILE_SIZE} bytes (10 MB)."
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
# Read file with encoding detection and fallback
|
|
68
|
-
# Try UTF-8 first, then fall back to binary if encoding errors occur
|
|
69
|
-
begin
|
|
70
|
-
content = File.read(expanded_path, encoding: 'UTF-8')
|
|
71
|
-
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
|
72
|
-
# Try reading as binary and force encoding to UTF-8, replacing invalid chars
|
|
73
|
-
content = File.read(expanded_path, encoding: 'BINARY')
|
|
74
|
-
content = content.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
|
75
|
-
HTM.logger.warn "File #{path} has non-UTF-8 encoding, some characters may be replaced"
|
|
76
|
-
end
|
|
77
|
-
stat = File.stat(expanded_path)
|
|
78
|
-
file_hash = Digest::SHA256.hexdigest(content)
|
|
79
|
-
|
|
80
|
-
# Find or create source record
|
|
81
56
|
source = HTM::Models::FileSource.first(file_path: expanded_path)
|
|
82
57
|
is_new = source.nil?
|
|
83
58
|
source ||= HTM::Models::FileSource.new(file_path: expanded_path)
|
|
84
59
|
|
|
85
|
-
# Check if sync needed
|
|
86
60
|
unless force || is_new || source.needs_sync?(stat.mtime)
|
|
87
|
-
return {
|
|
88
|
-
file_path: expanded_path,
|
|
89
|
-
chunks_created: 0,
|
|
90
|
-
chunks_updated: 0,
|
|
91
|
-
chunks_deleted: 0,
|
|
92
|
-
skipped: true
|
|
93
|
-
}
|
|
61
|
+
return { file_path: expanded_path, chunks_created: 0, chunks_updated: 0, chunks_deleted: 0, skipped: true }
|
|
94
62
|
end
|
|
95
63
|
|
|
96
|
-
# Parse frontmatter and body
|
|
97
64
|
frontmatter, body = extract_frontmatter(content)
|
|
98
|
-
|
|
99
|
-
# Chunk the body with metadata (includes cursor positions)
|
|
100
65
|
chunks = @chunker.chunk_with_metadata(body)
|
|
66
|
+
prepend_frontmatter_to_chunk(frontmatter, chunks)
|
|
101
67
|
|
|
102
|
-
# Prepend frontmatter to first chunk if present
|
|
103
|
-
if frontmatter.any? && chunks.any?
|
|
104
|
-
frontmatter_yaml = YAML.dump(frontmatter).sub(/\A---\n/, "---\n")
|
|
105
|
-
chunks[0][:text] = "#{frontmatter_yaml}---\n\n#{chunks[0][:text]}"
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
# Save source first (need ID for node association)
|
|
109
68
|
source.save if is_new
|
|
110
|
-
|
|
111
|
-
# Sync chunks to database (chunks now include cursor positions)
|
|
112
69
|
result = sync_chunks(source, chunks)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
source.
|
|
116
|
-
file_hash: file_hash,
|
|
117
|
-
mtime: stat.mtime,
|
|
118
|
-
file_size: stat.size,
|
|
119
|
-
frontmatter: frontmatter,
|
|
120
|
-
last_synced_at: Time.now
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
result.merge(
|
|
124
|
-
file_path: expanded_path,
|
|
125
|
-
file_source_id: source.id,
|
|
126
|
-
skipped: false
|
|
127
|
-
)
|
|
70
|
+
source.update(file_hash: file_hash, mtime: stat.mtime, file_size: stat.size,
|
|
71
|
+
frontmatter: frontmatter, last_synced_at: Time.now)
|
|
72
|
+
result.merge(file_path: expanded_path, file_source_id: source.id, skipped: false)
|
|
128
73
|
end
|
|
129
74
|
|
|
130
75
|
# Load all matching files from a directory
|
|
@@ -148,16 +93,60 @@ class HTM
|
|
|
148
93
|
files = Dir.glob(File.join(expanded_path, pattern))
|
|
149
94
|
|
|
150
95
|
files.map do |file_path|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
{ file_path: file_path, error: e.message, skipped: false }
|
|
155
|
-
end
|
|
96
|
+
load_file(file_path, force: force)
|
|
97
|
+
rescue StandardError => e
|
|
98
|
+
{ file_path: file_path, error: e.message, skipped: false }
|
|
156
99
|
end
|
|
157
100
|
end
|
|
158
101
|
|
|
159
102
|
private
|
|
160
103
|
|
|
104
|
+
def validate_file_path!(path)
|
|
105
|
+
expanded = File.expand_path(path)
|
|
106
|
+
raise ArgumentError, "File not found: #{path}" unless File.exist?(expanded)
|
|
107
|
+
raise ArgumentError, "Not a file: #{path}" unless File.file?(expanded)
|
|
108
|
+
size = File.size(expanded)
|
|
109
|
+
if size > MAX_FILE_SIZE
|
|
110
|
+
raise ArgumentError, "File too large: #{path} (#{size} bytes). Maximum size is #{MAX_FILE_SIZE} bytes (10 MB)."
|
|
111
|
+
end
|
|
112
|
+
expanded
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def read_file_content(expanded_path, path)
|
|
116
|
+
File.read(expanded_path, encoding: 'UTF-8')
|
|
117
|
+
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
|
118
|
+
content = File.read(expanded_path, encoding: 'BINARY')
|
|
119
|
+
HTM.logger.warn "File #{path} has non-UTF-8 encoding, some characters may be replaced"
|
|
120
|
+
content.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def prepend_frontmatter_to_chunk(frontmatter, chunks)
|
|
124
|
+
return unless frontmatter.any? && chunks.any?
|
|
125
|
+
frontmatter_yaml = YAML.dump(frontmatter).sub(/\A---\n/, "---\n")
|
|
126
|
+
chunks[0][:text] = "#{frontmatter_yaml}---\n\n#{chunks[0][:text]}"
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def update_existing_chunk(node, position, chunk_cursor)
|
|
130
|
+
changes = {}
|
|
131
|
+
changes[:chunk_position] = position if node.chunk_position != position
|
|
132
|
+
changes[:deleted_at] = nil if node.deleted_at
|
|
133
|
+
current_cursor = node.metadata&.dig('cursor')
|
|
134
|
+
changes[:metadata] = (node.metadata || {}).merge('cursor' => chunk_cursor) if current_cursor != chunk_cursor
|
|
135
|
+
return false unless changes.any?
|
|
136
|
+
node.update(changes)
|
|
137
|
+
true
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def soft_delete_removed_chunks(existing_by_hash, matched_hashes)
|
|
141
|
+
count = 0
|
|
142
|
+
existing_by_hash.each_value do |node|
|
|
143
|
+
next if matched_hashes.include?(node.content_hash) || node.deleted_at
|
|
144
|
+
node.soft_delete!
|
|
145
|
+
count += 1
|
|
146
|
+
end
|
|
147
|
+
count
|
|
148
|
+
end
|
|
149
|
+
|
|
161
150
|
# Extract YAML frontmatter from content
|
|
162
151
|
#
|
|
163
152
|
# @param content [String] File content
|
|
@@ -194,60 +183,24 @@ class HTM
|
|
|
194
183
|
def sync_chunks(source, chunks)
|
|
195
184
|
created = 0
|
|
196
185
|
updated = 0
|
|
197
|
-
|
|
186
|
+
existing_nodes = source.id ? HTM::Models::Node.with_deleted.where(source_id: source.id).all : []
|
|
187
|
+
existing_by_hash = existing_nodes.to_h { |n| [n.content_hash, n] }
|
|
188
|
+
matched_hashes = Set.new
|
|
198
189
|
|
|
199
|
-
# Get existing nodes for this source (include soft-deleted for potential restore)
|
|
200
|
-
existing_nodes = source.id ?
|
|
201
|
-
HTM::Models::Node.with_deleted.where(source_id: source.id).all : []
|
|
202
|
-
existing_by_hash = existing_nodes.each_with_object({}) { |n, h| h[n.content_hash] = n }
|
|
203
|
-
|
|
204
|
-
# Track which existing nodes we've matched
|
|
205
|
-
matched_hashes = Set.new
|
|
206
|
-
|
|
207
|
-
# Process each new chunk (chunks are now Hashes with :text and :cursor)
|
|
208
190
|
chunks.each_with_index do |chunk_data, position|
|
|
209
191
|
chunk_content = chunk_data[:text].strip
|
|
210
|
-
chunk_cursor = chunk_data[:cursor]
|
|
211
192
|
next if chunk_content.empty?
|
|
212
193
|
|
|
213
194
|
chunk_hash = HTM::Models::Node.generate_content_hash(chunk_content)
|
|
214
|
-
|
|
215
195
|
if existing_by_hash[chunk_hash]
|
|
216
|
-
# Chunk exists - update position/cursor if needed, restore if soft-deleted
|
|
217
|
-
node = existing_by_hash[chunk_hash]
|
|
218
196
|
matched_hashes << chunk_hash
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
changes[:deleted_at] = nil if node.deleted_at
|
|
223
|
-
|
|
224
|
-
# Update cursor in metadata if changed
|
|
225
|
-
current_cursor = node.metadata&.dig('cursor')
|
|
226
|
-
if current_cursor != chunk_cursor
|
|
227
|
-
new_metadata = (node.metadata || {}).merge('cursor' => chunk_cursor)
|
|
228
|
-
changes[:metadata] = new_metadata
|
|
229
|
-
end
|
|
230
|
-
|
|
231
|
-
if changes.any?
|
|
232
|
-
node.update(changes)
|
|
233
|
-
updated += 1
|
|
234
|
-
end
|
|
235
|
-
else
|
|
236
|
-
# New chunk - create node with cursor in metadata
|
|
237
|
-
node = create_chunk_node(source, chunk_content, position, cursor: chunk_cursor)
|
|
238
|
-
created += 1 if node
|
|
197
|
+
updated += 1 if update_existing_chunk(existing_by_hash[chunk_hash], position, chunk_data[:cursor])
|
|
198
|
+
elsif create_chunk_node(source, chunk_content, position, cursor: chunk_data[:cursor])
|
|
199
|
+
created += 1
|
|
239
200
|
end
|
|
240
201
|
end
|
|
241
202
|
|
|
242
|
-
|
|
243
|
-
existing_by_hash.each do |hash, node|
|
|
244
|
-
next if matched_hashes.include?(hash)
|
|
245
|
-
next if node.deleted_at # Already deleted
|
|
246
|
-
|
|
247
|
-
node.soft_delete!
|
|
248
|
-
deleted += 1
|
|
249
|
-
end
|
|
250
|
-
|
|
203
|
+
deleted = soft_delete_removed_chunks(existing_by_hash, matched_hashes)
|
|
251
204
|
{ chunks_created: created, chunks_updated: updated, chunks_deleted: deleted }
|
|
252
205
|
end
|
|
253
206
|
|
|
@@ -35,7 +35,7 @@ class HTM
|
|
|
35
35
|
#
|
|
36
36
|
def search_fulltext(timeframe:, query:, limit:, metadata: {})
|
|
37
37
|
# Enforce limit to prevent DoS
|
|
38
|
-
safe_limit =
|
|
38
|
+
safe_limit = limit.to_i.clamp(1, MAX_FULLTEXT_LIMIT)
|
|
39
39
|
|
|
40
40
|
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
41
41
|
result = @cache.fetch(:fulltext, timeframe, query, safe_limit, metadata) do
|