htm 0.0.2 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.irbrc +283 -80
- data/.tbls.yml +2 -1
- data/CHANGELOG.md +294 -26
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +172 -1
- data/docs/api/database.md +1 -2
- data/docs/api/htm.md +197 -2
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/database/README.md +128 -128
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/docs/database/public.node_tags.md +4 -4
- data/docs/database/public.node_tags.svg +212 -79
- data/docs/database/public.nodes.md +22 -12
- data/docs/database/public.nodes.svg +246 -127
- data/docs/database/public.robot_nodes.md +11 -9
- data/docs/database/public.robot_nodes.svg +220 -98
- data/docs/database/public.robots.md +2 -2
- data/docs/database/public.robots.svg +136 -81
- data/docs/database/public.tags.md +3 -3
- data/docs/database/public.tags.svg +118 -39
- data/docs/database/schema.json +850 -771
- data/docs/database/schema.svg +256 -197
- data/docs/development/schema.md +67 -2
- data/docs/guides/adding-memories.md +93 -7
- data/docs/guides/recalling-memories.md +36 -1
- data/examples/README.md +280 -0
- data/examples/cli_app/htm_cli.rb +65 -5
- data/examples/cli_app/temp.log +93 -0
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +1 -1
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +59 -13
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +460 -343
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +80 -5
- data/lib/htm/models/robot.rb +24 -1
- data/lib/htm/models/robot_node.rb +1 -0
- data/lib/htm/models/tag.rb +254 -4
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +26 -1
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +328 -130
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +158 -3
- data/lib/tasks/jobs.rake +3 -9
- data/lib/tasks/tags.rake +166 -6
- data/mkdocs.yml +36 -1
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +125 -15
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
- data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
- data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
- data/db/migrate/20250126000001_create_working_memories.rb +0 -19
- data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
- data/docs/database/public.working_memories.md +0 -40
- data/docs/database/public.working_memories.svg +0 -112
- data/lib/htm/models/working_memory_entry.rb +0 -88
data/lib/tasks/doc.rake
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
namespace :htm do
|
|
4
|
+
namespace :doc do
|
|
5
|
+
desc "Build YARD API documentation (markdown format for MkDocs)"
|
|
6
|
+
task :yard do
  # Builds the YARD API docs as markdown into docs/api/yard, post-processes
  # them for MkDocs, and regenerates the index page that links to them.
  # Exits with status 1 when the `yard` command fails or cannot be run.
  require "fileutils"

  output_dir = "docs/api/yard"

  puts "Building YARD documentation (markdown format)..."
  puts "Output directory: #{output_dir}"
  puts

  # Clean previous output
  FileUtils.rm_rf(output_dir) if Dir.exist?(output_dir)
  FileUtils.mkdir_p(output_dir)

  # Requires the yard-markdown plugin to be installed.
  #
  # The command is passed as an argument list so no shell is involved:
  # * the title needs no quoting, and
  # * "lib/**/*.rb" reaches YARD unexpanded. A plain /bin/sh has no
  #   recursive "**" and would expand it like "lib/*/*.rb", silently
  #   skipping lib/*.rb and every file nested more than one level deep.
  command = [
    "yard", "doc",
    "--plugin", "markdown",
    "--output-dir", output_dir,
    "--format", "markdown",
    "--title", "HTM - Hierarchical Temporary Memory API",
    "--markup", "markdown",
    "--charset", "utf-8",
    "--protected",
    "--no-private",
    "lib/**/*.rb",
    "-",
    "README.md",
    "CHANGELOG.md"
  ]

  # Kernel#system returns true only for exit status 0; false on a non-zero
  # status and nil when the command could not be executed at all.
  unless system(*command)
    puts
    puts "Failed to build documentation. Make sure YARD and yard-markdown are installed:"
    puts " gem install yard yard-markdown"
    exit 1
  end

  # Post-process markdown files for MkDocs compatibility
  fix_yard_anchors_for_mkdocs(output_dir)

  # Create MkDocs index page for the YARD docs
  create_yard_index_page(output_dir)

  puts
  puts "Documentation built successfully!"
  puts "YARD docs: #{output_dir}/"
  puts "MkDocs index: docs/api/yard-reference.md"
end
|
|
56
|
+
|
|
57
|
+
# Convert YARD anchor format to MkDocs-compatible format.
#
# YARD generates: ## method_name() [](#method-i-method_name)
# MkDocs needs:   ## method_name() {: #method-i-method_name }
#
# Also wraps bold YARD annotations (**@param**, **@return**, ...) in inline
# code so pymdownx.magiclink does not treat them as GitHub @mentions.
#
# Every "*.md" file below output_dir is read, rewritten in place only when
# its content changed, and a summary is printed when at least one file changed.
#
# @param output_dir [String] directory searched recursively for markdown files
# @return [void]
def fix_yard_anchors_for_mkdocs(output_dir)
  yard_tags = %w[param return raise yield yieldparam yieldreturn option overload example see note todo deprecated abstract api author since version private]
  # One case-insensitive pattern for all tags; built once, outside the file loop.
  annotation_pattern = /\*\*@(#{yard_tags.join('|')})\*\*/i

  files_fixed = 0
  anchors_fixed = 0
  mentions_escaped = 0

  Dir.glob(File.join(output_dir, "**/*.md")).each do |file|
    original = File.read(file)
    content = original.dup

    # The patterns below end in "[ \t\r]*$" rather than "\s*$": "\s" also
    # matches "\n", which made the match swallow the line break after the
    # heading and removed the blank line that followed it.

    # Pattern 0: Fix malformed YARD output where code fence is joined with heading
    # "```## method_name() [](#anchor)" -> "```\n## method_name() {: #anchor }"
    content.gsub!(%r{^(```)(\#{1,6}\s+.+?)\s*\[\]\(\#([^)]+)\)[ \t\r]*$}) do
      fence = Regexp.last_match(1)
      heading = Regexp.last_match(2)
      anchor_id = Regexp.last_match(3)
      anchors_fixed += 1
      "#{fence}\n#{heading} {: ##{anchor_id} }"
    end

    # Pattern 1: Attribute headings with [RW]/[R]/[W] markers
    # "## attr_name[RW] [](#attribute-i-attr_name)" -> "## attr_name [RW] {: #attribute-i-attr_name }"
    # Must run BEFORE the generic heading pattern: that pattern also matches
    # these lines and would consume the "[](#...)" link first, so the marker
    # would never be separated from the attribute name.
    content.gsub!(%r{^(\#{1,6}\s+\w+)\[([RW]+)\]\s*\[\]\(\#([^)]+)\)[ \t\r]*$}) do
      heading = Regexp.last_match(1)
      rw_marker = Regexp.last_match(2)
      anchor_id = Regexp.last_match(3)
      anchors_fixed += 1
      "#{heading} [#{rw_marker}] {: ##{anchor_id} }"
    end

    # Pattern 2: Any remaining heading with a trailing anchor link
    # "## method_name() [](#anchor-id)" -> "## method_name() {: #anchor-id }"
    # %r{} with "\#" keeps "#{" from being read as string interpolation.
    content.gsub!(%r{^(\#{1,6}\s+.+?)\s*\[\]\(\#([^)]+)\)[ \t\r]*$}) do
      heading = Regexp.last_match(1)
      anchor_id = Regexp.last_match(2)
      anchors_fixed += 1
      "#{heading} {: ##{anchor_id} }"
    end

    # Pattern 3: Escape YARD annotations to prevent GitHub @mention linking
    # "**@param**" -> "**`@param`**" (inline code prevents magiclink processing)
    # The tag is emitted in lower case regardless of how it was written, and
    # every replaced occurrence is counted.
    content.gsub!(annotation_pattern) do
      mentions_escaped += 1
      "**`@#{Regexp.last_match(1).downcase}`**"
    end

    next if content == original

    File.write(file, content)
    files_fixed += 1
  end

  return unless files_fixed > 0

  puts "Fixed #{anchors_fixed} anchors in #{files_fixed} files for MkDocs compatibility"
  puts "Escaped #{mentions_escaped} YARD annotations to prevent @mention linking" if mentions_escaped > 0
end
|
|
125
|
+
|
|
126
|
+
# Writes the MkDocs landing page for the generated YARD docs to
# docs/api/yard-reference.md.
#
# @param yard_output_dir [String] directory holding the generated YARD files;
#   passed to extract_yard_classes to build the "Classes & Modules" table rows
# @return [Integer] the value returned by File.write
def create_yard_index_page(yard_output_dir)
  index_path = "docs/api/yard-reference.md"

  # Pre-rendered markdown table rows, one per documented class/module.
  table_rows = extract_yard_classes(yard_output_dir)

  page = <<~MARKDOWN
    # YARD API Reference

    Complete API documentation generated by [YARD](https://yardoc.org/) with [yard-markdown](https://github.com/skatkov/yard-markdown).

    <div class="grid cards" markdown>

    - :material-book-open-variant:{ .lg .middle } **API Documentation**

    ---

    Browse the complete API reference with method signatures, parameters, and examples.

    - :material-chart-bar:{ .lg .middle } **Documentation Coverage**

    ---

    Run `rake htm:doc:stats` to see documentation coverage statistics.

    </div>

    ## Classes & Modules

    | Class/Module | Description |
    |--------------|-------------|
    #{table_rows}

    ## Generating Documentation

    ```bash
    # Build YARD documentation
    rake htm:doc:yard

    # Start live-reload server (HTML mode)
    rake htm:doc:server

    # Show coverage statistics
    rake htm:doc:stats
    ```
  MARKDOWN

  File.write(index_path, page)
end
|
|
173
|
+
|
|
174
|
+
# Builds the markdown table rows for the "Classes & Modules" index table.
#
# Scans yard_output_dir recursively for "*.md" files and derives a class name
# from each relative path ("HTM/Database.md" -> "HTM::Database"). Index files,
# files starting with "_", error classes, Railtie and ActiveRecordConfig are
# left out. When no markdown file qualifies, it falls back to HTML output
# ("HTM.html" and "HTM/*.html").
#
# @param yard_output_dir [String] directory containing the YARD output; a
#   trailing slash is accepted
# @return [String] rows of the form "| [Name](yard/path) | description |"
#   joined with newlines; empty when nothing was found
def extract_yard_classes(yard_output_dir)
  # Class descriptions for known classes, keyed by file basename
  descriptions = {
    "HTM" => "Main API class for memory operations",
    "Configuration" => "Multi-provider LLM configuration",
    "Database" => "Database schema and connection management",
    "EmbeddingService" => "Vector embedding generation service",
    "LongTermMemory" => "PostgreSQL-backed permanent storage",
    "WorkingMemory" => "Token-limited in-memory cache",
    "TagService" => "Hierarchical tag extraction service",
    "Observability" => "Metrics and logging instrumentation",
    "CircuitBreaker" => "Fault tolerance for external services",
    "Timeframe" => "Time-based query filtering",
    "TimeframeExtractor" => "Natural language time parsing",
    "JobAdapter" => "Background job abstraction layer"
  }

  # Normalise the root so "docs/api/yard/" behaves like "docs/api/yard".
  # Without this the prefix below never matched and absolute paths leaked
  # into the generated links and class names.
  root = yard_output_dir.sub(%r{/+\z}, "")
  root_prefix = %r{\A#{Regexp.escape(root)}/}

  classes = []

  # Dynamically discover classes from YARD markdown output
  Dir.glob(File.join(root, "**/*.md")).sort.each do |file|
    relative_path = file.sub(root_prefix, "")
    basename = File.basename(file, ".md")

    # Skip index files and non-class files
    next if basename == "index" || basename.start_with?("_")

    # Strip only the trailing extension; a plain sub(".md", "") would remove
    # the first ".md" found anywhere in the path.
    class_name = relative_path.sub(/\.md\z/, "").gsub("/", "::")

    # Skip error classes and internal classes
    next if class_name.end_with?("Error")
    next if class_name.include?("Railtie")
    next if class_name.include?("ActiveRecordConfig")

    desc = descriptions[basename] || "#{class_name} class"
    classes << [class_name, desc, "yard/#{relative_path}"]
  end

  # If no markdown files found, fall back to checking for HTML (in case yard-markdown not installed)
  if classes.empty?
    if File.exist?(File.join(root, "HTM.html"))
      classes << ["HTM", descriptions["HTM"] || "Main module", "yard/HTM.html"]
    end

    htm_dir = File.join(root, "HTM")
    if Dir.exist?(htm_dir)
      Dir.glob(File.join(htm_dir, "*.html")).sort.each do |file|
        basename = File.basename(file, ".html")
        next if basename.end_with?("Error")
        next if basename == "Railtie"
        next if basename == "ActiveRecordConfig"

        desc = descriptions[basename] || "#{basename} class"
        classes << ["HTM::#{basename}", desc, "yard/HTM/#{basename}.html"]
      end
    end
  end

  classes.map { |name, desc, path| "| [#{name}](#{path}) | #{desc} |" }.join("\n")
end
|
|
243
|
+
|
|
244
|
+
desc "Start YARD documentation server (live reload)"
|
|
245
|
+
task :server, [:port] do |_t, args|
  # Replaces the rake process with `yard server --reload` on the given port
  # (default 8808). Nothing after the exec call runs.
  port = args[:port] || 8808

  puts "Starting YARD documentation server on http://localhost:#{port}"
  puts "Press Ctrl+C to stop"
  puts

  # Argument-list form: no shell is started, so the user-supplied port is
  # handed to yard as a single literal argument instead of being spliced
  # into a command line that /bin/sh would parse.
  exec("yard", "server", "--reload", "--port", port.to_s)
end
|
|
254
|
+
|
|
255
|
+
desc "Show documentation coverage statistics"
|
|
256
|
+
task :stats do
  # Prints YARD's documentation coverage report, listing undocumented objects.
  puts "YARD Documentation Coverage:"
  puts

  # Argument-list form so the glob reaches YARD unexpanded. Passed through
  # /bin/sh, "lib/**/*.rb" expands like "lib/*/*.rb" (no recursive "**"),
  # which leaves lib/*.rb and deeper directories out of the statistics.
  system("yard", "stats", "--list-undoc", "lib/**/*.rb")
end
|
|
261
|
+
|
|
262
|
+
desc "Fix YARD anchor links for MkDocs compatibility"
|
|
263
|
+
task :fix_anchors do
  # Re-runs only the MkDocs post-processing step on already generated docs.
  # Exits with status 1 when the YARD output directory does not exist.
  yard_dir = "docs/api/yard"

  if Dir.exist?(yard_dir)
    fix_yard_anchors_for_mkdocs(yard_dir)
    puts "Done! Run 'mkdocs build' to verify no anchor warnings."
  else
    puts "YARD output directory not found: #{yard_dir}"
    puts "Run 'rake htm:doc:yard' first to generate documentation"
    exit 1
  end
end
|
|
275
|
+
|
|
276
|
+
desc "Clean generated documentation"
|
|
277
|
+
task :clean do
  # Deletes the generated YARD output, the .yardoc cache and the MkDocs index
  # page. Paths that do not exist are skipped without a message.
  require "fileutils"

  %w[docs/api/yard .yardoc].each do |dir|
    next unless Dir.exist?(dir)

    FileUtils.rm_rf(dir)
    puts "Removed: #{dir}"
  end

  %w[docs/api/yard-reference.md].each do |file|
    next unless File.exist?(file)

    FileUtils.rm(file)
    puts "Removed: #{file}"
  end

  puts "Documentation cleaned."
end
|
|
299
|
+
end
|
|
300
|
+
end
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# HTM File Loading Tasks
|
|
4
|
+
#
|
|
5
|
+
# These tasks are available to any application using the HTM gem.
|
|
6
|
+
# Add to your application's Rakefile:
|
|
7
|
+
#
|
|
8
|
+
# require 'htm/tasks'
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
namespace :htm do
|
|
12
|
+
namespace :files do
|
|
13
|
+
desc "Load a markdown file into long-term memory. Usage: rake htm:files:load[path/to/file.md]"
|
|
14
|
+
task :load, [:path] do |_t, args|
  # Loads one file through HTM#load_file and prints the outcome.
  # Set FORCE=true in the environment to pass force: true to the loader.
  # Exits with status 1 when the path argument is missing or the file does
  # not exist.
  require 'htm'

  target = args[:path]

  if !target
    puts "Error: File path required."
    puts "Usage: rake 'htm:files:load[path/to/file.md]'"
    exit 1
  elsif !File.exist?(target)
    puts "Error: File not found: #{target}"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  reload = ENV['FORCE'] == 'true'

  puts "Loading file: #{target}#{' (force)' if reload}"
  outcome = loader.load_file(target, force: reload)

  if outcome[:skipped]
    puts "Skipped: File unchanged since last sync."
    puts "Use FORCE=true to reload anyway."
  else
    puts "Loaded successfully:"
    puts " File source ID: #{outcome[:file_source_id]}"
    puts " Chunks created: #{outcome[:chunks_created]}"
    puts " Chunks updated: #{outcome[:chunks_updated]}"
    puts " Chunks deleted: #{outcome[:chunks_deleted]}"
  end
end
|
|
49
|
+
|
|
50
|
+
desc "Load all markdown files from a directory. Usage: rake htm:files:load_dir[path/to/dir]"
|
|
51
|
+
task :load_dir, [:path, :pattern] do |_t, args|
  # Loads every file under a directory that matches the glob pattern
  # (default '**/*.md') through HTM#load_directory, printing one line per
  # loaded file followed by totals. FORCE=true passes force: true.
  # Exits with status 1 when the path is missing or is not a directory.
  require 'htm'

  dir = args[:path]

  if !dir
    puts "Error: Directory path required."
    puts "Usage: rake 'htm:files:load_dir[path/to/dir]'"
    puts " rake 'htm:files:load_dir[path/to/dir,**/*.md]'"
    exit 1
  elsif !File.directory?(dir)
    puts "Error: Directory not found: #{dir}"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  glob = args[:pattern] || '**/*.md'
  reload = ENV['FORCE'] == 'true'

  puts "Loading files from: #{dir}"
  puts "Pattern: #{glob}#{' (force)' if reload}"
  puts

  outcomes = loader.load_directory(dir, pattern: glob, force: reload)

  # Running totals over all files that were actually (re)loaded.
  totals = { created: 0, updated: 0, deleted: 0 }
  unchanged = 0

  outcomes.each do |outcome|
    if outcome[:skipped]
      unchanged += 1
      next
    end

    totals[:created] += outcome[:chunks_created]
    totals[:updated] += outcome[:chunks_updated]
    totals[:deleted] += outcome[:chunks_deleted]
    puts " #{outcome[:file_path]}: #{outcome[:chunks_created]} created, #{outcome[:chunks_updated]} updated, #{outcome[:chunks_deleted]} deleted"
  end

  puts
  puts "Summary:"
  puts " Files processed: #{outcomes.size}"
  puts " Files skipped (unchanged): #{unchanged}"
  puts " Total chunks created: #{totals[:created]}"
  puts " Total chunks updated: #{totals[:updated]}"
  puts " Total chunks deleted: #{totals[:deleted]}"
end
|
|
104
|
+
|
|
105
|
+
desc "List all loaded file sources"
|
|
106
|
+
task :list do
  # Prints every FileSource record ordered by file_path, with its chunk
  # count, last sync time and a marker when the file on disk is missing or
  # newer than the stored sync state.
  require 'htm'

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  file_sources = HTM::Models::FileSource.order(:file_path)
  total = file_sources.count

  if total.zero?
    puts "No files loaded."
    next
  end

  puts "Loaded files (#{total}):"
  puts "-" * 80

  file_sources.each do |entry|
    chunk_total = entry.chunks.count

    marker =
      if !File.exist?(entry.file_path)
        " [missing]"
      elsif entry.needs_sync?(File.mtime(entry.file_path))
        " [needs sync]"
      else
        ""
      end

    puts " #{entry.file_path}"
    puts " ID: #{entry.id} | Chunks: #{chunk_total} | Last synced: #{entry.last_synced_at&.strftime('%Y-%m-%d %H:%M')}#{marker}"
  end
end
|
|
136
|
+
|
|
137
|
+
desc "Show details for a loaded file. Usage: rake htm:files:info[path/to/file.md]"
|
|
138
|
+
task :info, [:path] do |_t, args|
  # Prints the stored metadata, frontmatter and a one-line preview of every
  # chunk for a loaded file. The record is looked up by the path as given
  # and, failing that, by its expanded absolute path.
  # Exits with status 1 when the path is missing or no record is found.
  require 'htm'

  path = args[:path]
  unless path
    puts "Error: File path required."
    puts "Usage: rake 'htm:files:info[path/to/file.md]'"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  # Try to find by exact path or expanded path
  source = HTM::Models::FileSource.find_by(file_path: path) ||
           HTM::Models::FileSource.find_by(file_path: File.expand_path(path))

  unless source
    puts "Error: File not loaded: #{path}"
    exit 1
  end

  puts "File: #{source.file_path}"
  puts "-" * 60
  puts " ID: #{source.id}"
  puts " File size: #{source.file_size} bytes"
  puts " Last synced: #{source.last_synced_at}"

  if File.exist?(source.file_path)
    current_mtime = File.mtime(source.file_path)
    puts " Needs sync: #{source.needs_sync?(current_mtime) ? 'Yes' : 'No'}"
  else
    puts " Needs sync: File missing!"
  end

  puts " Created: #{source.created_at}"
  puts

  if source.frontmatter.any?
    puts "Frontmatter:"
    source.frontmatter.each do |key, value|
      puts " #{key}: #{value}"
    end
    puts
  end

  preview_length = 60

  chunks = source.chunks
  puts "Chunks (#{chunks.count}):"
  chunks.each_with_index do |chunk, idx|
    # Take exactly preview_length characters. The previous slice [0..60]
    # took 61, so a 61-character chunk was printed in full and still got
    # the "..." truncation marker.
    preview = chunk.content[0, preview_length].gsub("\n", " ")
    preview += "..." if chunk.content.length > preview_length
    puts " [#{idx}] #{preview}"
  end
end
|
|
192
|
+
|
|
193
|
+
desc "Unload a file from memory. Usage: rake htm:files:unload[path/to/file.md]"
|
|
194
|
+
task :unload, [:path] do |_t, args|
  # Removes a previously loaded file via HTM#unload_file and reports whether
  # the call returned a truthy result.
  # Exits with status 1 only when the path argument is missing.
  require 'htm'

  target = args[:path]
  if target.nil?
    puts "Error: File path required."
    puts "Usage: rake 'htm:files:unload[path/to/file.md]'"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  removed = loader.unload_file(target)

  puts(removed ? "Unloaded: #{target}" : "File not found: #{target}")
end
|
|
216
|
+
|
|
217
|
+
desc "Sync all loaded files (reload changed files)"
|
|
218
|
+
task :sync do
  # Walks all FileSource records and reloads each file whose on-disk mtime
  # makes needs_sync? true. Files that no longer exist are reported, files
  # that are up to date are counted silently.
  require 'htm'

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  file_sources = HTM::Models::FileSource.all

  if file_sources.count.zero?
    puts "No files loaded."
    next
  end

  puts "Syncing #{file_sources.count} files..."
  puts

  tally = { synced: 0, skipped: 0, missing: 0 }

  file_sources.each do |entry|
    if !File.exist?(entry.file_path)
      puts " [missing] #{entry.file_path}"
      tally[:missing] += 1
    elsif !entry.needs_sync?(File.mtime(entry.file_path))
      tally[:skipped] += 1
    else
      outcome = loader.load_file(entry.file_path)
      puts " [synced] #{entry.file_path}: #{outcome[:chunks_created]} created, #{outcome[:chunks_updated]} updated, #{outcome[:chunks_deleted]} deleted"
      tally[:synced] += 1
    end
  end

  puts
  puts "Summary:"
  puts " Synced: #{tally[:synced]}"
  puts " Skipped (unchanged): #{tally[:skipped]}"
  puts " Missing files: #{tally[:missing]}"
end
|
|
263
|
+
|
|
264
|
+
desc "Show file loading statistics"
|
|
265
|
+
task :stats do
  # Prints aggregate numbers about loaded files: record count, number of
  # nodes with a source_id, how many files need a sync or are missing on
  # disk, and the average number of chunks per file.
  require 'htm'

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  total_sources = HTM::Models::FileSource.count
  total_chunks = HTM::Models::Node.where.not(source_id: nil).count

  # Compare each record against the file's current mtime on disk.
  stale = 0
  gone = 0
  HTM::Models::FileSource.find_each do |entry|
    unless File.exist?(entry.file_path)
      gone += 1
      next
    end

    stale += 1 if entry.needs_sync?(File.mtime(entry.file_path))
  end

  puts "File Loading Statistics"
  puts "=" * 40
  puts " Total files loaded: #{total_sources}"
  puts " Total chunks: #{total_chunks}"
  puts " Files needing sync: #{stale}"
  puts " Missing files: #{gone}" if gone > 0

  if total_sources > 0
    average = (total_chunks.to_f / total_sources).round(1)
    puts " Average chunks per file: #{average}"
  end
end
|
|
298
|
+
end
|
|
299
|
+
end
|