htm 0.0.1 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.envrc +1 -0
  6. data/.irbrc +283 -80
  7. data/.tbls.yml +31 -0
  8. data/CHANGELOG.md +314 -16
  9. data/CLAUDE.md +603 -0
  10. data/README.md +76 -5
  11. data/Rakefile +5 -0
  12. data/SETUP.md +132 -101
  13. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  14. data/db/migrate/00002_create_robots.rb +11 -0
  15. data/db/migrate/00003_create_file_sources.rb +20 -0
  16. data/db/migrate/00004_create_nodes.rb +65 -0
  17. data/db/migrate/00005_create_tags.rb +13 -0
  18. data/db/migrate/00006_create_node_tags.rb +18 -0
  19. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  20. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  21. data/db/schema.sql +390 -36
  22. data/docs/api/database.md +19 -232
  23. data/docs/api/embedding-service.md +1 -7
  24. data/docs/api/htm.md +305 -364
  25. data/docs/api/index.md +1 -7
  26. data/docs/api/long-term-memory.md +342 -590
  27. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  28. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  29. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  30. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  31. data/docs/api/yard/HTM/Configuration.md +175 -0
  32. data/docs/api/yard/HTM/Database.md +99 -0
  33. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  34. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  35. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  36. data/docs/api/yard/HTM/Error.md +11 -0
  37. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  38. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  39. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  40. data/docs/api/yard/HTM/Observability.md +107 -0
  41. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  42. data/docs/api/yard/HTM/Railtie.md +27 -0
  43. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  44. data/docs/api/yard/HTM/TagError.md +18 -0
  45. data/docs/api/yard/HTM/TagService.md +67 -0
  46. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  47. data/docs/api/yard/HTM/Timeframe.md +40 -0
  48. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  49. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  50. data/docs/api/yard/HTM/ValidationError.md +20 -0
  51. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  52. data/docs/api/yard/HTM.md +80 -0
  53. data/docs/api/yard/index.csv +179 -0
  54. data/docs/api/yard-reference.md +51 -0
  55. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  56. data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
  57. data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
  58. data/docs/architecture/adrs/index.md +2 -13
  59. data/docs/architecture/hive-mind.md +165 -166
  60. data/docs/architecture/index.md +2 -2
  61. data/docs/architecture/overview.md +5 -171
  62. data/docs/architecture/two-tier-memory.md +1 -35
  63. data/docs/assets/images/adr-010-current-architecture.svg +37 -0
  64. data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
  65. data/docs/assets/images/adr-dependency-tree.svg +93 -0
  66. data/docs/assets/images/class-hierarchy.svg +55 -0
  67. data/docs/assets/images/exception-hierarchy.svg +45 -0
  68. data/docs/assets/images/htm-architecture-overview.svg +83 -0
  69. data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
  70. data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
  71. data/docs/assets/images/htm-eviction-process.svg +141 -0
  72. data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
  73. data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
  74. data/docs/assets/images/htm-node-states.svg +123 -0
  75. data/docs/assets/images/project-structure.svg +78 -0
  76. data/docs/assets/images/test-directory-structure.svg +38 -0
  77. data/{dbdoc → docs/database}/README.md +127 -125
  78. data/docs/database/public.file_sources.md +42 -0
  79. data/docs/database/public.file_sources.svg +211 -0
  80. data/{dbdoc → docs/database}/public.node_tags.md +7 -8
  81. data/docs/database/public.node_tags.svg +239 -0
  82. data/{dbdoc → docs/database}/public.nodes.md +22 -17
  83. data/docs/database/public.nodes.svg +271 -0
  84. data/docs/database/public.robot_nodes.md +46 -0
  85. data/docs/database/public.robot_nodes.svg +243 -0
  86. data/{dbdoc → docs/database}/public.robots.md +2 -3
  87. data/docs/database/public.robots.svg +161 -0
  88. data/docs/database/public.tags.svg +139 -0
  89. data/{dbdoc → docs/database}/schema.json +941 -630
  90. data/docs/database/schema.svg +282 -0
  91. data/docs/development/index.md +1 -29
  92. data/docs/development/schema.md +134 -309
  93. data/docs/development/testing.md +1 -9
  94. data/docs/getting-started/index.md +47 -0
  95. data/docs/{installation.md → getting-started/installation.md} +2 -2
  96. data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
  97. data/docs/guides/adding-memories.md +295 -643
  98. data/docs/guides/recalling-memories.md +36 -1
  99. data/docs/guides/search-strategies.md +85 -51
  100. data/docs/images/htm-er-diagram.svg +156 -0
  101. data/docs/index.md +16 -31
  102. data/docs/multi_framework_support.md +4 -4
  103. data/examples/README.md +280 -0
  104. data/examples/basic_usage.rb +18 -16
  105. data/examples/cli_app/htm_cli.rb +146 -8
  106. data/examples/cli_app/temp.log +93 -0
  107. data/examples/custom_llm_configuration.rb +1 -2
  108. data/examples/example_app/app.rb +11 -14
  109. data/examples/file_loader_usage.rb +177 -0
  110. data/examples/robot_groups/lib/robot_group.rb +419 -0
  111. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  112. data/examples/robot_groups/multi_process.rb +286 -0
  113. data/examples/robot_groups/robot_worker.rb +136 -0
  114. data/examples/robot_groups/same_process.rb +229 -0
  115. data/examples/sinatra_app/Gemfile +1 -0
  116. data/examples/sinatra_app/Gemfile.lock +166 -0
  117. data/examples/sinatra_app/app.rb +219 -24
  118. data/examples/timeframe_demo.rb +276 -0
  119. data/lib/htm/active_record_config.rb +10 -3
  120. data/lib/htm/circuit_breaker.rb +202 -0
  121. data/lib/htm/configuration.rb +313 -80
  122. data/lib/htm/database.rb +67 -36
  123. data/lib/htm/embedding_service.rb +39 -2
  124. data/lib/htm/errors.rb +131 -11
  125. data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
  126. data/lib/htm/job_adapter.rb +10 -3
  127. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  128. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  129. data/lib/htm/loaders/markdown_loader.rb +263 -0
  130. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  131. data/lib/htm/long_term_memory.rb +601 -321
  132. data/lib/htm/models/file_source.rb +99 -0
  133. data/lib/htm/models/node.rb +116 -12
  134. data/lib/htm/models/robot.rb +53 -4
  135. data/lib/htm/models/robot_node.rb +51 -0
  136. data/lib/htm/models/tag.rb +302 -0
  137. data/lib/htm/observability.rb +395 -0
  138. data/lib/htm/tag_service.rb +60 -3
  139. data/lib/htm/tasks.rb +29 -0
  140. data/lib/htm/timeframe.rb +194 -0
  141. data/lib/htm/timeframe_extractor.rb +307 -0
  142. data/lib/htm/version.rb +1 -1
  143. data/lib/htm/working_memory.rb +165 -70
  144. data/lib/htm.rb +352 -133
  145. data/lib/tasks/doc.rake +300 -0
  146. data/lib/tasks/files.rake +299 -0
  147. data/lib/tasks/htm.rake +188 -2
  148. data/lib/tasks/jobs.rake +10 -12
  149. data/lib/tasks/tags.rake +194 -0
  150. data/mkdocs.yml +91 -9
  151. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  152. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  153. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  154. data/notes/next_steps.md +100 -0
  155. data/notes/plan.md +627 -0
  156. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  157. data/notes/timescaledb_removal_summary.md +200 -0
  158. metadata +177 -37
  159. data/db/migrate/20250101000002_create_robots.rb +0 -14
  160. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  161. data/db/migrate/20250101000005_create_tags.rb +0 -38
  162. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  163. data/dbdoc/public.node_tags.svg +0 -112
  164. data/dbdoc/public.nodes.svg +0 -118
  165. data/dbdoc/public.robots.svg +0 -90
  166. data/dbdoc/public.tags.svg +0 -60
  167. data/dbdoc/schema.svg +0 -154
  168. data/{dbdoc → docs/database}/public.node_stats.md +0 -0
  169. data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
  170. data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
  171. data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
  172. data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
  173. data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
  174. data/{dbdoc → docs/database}/public.operations_log.md +0 -0
  175. data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
  176. data/{dbdoc → docs/database}/public.relationships.md +0 -0
  177. data/{dbdoc → docs/database}/public.relationships.svg +0 -0
  178. data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
  179. data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
  180. data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
  181. data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
  182. data/{dbdoc → docs/database}/public.tags.md +3 -3
  183. data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
  184. data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
@@ -0,0 +1,300 @@
1
+ # frozen_string_literal: true
2
+
3
+ namespace :htm do
4
+ namespace :doc do
5
desc "Build YARD API documentation (markdown format for MkDocs)"
# Regenerates docs/api/yard from scratch with the yard-markdown plugin, then
# post-processes the output for MkDocs and writes the index page.
# Exits with status 1 when the `yard` command fails or cannot be started.
task :yard do
  require "fileutils"

  output_dir = "docs/api/yard"

  puts "Building YARD documentation (markdown format)..."
  puts "Output directory: #{output_dir}"
  puts

  # Clean previous output so stale pages never survive a rebuild.
  # rm_rf is already a no-op when the directory does not exist.
  FileUtils.rm_rf(output_dir)
  FileUtils.mkdir_p(output_dir)

  # Build YARD documentation in markdown format.
  # Requires yard-markdown plugin to be installed.
  #
  # The arguments are passed as an argv array (no shell involved) so the
  # "lib/**/*.rb" glob reaches YARD untouched and YARD expands it itself.
  # When the command was a single shell string, a POSIX /bin/sh treated
  # "**" like "*" and only files exactly one directory below lib/ were
  # documented.
  yard_args = [
    "--plugin", "markdown",
    "--output-dir", output_dir,
    "--format", "markdown",
    "--title", "HTM - Hierarchical Temporary Memory API",
    "--markup", "markdown",
    "--charset", "utf-8",
    "--protected",
    "--no-private",
    "lib/**/*.rb",
    "-",
    "README.md",
    "CHANGELOG.md"
  ]

  # Kernel#system returns true on exit status 0, false on a non-zero exit
  # and nil when the command could not be executed at all.
  if system("yard", "doc", *yard_args)
    # Post-process markdown files for MkDocs compatibility
    fix_yard_anchors_for_mkdocs(output_dir)

    # Create MkDocs index page for the YARD docs
    create_yard_index_page(output_dir)

    puts
    puts "Documentation built successfully!"
    puts "YARD docs: #{output_dir}/"
    puts "MkDocs index: docs/api/yard-reference.md"
  else
    puts
    puts "Failed to build documentation. Make sure YARD and yard-markdown are installed:"
    puts " gem install yard yard-markdown"
    exit 1
  end
end
56
+
57
# Convert YARD anchor format to MkDocs-compatible format
# YARD generates: ## method_name() [](#method-i-method_name)
# MkDocs needs:   ## method_name() {: #method-i-method_name }
#
# Also escapes YARD annotations (@param, @return, etc.) to prevent
# pymdownx.magiclink from treating them as GitHub @mentions.
#
# Every "*.md" file below +output_dir+ is rewritten in place, but only when
# its content actually changed. A summary is printed when at least one file
# was modified.
#
# @param output_dir [String] directory containing the generated markdown
# @return [nil]
def fix_yard_anchors_for_mkdocs(output_dir)
  files_fixed = 0
  anchors_fixed = 0
  mentions_escaped = 0

  # Common YARD tags that appear in the output as bold "**@tag**" markers.
  # Built once, outside the per-file loop.
  yard_tags = %w[
    param return raise yield yieldparam yieldreturn option overload example
    see note todo deprecated abstract api author since version private
  ]
  mention_pattern = /\*\*@(#{yard_tags.join('|')})\*\*/i

  Dir.glob(File.join(output_dir, "**/*.md")).each do |file|
    original = File.read(file)
    content = original.dup

    # Trailing whitespace is matched with [ \t]* rather than \s* in every
    # pattern below: \s also matches "\n", which made the match swallow the
    # blank line(s) following a heading.

    # Pattern 0: Fix malformed YARD output where code fence is joined with heading
    # "```## method_name() [](#anchor)" -> "```\n## method_name() {: #anchor }"
    content.gsub!(%r{^(```)(\#{1,6}\s+.+?)\s*\[\]\(\#([^)]+)\)[ \t]*$}) do
      fence = Regexp.last_match(1)
      heading = Regexp.last_match(2)
      anchor_id = Regexp.last_match(3)
      anchors_fixed += 1
      "#{fence}\n#{heading} {: ##{anchor_id} }"
    end

    # Pattern 1: Attribute headings with [RW]/[R]/[W] markers
    # "## attr_name[RW] [](#attribute-i-attr_name)" -> "## attr_name [RW] {: #attribute-i-attr_name }"
    # This must run BEFORE the generic heading pattern: the generic pattern
    # also matches attribute headings and removes the "[](#...)" link this
    # pattern needs, which left the marker glued to the name.
    content.gsub!(%r{^(\#{1,6}\s+\w+)\[([RW]+)\]\s*\[\]\(\#([^)]+)\)[ \t]*$}) do
      heading = Regexp.last_match(1)
      rw_marker = Regexp.last_match(2)
      anchor_id = Regexp.last_match(3)
      anchors_fixed += 1
      "#{heading} [#{rw_marker}] {: ##{anchor_id} }"
    end

    # Pattern 2: Any remaining heading with a trailing anchor link
    # "## method_name() [](#anchor-id)" -> "## method_name() {: #anchor-id }"
    # Use %r{} to avoid # interpolation issues in regex
    content.gsub!(%r{^(\#{1,6}\s+.+?)\s*\[\]\(\#([^)]+)\)[ \t]*$}) do
      heading = Regexp.last_match(1)
      anchor_id = Regexp.last_match(2)
      anchors_fixed += 1
      "#{heading} {: ##{anchor_id} }"
    end

    # Pattern 3: Escape YARD annotations to prevent GitHub @mention linking
    # "**@param**" -> "**`@param`**" (inline code prevents magiclink processing)
    # Counted per occurrence so the summary reports real annotation totals.
    content.gsub!(mention_pattern) do
      mentions_escaped += 1
      "**`@#{Regexp.last_match(1).downcase}`**"
    end

    next if content == original

    File.write(file, content)
    files_fixed += 1
  end

  return if files_fixed.zero?

  puts "Fixed #{anchors_fixed} anchors in #{files_fixed} files for MkDocs compatibility"
  puts "Escaped #{mentions_escaped} YARD annotations to prevent @mention linking" if mentions_escaped > 0
end
125
+
126
# Write the MkDocs landing page for the generated YARD reference.
#
# The page is always written to "docs/api/yard-reference.md" and embeds the
# table rows produced by extract_yard_classes for +yard_output_dir+.
#
# NOTE(review): Material "grid cards" usually need the body of each list item
# indented under the bullet; the text below is flush-left as it appears in
# this listing — confirm against the real file before relying on it.
#
# @param yard_output_dir [String] directory holding the YARD output
# @return [Integer] number of bytes written (the result of File.write)
def create_yard_index_page(yard_output_dir)
  # One markdown table row per documented class/module.
  table_rows = extract_yard_classes(yard_output_dir)

  page = <<~MARKDOWN
    # YARD API Reference

    Complete API documentation generated by [YARD](https://yardoc.org/) with [yard-markdown](https://github.com/skatkov/yard-markdown).

    <div class="grid cards" markdown>

    - :material-book-open-variant:{ .lg .middle } **API Documentation**

    ---

    Browse the complete API reference with method signatures, parameters, and examples.

    - :material-chart-bar:{ .lg .middle } **Documentation Coverage**

    ---

    Run `rake htm:doc:stats` to see documentation coverage statistics.

    </div>

    ## Classes & Modules

    | Class/Module | Description |
    |--------------|-------------|
    #{table_rows}

    ## Generating Documentation

    ```bash
    # Build YARD documentation
    rake htm:doc:yard

    # Start live-reload server (HTML mode)
    rake htm:doc:server

    # Show coverage statistics
    rake htm:doc:stats
    ```
  MARKDOWN

  File.write("docs/api/yard-reference.md", page)
end
173
+
174
# Build the markdown table rows listing every documented class/module.
#
# Markdown pages below +yard_output_dir+ are preferred. When none are found
# the method falls back to YARD's HTML layout (HTM.html plus HTM/*.html), in
# case the yard-markdown plugin was not installed. Index pages, files whose
# name starts with "_", error classes, Railtie and ActiveRecordConfig are
# left out of the table.
#
# @param yard_output_dir [String] YARD output directory; a trailing "/" is
#   tolerated
# @return [String] newline-joined rows "| [Name](yard/<path>) | description |",
#   or an empty string when nothing was found
def extract_yard_classes(yard_output_dir)
  # Class descriptions for known classes, keyed by the unqualified name
  descriptions = {
    "HTM" => "Main API class for memory operations",
    "Configuration" => "Multi-provider LLM configuration",
    "Database" => "Database schema and connection management",
    "EmbeddingService" => "Vector embedding generation service",
    "LongTermMemory" => "PostgreSQL-backed permanent storage",
    "WorkingMemory" => "Token-limited in-memory cache",
    "TagService" => "Hierarchical tag extraction service",
    "Observability" => "Metrics and logging instrumentation",
    "CircuitBreaker" => "Fault tolerance for external services",
    "Timeframe" => "Time-based query filtering",
    "TimeframeExtractor" => "Natural language time parsing",
    "JobAdapter" => "Background job abstraction layer"
  }

  # Normalise the root so the prefix strip below is exact. An unanchored
  # String#sub silently failed for "dir/" and leaked the directory into
  # every class name.
  root = yard_output_dir.to_s.chomp("/")
  prefix = "#{root}/"

  # Dynamically discover classes from YARD markdown output
  classes = Dir.glob(File.join(root, "**/*.md")).sort.each_with_object([]) do |file, found|
    basename = File.basename(file, ".md")

    # Skip index files and non-class files
    next if basename == "index" || basename.start_with?("_")

    relative_path = file.delete_prefix(prefix)

    # "HTM/Timeframe/Result.md" -> "HTM::Timeframe::Result"
    # Only the real extension is removed, never an inner ".md".
    class_name = relative_path.delete_suffix(".md").gsub("/", "::")

    # Skip error classes and internal classes
    next if class_name.end_with?("Error")
    next if class_name.include?("Railtie")
    next if class_name.include?("ActiveRecordConfig")

    desc = descriptions[basename] || "#{class_name} class"
    found << [class_name, desc, "yard/#{relative_path}"]
  end

  # If no markdown files found, fall back to checking for HTML (in case yard-markdown not installed)
  if classes.empty?
    if File.exist?(File.join(root, "HTM.html"))
      classes << ["HTM", descriptions["HTM"] || "Main module", "yard/HTM.html"]
    end

    htm_dir = File.join(root, "HTM")
    if Dir.exist?(htm_dir)
      Dir.glob(File.join(htm_dir, "*.html")).sort.each do |file|
        basename = File.basename(file, ".html")
        next if basename.end_with?("Error")
        next if basename == "Railtie"
        next if basename == "ActiveRecordConfig"

        desc = descriptions[basename] || "#{basename} class"
        classes << ["HTM::#{basename}", desc, "yard/HTM/#{basename}.html"]
      end
    end
  end

  classes.map { |name, desc, path| "| [#{name}](#{path}) | #{desc} |" }.join("\n")
end
243
+
244
desc "Start YARD documentation server (live reload)"
# Optional task argument: the port to listen on (8808 when omitted).
task :server, [:port] do |_task, args|
  listen_port = args[:port] || 8808
  server_command = "yard server --reload --port #{listen_port}"

  puts "Starting YARD documentation server on http://localhost:#{listen_port}"
  puts "Press Ctrl+C to stop"
  puts

  # exec replaces the current process, so nothing after this call runs.
  exec(server_command)
end
254
+
255
desc "Show documentation coverage statistics"
# Prints YARD's coverage report, including the list of undocumented objects.
task :stats do
  puts "YARD Documentation Coverage:"
  puts

  # Passed as an argv array so no shell expands "lib/**/*.rb"; a POSIX
  # /bin/sh treats "**" like "*" and would hand YARD only the files exactly
  # one directory below lib/. YARD expands the glob itself.
  system("yard", "stats", "--list-undoc", "lib/**/*.rb")
end
261
+
262
desc "Fix YARD anchor links for MkDocs compatibility"
# Re-runs the anchor post-processing on already generated docs.
# Exits with status 1 when the YARD output directory is absent.
task :fix_anchors do
  yard_dir = "docs/api/yard"

  if Dir.exist?(yard_dir)
    fix_yard_anchors_for_mkdocs(yard_dir)
    puts "Done! Run 'mkdocs build' to verify no anchor warnings."
  else
    puts "YARD output directory not found: #{yard_dir}"
    puts "Run 'rake htm:doc:yard' first to generate documentation"
    exit 1
  end
end
275
+
276
desc "Clean generated documentation"
# Removes the generated YARD output, the .yardoc cache and the MkDocs index
# page, reporting each path that was actually deleted.
task :clean do
  require "fileutils"

  generated_dirs = ["docs/api/yard", ".yardoc"]
  generated_files = ["docs/api/yard-reference.md"]

  generated_dirs.each do |dir|
    next unless Dir.exist?(dir)

    FileUtils.rm_rf(dir)
    puts "Removed: #{dir}"
  end

  generated_files.each do |file|
    next unless File.exist?(file)

    FileUtils.rm(file)
    puts "Removed: #{file}"
  end

  puts "Documentation cleaned."
end
299
+ end
300
+ end
@@ -0,0 +1,299 @@
1
+ # frozen_string_literal: true
2
+
3
+ # HTM File Loading Tasks
4
+ #
5
+ # These tasks are available to any application using the HTM gem.
6
+ # Add to your application's Rakefile:
7
+ #
8
+ # require 'htm/tasks'
9
+ #
10
+
11
+ namespace :htm do
12
+ namespace :files do
13
desc "Load a markdown file into long-term memory. Usage: rake htm:files:load[path/to/file.md]"
# Loads one file through HTM#load_file. Set FORCE=true in the environment to
# pass force: true. Exits with status 1 when the path is missing from the
# arguments or does not exist on disk.
task :load, [:path] do |_task, args|
  require 'htm'

  file_path = args[:path]

  if !file_path
    puts "Error: File path required."
    puts "Usage: rake 'htm:files:load[path/to/file.md]'"
    exit 1
  elsif !File.exist?(file_path)
    puts "Error: File not found: #{file_path}"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  forced = ENV['FORCE'] == 'true'
  force_note = forced ? ' (force)' : ''

  puts "Loading file: #{file_path}#{force_note}"
  outcome = loader.load_file(file_path, force: forced)

  if outcome[:skipped]
    puts "Skipped: File unchanged since last sync."
    puts "Use FORCE=true to reload anyway."
    next
  end

  puts "Loaded successfully:"
  puts " File source ID: #{outcome[:file_source_id]}"
  puts " Chunks created: #{outcome[:chunks_created]}"
  puts " Chunks updated: #{outcome[:chunks_updated]}"
  puts " Chunks deleted: #{outcome[:chunks_deleted]}"
end
49
+
50
desc "Load all markdown files from a directory. Usage: rake htm:files:load_dir[path/to/dir]"
# Loads every file under a directory through HTM#load_directory and prints a
# per-file line plus totals. The optional second argument is the glob pattern
# ('**/*.md' when omitted); FORCE=true in the environment passes force: true.
task :load_dir, [:path, :pattern] do |_task, args|
  require 'htm'

  dir_path = args[:path]

  if !dir_path
    puts "Error: Directory path required."
    puts "Usage: rake 'htm:files:load_dir[path/to/dir]'"
    puts " rake 'htm:files:load_dir[path/to/dir,**/*.md]'"
    exit 1
  elsif !File.directory?(dir_path)
    puts "Error: Directory not found: #{dir_path}"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  glob = args[:pattern] || '**/*.md'
  forced = ENV['FORCE'] == 'true'
  force_note = forced ? ' (force)' : ''

  puts "Loading files from: #{dir_path}"
  puts "Pattern: #{glob}#{force_note}"
  puts

  outcomes = loader.load_directory(dir_path, pattern: glob, force: forced)

  created_sum = 0
  updated_sum = 0
  deleted_sum = 0
  unchanged = 0

  outcomes.each do |entry|
    # Unchanged files are only counted, never listed individually.
    if entry[:skipped]
      unchanged += 1
      next
    end

    created_sum += entry[:chunks_created]
    updated_sum += entry[:chunks_updated]
    deleted_sum += entry[:chunks_deleted]
    puts " #{entry[:file_path]}: #{entry[:chunks_created]} created, #{entry[:chunks_updated]} updated, #{entry[:chunks_deleted]} deleted"
  end

  puts
  puts "Summary:"
  puts " Files processed: #{outcomes.size}"
  puts " Files skipped (unchanged): #{unchanged}"
  puts " Total chunks created: #{created_sum}"
  puts " Total chunks updated: #{updated_sum}"
  puts " Total chunks deleted: #{deleted_sum}"
end
104
+
105
desc "List all loaded file sources"
# Prints every HTM::Models::FileSource ordered by path, with its chunk count,
# last sync time and a marker when the file on disk is missing or reported
# as needing a sync.
task :list do
  require 'htm'

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  sources = HTM::Models::FileSource.order(:file_path)
  total = sources.count

  if total.zero?
    puts "No files loaded."
    next
  end

  puts "Loaded files (#{total}):"
  puts "-" * 80

  sources.each do |source|
    chunk_total = source.chunks.count

    marker =
      if !File.exist?(source.file_path)
        " [missing]"
      elsif source.needs_sync?(File.mtime(source.file_path))
        " [needs sync]"
      else
        ""
      end

    # last_synced_at may be nil, hence the safe navigation.
    synced_at = source.last_synced_at&.strftime('%Y-%m-%d %H:%M')

    puts " #{source.file_path}"
    puts " ID: #{source.id} | Chunks: #{chunk_total} | Last synced: #{synced_at}#{marker}"
  end
end
136
+
137
desc "Show details for a loaded file. Usage: rake htm:files:info[path/to/file.md]"
# Prints the stored metadata, frontmatter and a one-line preview of every
# chunk for a loaded file. The record is looked up by the path as given and
# then by its expanded (absolute) form. Exits with status 1 when the path
# argument is missing or no matching record exists.
task :info, [:path] do |_t, args|
  require 'htm'

  path = args[:path]
  unless path
    puts "Error: File path required."
    puts "Usage: rake 'htm:files:info[path/to/file.md]'"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  # Try to find by exact path or expanded path
  source = HTM::Models::FileSource.find_by(file_path: path) ||
           HTM::Models::FileSource.find_by(file_path: File.expand_path(path))

  unless source
    puts "Error: File not loaded: #{path}"
    exit 1
  end

  puts "File: #{source.file_path}"
  puts "-" * 60
  puts " ID: #{source.id}"
  puts " File size: #{source.file_size} bytes"
  puts " Last synced: #{source.last_synced_at}"

  if File.exist?(source.file_path)
    current_mtime = File.mtime(source.file_path)
    puts " Needs sync: #{source.needs_sync?(current_mtime) ? 'Yes' : 'No'}"
  else
    puts " Needs sync: File missing!"
  end

  puts " Created: #{source.created_at}"
  puts

  if source.frontmatter.any?
    puts "Frontmatter:"
    source.frontmatter.each do |key, value|
      puts " #{key}: #{value}"
    end
    puts
  end

  # Preview length and the truncation test share one limit. Slicing with
  # [0..60] kept 61 characters, so a 61-character chunk was printed in full
  # and still marked "..." as if something had been cut.
  preview_limit = 60

  chunks = source.chunks
  puts "Chunks (#{chunks.count}):"
  chunks.each_with_index do |chunk, idx|
    text = chunk.content
    preview = text[0, preview_limit].gsub("\n", " ")
    preview += "..." if text.length > preview_limit
    puts " [#{idx}] #{preview}"
  end
end
192
+
193
desc "Unload a file from memory. Usage: rake htm:files:unload[path/to/file.md]"
# Removes a previously loaded file through HTM#unload_file and reports
# whether that call returned a truthy result. Exits with status 1 when no
# path argument was given.
task :unload, [:path] do |_task, args|
  require 'htm'

  file_path = args[:path]
  if file_path.nil? || file_path == false
    puts "Error: File path required."
    puts "Usage: rake 'htm:files:unload[path/to/file.md]'"
    exit 1
  end

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  removed = loader.unload_file(file_path)

  message = removed ? "Unloaded: #{file_path}" : "File not found: #{file_path}"
  puts message
end
216
+
217
desc "Sync all loaded files (reload changed files)"
# Walks every HTM::Models::FileSource: files missing on disk are reported,
# files that needs_sync? rejects are counted as unchanged, and the rest are
# reloaded through HTM#load_file. A summary is printed at the end.
task :sync do
  require 'htm'

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  loader = HTM.new(robot_name: "FileLoader")
  sources = HTM::Models::FileSource.all

  if sources.count.zero?
    puts "No files loaded."
    next
  end

  puts "Syncing #{sources.count} files..."
  puts

  reloaded = 0
  unchanged = 0
  absent = 0

  sources.each do |source|
    if !File.exist?(source.file_path)
      puts " [missing] #{source.file_path}"
      absent += 1
    elsif !source.needs_sync?(File.mtime(source.file_path))
      unchanged += 1
    else
      outcome = loader.load_file(source.file_path)
      puts " [synced] #{source.file_path}: #{outcome[:chunks_created]} created, #{outcome[:chunks_updated]} updated, #{outcome[:chunks_deleted]} deleted"
      reloaded += 1
    end
  end

  puts
  puts "Summary:"
  puts " Synced: #{reloaded}"
  puts " Skipped (unchanged): #{unchanged}"
  puts " Missing files: #{absent}"
end
263
+
264
desc "Show file loading statistics"
# Prints totals for loaded files and chunks, how many files currently need a
# sync or are missing on disk, and the average number of chunks per file.
task :stats do
  require 'htm'

  # Ensure database connection
  HTM::ActiveRecordConfig.establish_connection!

  file_total = HTM::Models::FileSource.count
  # Chunks are counted as nodes that carry a source_id.
  chunk_total = HTM::Models::Node.where.not(source_id: nil).count

  # Count files needing sync (checking actual file mtime)
  stale = 0
  absent = 0
  HTM::Models::FileSource.find_each do |source|
    unless File.exist?(source.file_path)
      absent += 1
      next
    end

    stale += 1 if source.needs_sync?(File.mtime(source.file_path))
  end

  puts "File Loading Statistics"
  puts "=" * 40
  puts " Total files loaded: #{file_total}"
  puts " Total chunks: #{chunk_total}"
  puts " Files needing sync: #{stale}"
  puts " Missing files: #{absent}" if absent > 0

  # Guarded so an empty table never divides by zero.
  next unless file_total > 0

  average = (chunk_total.to_f / file_total).round(1)
  puts " Average chunks per file: #{average}"
end
298
+ end
299
+ end