htm 0.0.20 → 0.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +60 -0
  3. data/Rakefile +104 -18
  4. data/db/migrate/00001_enable_extensions.rb +9 -5
  5. data/db/migrate/00002_create_robots.rb +18 -6
  6. data/db/migrate/00003_create_file_sources.rb +30 -17
  7. data/db/migrate/00004_create_nodes.rb +60 -48
  8. data/db/migrate/00005_create_tags.rb +24 -12
  9. data/db/migrate/00006_create_node_tags.rb +28 -13
  10. data/db/migrate/00007_create_robot_nodes.rb +40 -26
  11. data/db/schema.sql +17 -1
  12. data/db/seeds.rb +33 -33
  13. data/docs/database/naming-convention.md +244 -0
  14. data/docs/database_rake_tasks.md +31 -0
  15. data/docs/development/rake-tasks.md +80 -35
  16. data/docs/guides/mcp-server.md +70 -1
  17. data/examples/.envrc +6 -0
  18. data/examples/.gitignore +2 -0
  19. data/examples/00_create_examples_db.rb +94 -0
  20. data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
  21. data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
  22. data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
  23. data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
  24. data/examples/{example_app → 06_example_app}/app.rb +15 -15
  25. data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
  26. data/examples/08_sinatra_app/Gemfile.lock +241 -0
  27. data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
  28. data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
  29. data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
  30. data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
  31. data/examples/11_robot_groups/README.md +335 -0
  32. data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
  33. data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
  34. data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
  35. data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
  36. data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
  37. data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
  38. data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
  39. data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
  40. data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
  41. data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
  42. data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
  43. data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
  44. data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
  45. data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
  46. data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
  47. data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
  48. data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
  49. data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
  50. data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
  51. data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
  52. data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
  53. data/examples/12_rails_app/config/initializers/htm.rb +7 -0
  54. data/examples/12_rails_app/config/initializers/rack.rb +5 -0
  55. data/examples/README.md +230 -211
  56. data/examples/examples_helper.rb +138 -0
  57. data/lib/htm/config/builder.rb +167 -0
  58. data/lib/htm/config/database.rb +317 -0
  59. data/lib/htm/config/defaults.yml +62 -22
  60. data/lib/htm/config/validator.rb +83 -0
  61. data/lib/htm/config.rb +75 -462
  62. data/lib/htm/database.rb +85 -127
  63. data/lib/htm/errors.rb +14 -0
  64. data/lib/htm/integrations/sinatra.rb +13 -44
  65. data/lib/htm/jobs/generate_embedding_job.rb +3 -4
  66. data/lib/htm/jobs/generate_propositions_job.rb +4 -5
  67. data/lib/htm/jobs/generate_tags_job.rb +16 -15
  68. data/lib/htm/loaders/markdown_loader.rb +17 -15
  69. data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
  70. data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
  71. data/lib/htm/long_term_memory/node_operations.rb +24 -23
  72. data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
  73. data/lib/htm/long_term_memory/robot_operations.rb +4 -4
  74. data/lib/htm/long_term_memory/tag_operations.rb +91 -77
  75. data/lib/htm/long_term_memory/vector_search.rb +4 -5
  76. data/lib/htm/long_term_memory.rb +13 -13
  77. data/lib/htm/mcp/cli.rb +115 -8
  78. data/lib/htm/mcp/resources.rb +4 -3
  79. data/lib/htm/mcp/server.rb +5 -4
  80. data/lib/htm/mcp/tools.rb +37 -28
  81. data/lib/htm/migration.rb +72 -0
  82. data/lib/htm/models/file_source.rb +52 -31
  83. data/lib/htm/models/node.rb +224 -108
  84. data/lib/htm/models/node_tag.rb +49 -28
  85. data/lib/htm/models/robot.rb +38 -27
  86. data/lib/htm/models/robot_node.rb +63 -35
  87. data/lib/htm/models/tag.rb +126 -123
  88. data/lib/htm/observability.rb +45 -41
  89. data/lib/htm/proposition_service.rb +76 -7
  90. data/lib/htm/railtie.rb +2 -2
  91. data/lib/htm/robot_group.rb +30 -18
  92. data/lib/htm/sequel_config.rb +215 -0
  93. data/lib/htm/sql_builder.rb +14 -16
  94. data/lib/htm/tag_service.rb +78 -0
  95. data/lib/htm/tasks.rb +3 -0
  96. data/lib/htm/version.rb +1 -1
  97. data/lib/htm/workflows/remember_workflow.rb +6 -5
  98. data/lib/htm.rb +26 -22
  99. data/lib/tasks/db.rake +0 -2
  100. data/lib/tasks/doc.rake +2 -2
  101. data/lib/tasks/files.rake +11 -18
  102. data/lib/tasks/htm.rake +190 -62
  103. data/lib/tasks/jobs.rake +179 -54
  104. data/lib/tasks/tags.rake +8 -13
  105. data/scripts/backfill_parent_tags.rb +376 -0
  106. data/scripts/normalize_plural_tags.rb +335 -0
  107. metadata +111 -85
  108. data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
  109. data/examples/sinatra_app/Gemfile.lock +0 -166
  110. data/lib/htm/active_record_config.rb +0 -104
  111. data/lib/htm/loaders/defaults_loader.rb +0 -143
  112. data/lib/htm/loaders/xdg_config_loader.rb +0 -116
  113. /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
  114. /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
  115. /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
  116. /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
  117. /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
  118. /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
  119. /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
  120. /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
  121. /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
  122. /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
  123. /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
  124. /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
  125. /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
  126. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
  127. /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
  128. /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
  129. /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
  130. /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
  131. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
  132. /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
  133. /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
  134. /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
  135. /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
  136. /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
  137. /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
  138. /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
  139. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
  140. /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
  141. /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
  142. /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
  143. /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
  144. /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
  145. /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
  146. /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
  147. /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
  148. /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
  149. /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
  150. /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
  151. /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
  152. /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
  153. /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
@@ -49,7 +49,7 @@ defaults:
49
49
  # Access: HTM.config.embedding.provider, HTM.config.embedding.model, etc.
50
50
  # ---------------------------------------------------------------------------
51
51
  embedding:
52
- provider: ollama
52
+ provider: :ollama
53
53
  model: nomic-embed-text:latest
54
54
  dimensions: 768
55
55
  timeout: 120
@@ -65,7 +65,7 @@ defaults:
65
65
  # %{taxonomy_context} - existing taxonomy info or new taxonomy message
66
66
  # ---------------------------------------------------------------------------
67
67
  tag:
68
- provider: ollama
68
+ provider: :ollama
69
69
  model: gemma3:latest
70
70
  timeout: 180
71
71
  max_depth: 4
@@ -91,8 +91,9 @@ defaults:
91
91
  RULES:
92
92
  1. Each concept belongs to ONE path only
93
93
  2. Use lowercase, hyphens for multi-word terms
94
- 3. Return 2-5 tags that best classify this text
95
- 4. Match existing taxonomy paths when applicable
94
+ 3. ALWAYS use SINGULAR forms, never plurals (user NOT users, framework NOT frameworks, model NOT models)
95
+ 4. Return 2-5 tags that best classify this text
96
+ 5. Match existing taxonomy paths when applicable
96
97
 
97
98
  TEXT: %{text}
98
99
 
@@ -109,38 +110,66 @@ defaults:
109
110
  # %{text} - the content to extract propositions from
110
111
  # ---------------------------------------------------------------------------
111
112
  proposition:
112
- provider: ollama
113
+ provider: :ollama
113
114
  model: gemma3:latest
114
115
  timeout: 180
115
116
  enabled: true
117
+ min_length: 10
118
+ max_length: 1000
119
+ min_words: 5
116
120
 
117
121
  system_prompt: |
118
- You are an atomic fact extraction system. Your goal is maximum decomposition.
119
- Break every statement into its smallest possible factual units.
122
+ You are an atomic fact extraction system. Extract factual propositions from text.
120
123
  Output ONLY propositions, one per line, prefixed with a dash (-).
124
+ NEVER ask for clarification or more text - extract what you can from the given text.
125
+ If the text contains no extractable facts, output nothing.
121
126
 
122
127
  user_prompt_template: |
123
- Extract all ATOMIC factual propositions from the following text.
128
+ Extract ATOMIC factual propositions from the following text.
124
129
 
125
- An atomic proposition expresses exactly ONE relationship or fact.
130
+ An atomic proposition:
131
+ - Expresses exactly ONE fact or relationship
132
+ - Is understandable WITHOUT any additional context
133
+ - Uses FULL NAMES, never pronouns (he, she, it, they, this, that)
134
+ - Contains at least 5 words for sufficient context
126
135
 
127
- Rules:
136
+ PRONOUN REPLACEMENT EXAMPLES:
137
+ BAD: "It is important to know the enemy."
138
+ GOOD: "Knowing the enemy is important for spiritual warfare."
139
+
140
+ BAD: "They support thousands of users."
141
+ GOOD: "The Every.to products support thousands of daily users."
142
+
143
+ BAD: "This results in a calculation."
144
+ GOOD: "The age calculation depends on whether the birthday has occurred."
145
+
146
+ CONTEXT ENRICHMENT EXAMPLES:
147
+ BAD: "Wiring costs $1,000."
148
+ GOOD: "Solar panel wiring costs $1,000 for the Oklahoma barndominium project."
149
+
150
+ BAD: "The driveway is gravel."
151
+ GOOD: "Dewayne's barndominium driveway is gravel."
152
+
153
+ RULES:
128
154
  1. Split compound statements into separate atomic facts
129
155
  2. Each proposition = exactly one fact
130
- 3. Use full names, never pronouns
131
- 4. Make each proposition understandable in isolation
156
+ 3. Replace ALL pronouns with the actual names/nouns they refer to
157
+ 4. Include enough context to understand the fact in isolation
158
+ 5. If unsure what a pronoun refers to, omit that proposition
132
159
 
133
160
  TEXT: %{text}
134
161
 
135
162
  Return ONLY atomic propositions, one per line. Use a dash (-) prefix for each.
163
+ If you cannot extract any facts, return nothing. Do NOT ask for more text.
136
164
 
137
165
  # ---------------------------------------------------------------------------
138
166
  # Chunking Configuration (for file loading)
139
- # Access: HTM.config.chunking.size, HTM.config.chunking.overlap
167
+ # Access: HTM.config.chunking.chunk_size, HTM.config.chunking.chunk_overlap
168
+ # Note: Using chunk_size/chunk_overlap to avoid collision with Enumerable#size
140
169
  # ---------------------------------------------------------------------------
141
170
  chunking:
142
- size: 1024
143
- overlap: 64
171
+ chunk_size: 1024
172
+ chunk_overlap: 64
144
173
 
145
174
  # ---------------------------------------------------------------------------
146
175
  # Circuit Breaker Configuration
@@ -167,16 +196,16 @@ defaults:
167
196
  # Access: HTM.config.job.backend
168
197
  # ---------------------------------------------------------------------------
169
198
  job:
170
- backend: fiber
199
+ backend: :fiber
171
200
 
172
201
  # ---------------------------------------------------------------------------
173
202
  # General Settings
174
203
  # Access: HTM.config.week_start, HTM.config.connection_timeout, etc.
175
204
  # ---------------------------------------------------------------------------
176
- week_start: sunday
205
+ week_start: :sunday
177
206
  connection_timeout: 60
178
207
  telemetry_enabled: false
179
- log_level: info
208
+ log_level: :info
180
209
 
181
210
  # ---------------------------------------------------------------------------
182
211
  # Provider Credentials
@@ -222,7 +251,7 @@ defaults:
222
251
  development:
223
252
  database:
224
253
  name: htm_development
225
- log_level: debug
254
+ log_level: :debug
226
255
 
227
256
  # =============================================================================
228
257
  # Test Environment Overrides
@@ -231,8 +260,8 @@ test:
231
260
  database:
232
261
  name: htm_test
233
262
  job:
234
- backend: inline
235
- log_level: warn
263
+ backend: :inline
264
+ log_level: :warn
236
265
  telemetry_enabled: false
237
266
 
238
267
  # =============================================================================
@@ -242,5 +271,16 @@ production:
242
271
  database:
243
272
  pool_size: 25
244
273
  sslmode: require
245
- log_level: warn
274
+ log_level: :warn
246
275
  telemetry_enabled: true
276
+
277
+ # =============================================================================
278
+ # Examples Environment Overrides (for running example scripts)
279
+ # =============================================================================
280
+ examples:
281
+ database:
282
+ name: htm_examples
283
+ job:
284
+ backend: :inline
285
+ log_level: :info
286
+ telemetry_enabled: false
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
class HTM
  class Config
    # Validation helpers intended to be mixed into a configuration object.
    #
    # The including object must supply everything these methods read; none of
    # it is defined in this module:
    #   * readers: embedding_provider, tag_provider, proposition_provider,
    #     job_backend, week_start, relevance_semantic_weight,
    #     relevance_tag_weight, relevance_recency_weight, relevance_access_weight
    #   * instance variables: @embedding_generator, @tag_extractor,
    #     @proposition_extractor, @token_counter, @logger
    #   * raise_validation_error(message), used to report invalid settings
    #     (presumably raises; its definition is not visible here)
    module Validator
      SUPPORTED_PROVIDERS = %i[
        openai anthropic gemini azure ollama
        huggingface openrouter bedrock deepseek
      ].freeze

      SUPPORTED_JOB_BACKENDS = %i[active_job sidekiq inline thread fiber].freeze
      SUPPORTED_WEEK_STARTS = %i[sunday monday].freeze

      # Names of the instance variables (without the "@") that must hold
      # callables, in the order they are checked.
      REQUIRED_CALLABLES = %i[
        embedding_generator tag_extractor proposition_extractor token_counter
      ].freeze

      # Methods the logger object must respond to.
      REQUIRED_LOGGER_METHODS = %i[info warn error].freeze

      # Runs the provider, job backend, week start and relevance weight checks,
      # in that order. validate_callables and validate_logger are NOT invoked
      # from here.
      def validate_config
        validate_providers
        validate_job_backend
        validate_week_start
        validate_relevance_weights
      end

      # Validates the embedding, tag and proposition providers.
      def validate_providers
        validate_provider(:embedding_provider, embedding_provider)
        validate_provider(:tag_provider, tag_provider)
        validate_provider(:proposition_provider, proposition_provider)
      end

      # Reports a validation error unless +value+ is nil or one of
      # SUPPORTED_PROVIDERS. The comparison is against symbols, so a String
      # such as "ollama" is rejected.
      #
      # @param name [Symbol] setting name used in the error message
      # @param value [Symbol, nil] configured provider
      def validate_provider(name, value)
        return if value.nil? || SUPPORTED_PROVIDERS.include?(value)

        raise_validation_error("#{name} must be one of: #{SUPPORTED_PROVIDERS.join(', ')} (got #{value.inspect})")
      end

      # Reports a validation error when job_backend is set (truthy) but is not
      # one of SUPPORTED_JOB_BACKENDS. An unset backend is accepted.
      def validate_job_backend
        backend = job_backend
        return unless backend
        return if SUPPORTED_JOB_BACKENDS.include?(backend)

        raise_validation_error("job.backend must be one of: #{SUPPORTED_JOB_BACKENDS.join(', ')} (got #{backend.inspect})")
      end

      # Reports a validation error unless week_start is one of
      # SUPPORTED_WEEK_STARTS. Unlike job_backend, nil is not accepted here.
      def validate_week_start
        start = week_start
        return if SUPPORTED_WEEK_STARTS.include?(start)

        raise_validation_error("week_start must be one of: #{SUPPORTED_WEEK_STARTS.join(', ')} (got #{start.inspect})")
      end

      # Checks that the four relevance weights are numeric and that their sum
      # lies within 0.99..1.01.
      #
      # A nil or non-numeric weight is reported through raise_validation_error,
      # naming each offending setting, instead of surfacing as a
      # NoMethodError/TypeError from the addition.
      def validate_relevance_weights
        weights = {
          relevance_semantic_weight: relevance_semantic_weight,
          relevance_tag_weight: relevance_tag_weight,
          relevance_recency_weight: relevance_recency_weight,
          relevance_access_weight: relevance_access_weight
        }

        invalid = weights.reject { |_name, value| value.is_a?(Numeric) }
        unless invalid.empty?
          details = invalid.map { |name, value| "#{name}=#{value.inspect}" }.join(', ')
          raise_validation_error("relevance weights must be numeric (got #{details})")
          # Defensive: never fall through to the arithmetic with bad values,
          # even if raise_validation_error were to return.
          return
        end

        # Plain left-to-right addition (not Enumerable#sum, which uses
        # compensated summation for floats) so the reported total is exactly
        # what `a + b + c + d` yields.
        total = weights.values.reduce(:+)
        return if (0.99..1.01).cover?(total)

        raise_validation_error("relevance weights must sum to 1.0 (got #{total})")
      end

      # Ensures @embedding_generator, @tag_extractor, @proposition_extractor
      # and @token_counter each respond to :call, checked in that order.
      #
      # @raise [HTM::ValidationError] naming the first one that is not callable
      def validate_callables
        REQUIRED_CALLABLES.each do |name|
          next if instance_variable_get("@#{name}").respond_to?(:call)

          raise HTM::ValidationError, "#{name} must be callable"
        end
      end

      # Ensures @logger responds to :info, :warn and :error.
      #
      # @raise [HTM::ValidationError] when any of the three is missing
      def validate_logger
        return if REQUIRED_LOGGER_METHODS.all? { |level| @logger.respond_to?(level) }

        raise HTM::ValidationError, "logger must respond to :info, :warn, and :error"
      end
    end
  end
end