htm 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +1 -1
  3. data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +4 -4
  4. data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +1 -1
  5. data/.envrc +12 -25
  6. data/.irbrc +7 -7
  7. data/.tbls.yml +2 -2
  8. data/CHANGELOG.md +71 -0
  9. data/README.md +1 -1
  10. data/Rakefile +8 -3
  11. data/SETUP.md +12 -12
  12. data/bin/htm_mcp +0 -4
  13. data/db/seed_data/README.md +2 -2
  14. data/db/seeds.rb +2 -2
  15. data/docs/api/database.md +37 -37
  16. data/docs/api/htm.md +1 -1
  17. data/docs/api/yard/HTM/ActiveRecordConfig.md +2 -2
  18. data/docs/api/yard/HTM/Configuration.md +26 -15
  19. data/docs/api/yard/HTM/Database.md +7 -8
  20. data/docs/api/yard/HTM/JobAdapter.md +1 -1
  21. data/docs/api/yard/HTM/Railtie.md +2 -2
  22. data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
  23. data/docs/architecture/adrs/011-pgai-integration.md +4 -4
  24. data/docs/database_rake_tasks.md +5 -5
  25. data/docs/development/rake-tasks.md +11 -11
  26. data/docs/development/setup.md +21 -21
  27. data/docs/development/testing.md +1 -1
  28. data/docs/getting-started/installation.md +20 -20
  29. data/docs/getting-started/quick-start.md +12 -12
  30. data/docs/guides/getting-started.md +2 -2
  31. data/docs/guides/long-term-memory.md +1 -1
  32. data/docs/guides/mcp-server.md +17 -17
  33. data/docs/guides/robot-groups.md +8 -8
  34. data/docs/index.md +4 -4
  35. data/docs/multi_framework_support.md +8 -8
  36. data/docs/setup_local_database.md +19 -19
  37. data/docs/using_rake_tasks_in_your_app.md +14 -14
  38. data/examples/README.md +50 -6
  39. data/examples/basic_usage.rb +31 -21
  40. data/examples/cli_app/README.md +8 -8
  41. data/examples/cli_app/htm_cli.rb +5 -5
  42. data/examples/config_file_example/README.md +256 -0
  43. data/examples/config_file_example/config/htm.local.yml +34 -0
  44. data/examples/config_file_example/custom_config.yml +22 -0
  45. data/examples/config_file_example/show_config.rb +125 -0
  46. data/examples/custom_llm_configuration.rb +7 -7
  47. data/examples/example_app/Rakefile +2 -2
  48. data/examples/example_app/app.rb +8 -8
  49. data/examples/file_loader_usage.rb +9 -9
  50. data/examples/mcp_client.rb +5 -5
  51. data/examples/rails_app/Gemfile.lock +48 -56
  52. data/examples/rails_app/README.md +1 -1
  53. data/examples/robot_groups/multi_process.rb +5 -5
  54. data/examples/robot_groups/robot_worker.rb +5 -5
  55. data/examples/robot_groups/same_process.rb +9 -9
  56. data/examples/sinatra_app/app.rb +1 -1
  57. data/examples/timeframe_demo.rb +1 -1
  58. data/lib/htm/active_record_config.rb +12 -25
  59. data/lib/htm/circuit_breaker.rb +0 -2
  60. data/lib/htm/config/defaults.yml +246 -0
  61. data/lib/htm/config.rb +888 -0
  62. data/lib/htm/database.rb +23 -27
  63. data/lib/htm/embedding_service.rb +0 -4
  64. data/lib/htm/integrations/sinatra.rb +3 -7
  65. data/lib/htm/job_adapter.rb +1 -15
  66. data/lib/htm/jobs/generate_embedding_job.rb +1 -7
  67. data/lib/htm/jobs/generate_propositions_job.rb +2 -12
  68. data/lib/htm/jobs/generate_tags_job.rb +1 -8
  69. data/lib/htm/loaders/defaults_loader.rb +143 -0
  70. data/lib/htm/loaders/xdg_config_loader.rb +116 -0
  71. data/lib/htm/mcp/cli.rb +200 -58
  72. data/lib/htm/mcp/server.rb +3 -3
  73. data/lib/htm/proposition_service.rb +2 -12
  74. data/lib/htm/railtie.rb +3 -4
  75. data/lib/htm/tag_service.rb +1 -8
  76. data/lib/htm/version.rb +1 -1
  77. data/lib/htm.rb +124 -5
  78. metadata +24 -4
  79. data/config/database.yml +0 -77
  80. data/lib/htm/configuration.rb +0 -799
@@ -0,0 +1,246 @@
1
+ # HTM Bundled Defaults
2
+ #
3
+ # This file is the SINGLE SOURCE OF TRUTH for the HTM configuration schema.
4
+ # All attributes must be declared here (even if nil) to be recognized.
5
+ # It is bundled with the gem and loaded automatically at lowest priority.
6
+ #
7
+ # Loading priority (lowest to highest):
8
+ # 1. This file (bundled defaults)
9
+ # 2. XDG user config (~/.config/htm/htm.yml)
10
+ # 3. Project config (./config/htm.yml)
11
+ # 4. Local overrides (./config/htm.local.yml)
12
+ # 5. Environment variables (HTM_*)
13
+ # 6. Programmatic (HTM.configure block)
14
+ #
15
+ # Structure:
16
+ # - defaults: Base values for all environments (with nested sections)
17
+ # - development: Overrides for development environment
18
+ # - test: Overrides for test environment
19
+ # - production: Overrides for production environment
20
+
21
+ # =============================================================================
22
+ # Shared Defaults (base for all environments)
23
+ # =============================================================================
24
+ defaults:
25
+ # ---------------------------------------------------------------------------
26
+ # Database Configuration
27
+ # Access: HTM.config.database.host, HTM.config.database.port, etc.
28
+ # ---------------------------------------------------------------------------
29
+ database:
30
+ url: ~
31
+ host: localhost
32
+ port: 5432
33
+ name: ~
34
+ user: ~
35
+ password: ~
36
+ pool_size: 10
37
+ timeout: 5000
38
+ sslmode: prefer
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Service Configuration
42
+ # Access: HTM.config.service.name
43
+ # ---------------------------------------------------------------------------
44
+ service:
45
+ name: htm
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Embedding Configuration
49
+ # Access: HTM.config.embedding.provider, HTM.config.embedding.model, etc.
50
+ # ---------------------------------------------------------------------------
51
+ embedding:
52
+ provider: ollama
53
+ model: nomic-embed-text:latest
54
+ dimensions: 768
55
+ timeout: 120
56
+ max_dimension: 2000
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # Tag Extraction Configuration
60
+ # Access: HTM.config.tag.provider, HTM.config.tag.model, etc.
61
+ #
62
+ # Prompt templates use %{placeholder} for runtime interpolation:
63
+ # %{text} - the content to extract tags from
64
+ # %{max_depth} - maximum tag hierarchy depth
65
+ # %{taxonomy_context} - existing taxonomy info or new taxonomy message
66
+ # ---------------------------------------------------------------------------
67
+ tag:
68
+ provider: ollama
69
+ model: gemma3:latest
70
+ timeout: 180
71
+ max_depth: 4
72
+
73
+ system_prompt: |
74
+ You are a taxonomy classifier that assigns texts to a hierarchical classification tree.
75
+ Each concept has ONE canonical location in the tree.
76
+ Output 2-5 classification paths, one per line.
77
+
78
+ user_prompt_template: |
79
+ Extract classification tags for this text using a HIERARCHICAL TAXONOMY.
80
+
81
+ %{taxonomy_context}
82
+
83
+ TAG FORMAT: domain:category:subcategory:term (colon-separated, max %{max_depth} levels)
84
+
85
+ LEVEL GUIDELINES:
86
+ - Level 1 (domain): Broad field (database, ai, web, security, devops)
87
+ - Level 2 (category): Major subdivision (database:relational, ai:machine-learning)
88
+ - Level 3 (subcategory): Specific area (database:relational:postgresql)
89
+ - Level 4 (term): Fine detail, use sparingly (database:relational:postgresql:extensions)
90
+
91
+ RULES:
92
+ 1. Each concept belongs to ONE path only
93
+ 2. Use lowercase, hyphens for multi-word terms
94
+ 3. Return 2-5 tags that best classify this text
95
+ 4. Match existing taxonomy paths when applicable
96
+
97
+ TEXT: %{text}
98
+
99
+ Return ONLY tags, one per line.
100
+
101
+ taxonomy_context_existing: "Existing taxonomy paths: %{sample_tags}\n\nPrefer reusing these paths when the text matches their domain."
102
+ taxonomy_context_empty: "This is a new taxonomy - establish clear root categories."
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # Proposition Extraction Configuration
106
+ # Access: HTM.config.proposition.provider, HTM.config.proposition.model, etc.
107
+ #
108
+ # Prompt templates use %{placeholder} for runtime interpolation:
109
+ # %{text} - the content to extract propositions from
110
+ # ---------------------------------------------------------------------------
111
+ proposition:
112
+ provider: ollama
113
+ model: gemma3:latest
114
+ timeout: 180
115
+ enabled: false
116
+
117
+ system_prompt: |
118
+ You are an atomic fact extraction system. Your goal is maximum decomposition.
119
+ Break every statement into its smallest possible factual units.
120
+ Output ONLY propositions, one per line, prefixed with a dash (-).
121
+
122
+ user_prompt_template: |
123
+ Extract all ATOMIC factual propositions from the following text.
124
+
125
+ An atomic proposition expresses exactly ONE relationship or fact.
126
+
127
+ Rules:
128
+ 1. Split compound statements into separate atomic facts
129
+ 2. Each proposition = exactly one fact
130
+ 3. Use full names, never pronouns
131
+ 4. Make each proposition understandable in isolation
132
+
133
+ TEXT: %{text}
134
+
135
+ Return ONLY atomic propositions, one per line. Use a dash (-) prefix for each.
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # Chunking Configuration (for file loading)
139
+ # Access: HTM.config.chunking.size, HTM.config.chunking.overlap
140
+ # ---------------------------------------------------------------------------
141
+ chunking:
142
+ size: 1024
143
+ overlap: 64
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # Circuit Breaker Configuration
147
+ # Access: HTM.config.circuit_breaker.failure_threshold, etc.
148
+ # ---------------------------------------------------------------------------
149
+ circuit_breaker:
150
+ failure_threshold: 5
151
+ reset_timeout: 60
152
+ half_open_max_calls: 3
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Relevance Scoring Configuration
156
+ # Access: HTM.config.relevance.semantic_weight, etc.
157
+ # ---------------------------------------------------------------------------
158
+ relevance:
159
+ semantic_weight: 0.5
160
+ tag_weight: 0.3
161
+ recency_weight: 0.1
162
+ access_weight: 0.1
163
+ recency_half_life_hours: 168.0
164
+
165
+ # ---------------------------------------------------------------------------
166
+ # Job Backend Configuration
167
+ # Access: HTM.config.job.backend
168
+ # ---------------------------------------------------------------------------
169
+ job:
170
+ backend: ~
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # General Settings
174
+ # Access: HTM.config.week_start, HTM.config.connection_timeout, etc.
175
+ # ---------------------------------------------------------------------------
176
+ week_start: sunday
177
+ connection_timeout: 30
178
+ telemetry_enabled: false
179
+ log_level: info
180
+
181
+ # ---------------------------------------------------------------------------
182
+ # Provider Credentials
183
+ # Access: HTM.config.providers.openai.api_key, etc.
184
+ # ---------------------------------------------------------------------------
185
+ providers:
186
+ openai:
187
+ api_key: ~
188
+ organization: ~
189
+ project: ~
190
+
191
+ anthropic:
192
+ api_key: ~
193
+
194
+ gemini:
195
+ api_key: ~
196
+
197
+ azure:
198
+ api_key: ~
199
+ endpoint: ~
200
+ api_version: '2024-02-01'
201
+
202
+ ollama:
203
+ url: http://localhost:11434
204
+
205
+ huggingface:
206
+ api_key: ~
207
+
208
+ openrouter:
209
+ api_key: ~
210
+
211
+ bedrock:
212
+ access_key: ~
213
+ secret_key: ~
214
+ region: us-east-1
215
+
216
+ deepseek:
217
+ api_key: ~
218
+
219
+ # =============================================================================
220
+ # Development Environment Overrides
221
+ # =============================================================================
222
+ development:
223
+ database:
224
+ name: htm_development
225
+ log_level: debug
226
+
227
+ # =============================================================================
228
+ # Test Environment Overrides
229
+ # =============================================================================
230
+ test:
231
+ database:
232
+ name: htm_test
233
+ job:
234
+ backend: inline
235
+ log_level: warn
236
+ telemetry_enabled: false
237
+
238
+ # =============================================================================
239
+ # Production Environment Overrides
240
+ # =============================================================================
241
+ production:
242
+ database:
243
+ pool_size: 25
244
+ sslmode: require
245
+ log_level: warn
246
+ telemetry_enabled: true