htm 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.architecture/decisions/adrs/001-use-postgresql-timescaledb-storage.md +1 -1
- data/.architecture/decisions/adrs/011-database-side-embedding-generation-with-pgai.md +4 -4
- data/.architecture/decisions/adrs/012-llm-driven-ontology-topic-extraction.md +1 -1
- data/.envrc +12 -25
- data/.irbrc +7 -7
- data/.tbls.yml +2 -2
- data/CHANGELOG.md +71 -0
- data/README.md +1 -1
- data/Rakefile +8 -3
- data/SETUP.md +12 -12
- data/bin/htm_mcp +0 -4
- data/db/seed_data/README.md +2 -2
- data/db/seeds.rb +2 -2
- data/docs/api/database.md +37 -37
- data/docs/api/htm.md +1 -1
- data/docs/api/yard/HTM/ActiveRecordConfig.md +2 -2
- data/docs/api/yard/HTM/Configuration.md +26 -15
- data/docs/api/yard/HTM/Database.md +7 -8
- data/docs/api/yard/HTM/JobAdapter.md +1 -1
- data/docs/api/yard/HTM/Railtie.md +2 -2
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
- data/docs/architecture/adrs/011-pgai-integration.md +4 -4
- data/docs/database_rake_tasks.md +5 -5
- data/docs/development/rake-tasks.md +11 -11
- data/docs/development/setup.md +21 -21
- data/docs/development/testing.md +1 -1
- data/docs/getting-started/installation.md +20 -20
- data/docs/getting-started/quick-start.md +12 -12
- data/docs/guides/getting-started.md +2 -2
- data/docs/guides/long-term-memory.md +1 -1
- data/docs/guides/mcp-server.md +17 -17
- data/docs/guides/robot-groups.md +8 -8
- data/docs/index.md +4 -4
- data/docs/multi_framework_support.md +8 -8
- data/docs/setup_local_database.md +19 -19
- data/docs/using_rake_tasks_in_your_app.md +14 -14
- data/examples/README.md +50 -6
- data/examples/basic_usage.rb +31 -21
- data/examples/cli_app/README.md +8 -8
- data/examples/cli_app/htm_cli.rb +5 -5
- data/examples/config_file_example/README.md +256 -0
- data/examples/config_file_example/config/htm.local.yml +34 -0
- data/examples/config_file_example/custom_config.yml +22 -0
- data/examples/config_file_example/show_config.rb +125 -0
- data/examples/custom_llm_configuration.rb +7 -7
- data/examples/example_app/Rakefile +2 -2
- data/examples/example_app/app.rb +8 -8
- data/examples/file_loader_usage.rb +9 -9
- data/examples/mcp_client.rb +5 -5
- data/examples/rails_app/Gemfile.lock +48 -56
- data/examples/rails_app/README.md +1 -1
- data/examples/robot_groups/multi_process.rb +5 -5
- data/examples/robot_groups/robot_worker.rb +5 -5
- data/examples/robot_groups/same_process.rb +9 -9
- data/examples/sinatra_app/app.rb +1 -1
- data/examples/timeframe_demo.rb +1 -1
- data/lib/htm/active_record_config.rb +12 -25
- data/lib/htm/circuit_breaker.rb +0 -2
- data/lib/htm/config/defaults.yml +246 -0
- data/lib/htm/config.rb +888 -0
- data/lib/htm/database.rb +23 -27
- data/lib/htm/embedding_service.rb +0 -4
- data/lib/htm/integrations/sinatra.rb +3 -7
- data/lib/htm/job_adapter.rb +1 -15
- data/lib/htm/jobs/generate_embedding_job.rb +1 -7
- data/lib/htm/jobs/generate_propositions_job.rb +2 -12
- data/lib/htm/jobs/generate_tags_job.rb +1 -8
- data/lib/htm/loaders/defaults_loader.rb +143 -0
- data/lib/htm/loaders/xdg_config_loader.rb +116 -0
- data/lib/htm/mcp/cli.rb +200 -58
- data/lib/htm/mcp/server.rb +3 -3
- data/lib/htm/proposition_service.rb +2 -12
- data/lib/htm/railtie.rb +3 -4
- data/lib/htm/tag_service.rb +1 -8
- data/lib/htm/version.rb +1 -1
- data/lib/htm.rb +124 -5
- metadata +24 -4
- data/config/database.yml +0 -77
- data/lib/htm/configuration.rb +0 -799
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
# HTM Bundled Defaults
|
|
2
|
+
#
|
|
3
|
+
# This file is the SINGLE SOURCE OF TRUTH for the HTM configuration schema.
|
|
4
|
+
# All attributes must be declared here (even if their value is nil, written as ~) to be recognized.
|
|
5
|
+
# It is bundled with the gem and loaded automatically at lowest priority.
|
|
6
|
+
#
|
|
7
|
+
# Loading priority (lowest to highest):
|
|
8
|
+
# 1. This file (bundled defaults)
|
|
9
|
+
# 2. XDG user config (~/.config/htm/htm.yml)
|
|
10
|
+
# 3. Project config (./config/htm.yml)
|
|
11
|
+
# 4. Local overrides (./config/htm.local.yml)
|
|
12
|
+
# 5. Environment variables (HTM_*)
|
|
13
|
+
# 6. Programmatic (HTM.configure block)
|
|
14
|
+
#
|
|
15
|
+
# Structure:
|
|
16
|
+
# - defaults: Base values for all environments (with nested sections)
|
|
17
|
+
# - development: Overrides for development environment
|
|
18
|
+
# - test: Overrides for test environment
|
|
19
|
+
# - production: Overrides for production environment
|
|
20
|
+
|
|
21
|
+
# =============================================================================
|
|
22
|
+
# Shared Defaults (base for all environments)
|
|
23
|
+
# =============================================================================
|
|
24
|
+
defaults:
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Database Configuration
|
|
27
|
+
# Access: HTM.config.database.host, HTM.config.database.port, etc.
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
database:
|
|
30
|
+
url: ~
|
|
31
|
+
host: localhost
|
|
32
|
+
port: 5432
|
|
33
|
+
name: ~
|
|
34
|
+
user: ~
|
|
35
|
+
password: ~
|
|
36
|
+
pool_size: 10
|
|
37
|
+
timeout: 5000
|
|
38
|
+
sslmode: prefer
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Service Configuration
|
|
42
|
+
# Access: HTM.config.service.name
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
service:
|
|
45
|
+
name: htm
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Embedding Configuration
|
|
49
|
+
# Access: HTM.config.embedding.provider, HTM.config.embedding.model, etc.
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
embedding:
|
|
52
|
+
provider: ollama
|
|
53
|
+
model: nomic-embed-text:latest
|
|
54
|
+
dimensions: 768
|
|
55
|
+
timeout: 120
|
|
56
|
+
max_dimension: 2000
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Tag Extraction Configuration
|
|
60
|
+
# Access: HTM.config.tag.provider, HTM.config.tag.model, etc.
|
|
61
|
+
#
|
|
62
|
+
# Prompt templates use %{placeholder} for runtime interpolation:
|
|
63
|
+
# %{text} - the content to extract tags from
|
|
64
|
+
# %{max_depth} - maximum tag hierarchy depth
|
|
65
|
+
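# %{taxonomy_context} - existing taxonomy info or new taxonomy message (see taxonomy_context_existing, which itself interpolates %{sample_tags}, and taxonomy_context_empty below)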
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
tag:
|
|
68
|
+
provider: ollama
|
|
69
|
+
model: gemma3:latest
|
|
70
|
+
timeout: 180
|
|
71
|
+
max_depth: 4
|
|
72
|
+
|
|
73
|
+
system_prompt: |
|
|
74
|
+
You are a taxonomy classifier that assigns texts to a hierarchical classification tree.
|
|
75
|
+
Each concept has ONE canonical location in the tree.
|
|
76
|
+
Output 2-5 classification paths, one per line.
|
|
77
|
+
|
|
78
|
+
user_prompt_template: |
|
|
79
|
+
Extract classification tags for this text using a HIERARCHICAL TAXONOMY.
|
|
80
|
+
|
|
81
|
+
%{taxonomy_context}
|
|
82
|
+
|
|
83
|
+
TAG FORMAT: domain:category:subcategory:term (colon-separated, max %{max_depth} levels)
|
|
84
|
+
|
|
85
|
+
LEVEL GUIDELINES:
|
|
86
|
+
- Level 1 (domain): Broad field (database, ai, web, security, devops)
|
|
87
|
+
- Level 2 (category): Major subdivision (database:relational, ai:machine-learning)
|
|
88
|
+
- Level 3 (subcategory): Specific area (database:relational:postgresql)
|
|
89
|
+
- Level 4 (term): Fine detail, use sparingly (database:relational:postgresql:extensions)
|
|
90
|
+
|
|
91
|
+
RULES:
|
|
92
|
+
1. Each concept belongs to ONE path only
|
|
93
|
+
2. Use lowercase, hyphens for multi-word terms
|
|
94
|
+
3. Return 2-5 tags that best classify this text
|
|
95
|
+
4. Match existing taxonomy paths when applicable
|
|
96
|
+
|
|
97
|
+
TEXT: %{text}
|
|
98
|
+
|
|
99
|
+
Return ONLY tags, one per line.
|
|
100
|
+
|
|
101
|
+
taxonomy_context_existing: "Existing taxonomy paths: %{sample_tags}\n\nPrefer reusing these paths when the text matches their domain."
|
|
102
|
+
taxonomy_context_empty: "This is a new taxonomy - establish clear root categories."
|
|
103
|
+
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
# Proposition Extraction Configuration
|
|
106
|
+
# Access: HTM.config.proposition.provider, HTM.config.proposition.model, etc.
|
|
107
|
+
#
|
|
108
|
+
# Prompt templates use %{placeholder} for runtime interpolation:
|
|
109
|
+
# %{text} - the content to extract propositions from
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
proposition:
|
|
112
|
+
provider: ollama
|
|
113
|
+
model: gemma3:latest
|
|
114
|
+
timeout: 180
|
|
115
|
+
enabled: false
|
|
116
|
+
|
|
117
|
+
system_prompt: |
|
|
118
|
+
You are an atomic fact extraction system. Your goal is maximum decomposition.
|
|
119
|
+
Break every statement into its smallest possible factual units.
|
|
120
|
+
Output ONLY propositions, one per line, prefixed with a dash (-).
|
|
121
|
+
|
|
122
|
+
user_prompt_template: |
|
|
123
|
+
Extract all ATOMIC factual propositions from the following text.
|
|
124
|
+
|
|
125
|
+
An atomic proposition expresses exactly ONE relationship or fact.
|
|
126
|
+
|
|
127
|
+
Rules:
|
|
128
|
+
1. Split compound statements into separate atomic facts
|
|
129
|
+
2. Each proposition = exactly one fact
|
|
130
|
+
3. Use full names, never pronouns
|
|
131
|
+
4. Make each proposition understandable in isolation
|
|
132
|
+
|
|
133
|
+
TEXT: %{text}
|
|
134
|
+
|
|
135
|
+
Return ONLY atomic propositions, one per line. Use a dash (-) prefix for each.
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# Chunking Configuration (for file loading)
|
|
139
|
+
# Access: HTM.config.chunking.size, HTM.config.chunking.overlap
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
chunking:
|
|
142
|
+
size: 1024
|
|
143
|
+
overlap: 64
|
|
144
|
+
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
# Circuit Breaker Configuration
|
|
147
|
+
# Access: HTM.config.circuit_breaker.failure_threshold, etc.
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
circuit_breaker:
|
|
150
|
+
failure_threshold: 5
|
|
151
|
+
reset_timeout: 60
|
|
152
|
+
half_open_max_calls: 3
|
|
153
|
+
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
# Relevance Scoring Configuration
|
|
156
|
+
# Access: HTM.config.relevance.semantic_weight, etc.
|
|
157
|
+
# ---------------------------------------------------------------------------
|
|
158
|
+
relevance:
|
|
159
|
+
semantic_weight: 0.5
|
|
160
|
+
tag_weight: 0.3
|
|
161
|
+
recency_weight: 0.1
|
|
162
|
+
access_weight: 0.1
|
|
163
|
+
recency_half_life_hours: 168.0
|
|
164
|
+
|
|
165
|
+
# ---------------------------------------------------------------------------
|
|
166
|
+
# Job Backend Configuration
|
|
167
|
+
# Access: HTM.config.job.backend
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
job:
|
|
170
|
+
backend: ~
|
|
171
|
+
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
# General Settings
|
|
174
|
+
# Access: HTM.config.week_start, HTM.config.connection_timeout, etc.
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
week_start: sunday
|
|
177
|
+
connection_timeout: 30
|
|
178
|
+
telemetry_enabled: false
|
|
179
|
+
log_level: info
|
|
180
|
+
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
# Provider Credentials
|
|
183
|
+
# Access: HTM.config.providers.openai.api_key, etc.
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
providers:
|
|
186
|
+
openai:
|
|
187
|
+
api_key: ~
|
|
188
|
+
organization: ~
|
|
189
|
+
project: ~
|
|
190
|
+
|
|
191
|
+
anthropic:
|
|
192
|
+
api_key: ~
|
|
193
|
+
|
|
194
|
+
gemini:
|
|
195
|
+
api_key: ~
|
|
196
|
+
|
|
197
|
+
azure:
|
|
198
|
+
api_key: ~
|
|
199
|
+
endpoint: ~
|
|
200
|
+
api_version: '2024-02-01'
|
|
201
|
+
|
|
202
|
+
ollama:
|
|
203
|
+
url: http://localhost:11434
|
|
204
|
+
|
|
205
|
+
huggingface:
|
|
206
|
+
api_key: ~
|
|
207
|
+
|
|
208
|
+
openrouter:
|
|
209
|
+
api_key: ~
|
|
210
|
+
|
|
211
|
+
bedrock:
|
|
212
|
+
access_key: ~
|
|
213
|
+
secret_key: ~
|
|
214
|
+
region: us-east-1
|
|
215
|
+
|
|
216
|
+
deepseek:
|
|
217
|
+
api_key: ~
|
|
218
|
+
|
|
219
|
+
# =============================================================================
|
|
220
|
+
# Development Environment Overrides
|
|
221
|
+
# =============================================================================
|
|
222
|
+
development:
|
|
223
|
+
database:
|
|
224
|
+
name: htm_development
|
|
225
|
+
log_level: debug
|
|
226
|
+
|
|
227
|
+
# =============================================================================
|
|
228
|
+
# Test Environment Overrides
|
|
229
|
+
# =============================================================================
|
|
230
|
+
test:
|
|
231
|
+
database:
|
|
232
|
+
name: htm_test
|
|
233
|
+
job:
|
|
234
|
+
backend: inline
|
|
235
|
+
log_level: warn
|
|
236
|
+
telemetry_enabled: false
|
|
237
|
+
|
|
238
|
+
# =============================================================================
|
|
239
|
+
# Production Environment Overrides
|
|
240
|
+
# =============================================================================
|
|
241
|
+
production:
|
|
242
|
+
database:
|
|
243
|
+
pool_size: 25
|
|
244
|
+
sslmode: require
|
|
245
|
+
log_level: warn
|
|
246
|
+
telemetry_enabled: true
|