htm 0.0.11 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/.dictate.toml +46 -0
  3. data/.envrc +2 -0
  4. data/CHANGELOG.md +52 -2
  5. data/README.md +79 -0
  6. data/Rakefile +14 -2
  7. data/bin/htm_mcp.rb +94 -0
  8. data/config/database.yml +20 -13
  9. data/db/migrate/00010_add_soft_delete_to_associations.rb +29 -0
  10. data/db/migrate/00011_add_performance_indexes.rb +21 -0
  11. data/db/migrate/00012_add_tags_trigram_index.rb +18 -0
  12. data/db/migrate/00013_enable_lz4_compression.rb +43 -0
  13. data/db/schema.sql +49 -92
  14. data/docs/api/index.md +1 -1
  15. data/docs/api/yard/HTM.md +2 -4
  16. data/docs/architecture/index.md +1 -1
  17. data/docs/development/index.md +1 -1
  18. data/docs/getting-started/index.md +1 -1
  19. data/docs/guides/index.md +1 -1
  20. data/docs/images/telemetry-architecture.svg +153 -0
  21. data/docs/telemetry.md +391 -0
  22. data/examples/README.md +46 -1
  23. data/examples/cli_app/README.md +1 -1
  24. data/examples/cli_app/htm_cli.rb +1 -1
  25. data/examples/sinatra_app/app.rb +1 -1
  26. data/examples/telemetry/README.md +147 -0
  27. data/examples/telemetry/SETUP_README.md +169 -0
  28. data/examples/telemetry/demo.rb +498 -0
  29. data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
  30. data/lib/htm/configuration.rb +261 -70
  31. data/lib/htm/database.rb +46 -22
  32. data/lib/htm/embedding_service.rb +24 -14
  33. data/lib/htm/errors.rb +15 -1
  34. data/lib/htm/jobs/generate_embedding_job.rb +19 -0
  35. data/lib/htm/jobs/generate_propositions_job.rb +103 -0
  36. data/lib/htm/jobs/generate_tags_job.rb +24 -0
  37. data/lib/htm/loaders/markdown_chunker.rb +79 -0
  38. data/lib/htm/loaders/markdown_loader.rb +41 -15
  39. data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
  40. data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
  41. data/lib/htm/long_term_memory/node_operations.rb +209 -0
  42. data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
  43. data/lib/htm/long_term_memory/robot_operations.rb +34 -0
  44. data/lib/htm/long_term_memory/tag_operations.rb +428 -0
  45. data/lib/htm/long_term_memory/vector_search.rb +109 -0
  46. data/lib/htm/long_term_memory.rb +51 -1153
  47. data/lib/htm/models/node.rb +35 -2
  48. data/lib/htm/models/node_tag.rb +31 -0
  49. data/lib/htm/models/robot_node.rb +31 -0
  50. data/lib/htm/models/tag.rb +44 -0
  51. data/lib/htm/proposition_service.rb +169 -0
  52. data/lib/htm/query_cache.rb +214 -0
  53. data/lib/htm/sql_builder.rb +178 -0
  54. data/lib/htm/tag_service.rb +16 -6
  55. data/lib/htm/tasks.rb +8 -2
  56. data/lib/htm/telemetry.rb +224 -0
  57. data/lib/htm/version.rb +1 -1
  58. data/lib/htm.rb +64 -3
  59. data/lib/tasks/doc.rake +1 -1
  60. data/lib/tasks/htm.rake +259 -13
  61. data/mkdocs.yml +96 -96
  62. metadata +42 -16
  63. data/.aigcm_msg +0 -1
  64. data/.claude/settings.local.json +0 -95
  65. data/CLAUDE.md +0 -603
  66. data/examples/cli_app/temp.log +0 -93
  67. data/lib/htm/loaders/paragraph_chunker.rb +0 -112
  68. data/notes/ARCHITECTURE_REVIEW.md +0 -1167
  69. data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
  70. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
  71. data/notes/next_steps.md +0 -100
  72. data/notes/plan.md +0 -627
  73. data/notes/tag_ontology_enhancement_ideas.md +0 -222
  74. data/notes/timescaledb_removal_summary.md +0 -200
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c6e41109b86fd5c51c8f96cec8a11ea9455035da54d72ae461b3b2f0a7ab7a8d
4
- data.tar.gz: 1fe15917933aefa827a12c6e92406711ab95449097cb78bdab4c0f6510f595c1
3
+ metadata.gz: 5658951570d47a6988a3e6c5078ae90c83a8bbbaebd4bc18b8a70152d0648ab1
4
+ data.tar.gz: e4216f5bbe7cbbcfb1b78058f16fb5c190a8902f4778874c858acf7401f0da14
5
5
  SHA512:
6
- metadata.gz: c02e326797a6986e1a5b987bec948f09c0ffc7cb45045898e3fc6faf5c57c9bf3ca4d399324e88528f994c7e3e1989bb71f462aae8adbb270b7c99654aa135d3
7
- data.tar.gz: 4ee90ea84f36ab2d85c72cae4f5a1382e9a174d354f2d976d73b8915cb5dc8df68f6ae799ec082076293ce9b386500b94b4fe12d291903c0804351f5fd027151
6
+ metadata.gz: 3d206a81c9120d7c2ffb8a8afb41056ee93a9b614a7abfabe1e14196e60570daaab048e37cc74601bd04afed0af5290616ff925a7610dc37e8816cb4d797956a
7
+ data.tar.gz: d2977a3e77e2d7a0b6cd886d3edadbfe8c04b3af55e4280d8b70393de41ad019a76fe12e19c1bf141d3f6b604ac4a221caca5ea914c64985521a4ccbe2fbfce9
data/.dictate.toml ADDED
@@ -0,0 +1,46 @@
1
+ # Dictator Configuration for HTM
2
+ # Decree-based structural enforcement
3
+ # https://github.com/seuros/dictator
4
+
5
+ [decree.supreme]
6
+ # Universal structural rules (applies to ALL files)
7
+ trailing_whitespace = "deny"
8
+ tabs_vs_spaces = "spaces"
9
+ tab_width = 2
10
+ final_newline = "require"
11
+ line_endings = "lf"
12
+ blank_line_whitespace = "deny"
13
+
14
+ [decree.ruby]
15
+ # Ruby-specific structural enforcement
16
+ max_line_length = 120
17
+ max_lines = 300
18
+ ignore_comments = true
19
+ ignore_blank_lines = true
20
+ method_visibility_order = ["public", "protected", "private"]
21
+ comment_spacing = true
22
+
23
+ [decree.ruby.linter]
24
+ # External linter - Dictator adds: -A --format json
25
+ command = "rubocop"
26
+
27
+ [decree.frontmatter]
28
+ # YAML frontmatter ordering for .md files
29
+ order = ["title", "slug", "pubDate", "description", "tags", "draft"]
30
+ required = ["title"]
31
+
32
+ # Disable unused language decrees
33
+ [decree.typescript]
34
+ enabled = false
35
+
36
+ [decree.golang]
37
+ enabled = false
38
+
39
+ [decree.rust]
40
+ enabled = false
41
+
42
+ [decree.python]
43
+ enabled = false
44
+
45
+ [decree.kjr]
46
+ enabled = false
data/.envrc CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  export RR=`pwd`
4
4
 
5
+ export HTM_EXTRACT_PROPOSITIONS=true
6
+
5
7
  # Database connection - Localhost PostgreSQL
6
8
  export HTM_DBHOST=localhost
7
9
  export HTM_DBPORT=5432
data/CHANGELOG.md CHANGED
@@ -7,6 +7,56 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.0.14] - 2025-12-05
11
+
12
+ ### Added
13
+ - **OpenTelemetry metrics** - Optional observability with zero overhead when disabled
14
+ - New `HTM::Telemetry` module with null object pattern
15
+ - Metrics: `htm.jobs` (counter), `htm.embedding.latency`, `htm.tag.latency`, `htm.search.latency` (histograms), `htm.cache.operations` (counter)
16
+ - Instrumented: `GenerateEmbeddingJob`, `GenerateTagsJob`, search methods, `QueryCache`
17
+ - Enable via `HTM_TELEMETRY_ENABLED=true` or `config.telemetry_enabled = true`
18
+ - Works with 50+ OTLP-compatible backends (Jaeger, Prometheus, Datadog, etc.)
19
+ - Comprehensive documentation in `docs/telemetry.md`
20
+ - 18 new telemetry tests
21
+ - **Telemetry demo with Grafana visualization** - `examples/telemetry/`
22
+ - Live dashboard showing HTM metrics (job counts, latencies, cache hit rates)
23
+ - Uses Homebrew-installed Prometheus and Grafana (no Docker required)
24
+ - Auto-configures Prometheus scrape target
25
+ - Pre-built Grafana dashboard JSON for easy import
26
+ - Interactive shutdown prompt for service management
27
+ - **`htm:db:create` rake task** - Create database if it doesn't exist (respects `RAILS_ENV`)
28
+
29
+ ### Changed
30
+ - **All `htm:db:*` tasks now respect `RAILS_ENV`** - Following Rails conventions
31
+ - Database selection based on environment: `htm_development`, `htm_test`, `htm_production`
32
+ - `rake test` automatically sets `RAILS_ENV=test`
33
+ - Example: `RAILS_ENV=test rake htm:db:setup` operates on `htm_test`
34
+ - **`config/database.yml` refactored** - Extracts base name from `HTM_DBURL` and appends environment suffix
35
+ - `HTM_DBURL=postgresql://...htm_development` + `RAILS_ENV=test` → connects to `htm_test`
36
+ - **`HTM::Database.default_config` now respects `RAILS_ENV`** - Uses `ActiveRecordConfig.load_database_config`
37
+ - **Renamed `htm:db:test` to `htm:db:verify`** - Avoids naming collision with test database namespace
38
+ - `htm:db:verify` verifies database connection
39
+ - `RAILS_ENV=test rake htm:db:*` operates on test database
40
+
41
+ ## [0.0.13] - 2025-12-04
42
+
43
+ ### Changed
44
+ - **MarkdownChunker now uses Baran gem** - Replaced custom ParagraphChunker with Baran's `MarkdownSplitter`
45
+ - Respects markdown structure (headers, code blocks, horizontal rules)
46
+ - Configurable `chunk_size` and `chunk_overlap` settings
47
+ - Returns cursor positions for each chunk
48
+ - **Fuzzy tag search with trigram matching** - Tags now searchable with fuzzy matching via pg_trgm
49
+ - **LongTermMemory modularization** - Refactored into separate concerns for better maintainability
50
+ - Search optimizations for vector, fulltext, and hybrid strategies
51
+
52
+ ### Fixed
53
+ - **Configurable limits** - The following are now configurable (previously hard-coded):
54
+ - `max_embedding_dimension` (default: 2000)
55
+ - `max_tag_depth` (default: 4)
56
+ - Circuit breaker settings: `failure_threshold`, `reset_timeout`, `half_open_max_calls`
57
+ - Relevance scoring weights: `semantic_weight`, `tag_weight`, `recency_weight`, `access_weight`
58
+ - `relevance_recency_half_life_hours` (default: 168 = 1 week)
59
+
10
60
  ## [0.0.11] - 2025-12-02
11
61
 
12
62
  ### Added
@@ -260,7 +310,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
260
310
  - **Markdown file loader** - Load markdown files into long-term memory
261
311
  - `FileSource` model to track loaded files with metadata and sync status
262
312
  - `MarkdownLoader` with YAML frontmatter extraction
263
- - `ParagraphChunker` for splitting content into semantic chunks
313
+ - `MarkdownChunker` for splitting content into semantic chunks (uses Baran gem)
264
314
  - DELTA_TIME tolerance (5 seconds) for reliable file change detection
265
315
  - **New HTM API methods** for file operations:
266
316
  - `htm.load_file(path, force: false)` - Load single markdown file
@@ -347,7 +397,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
347
397
  ## [0.0.1] - 2025-10-25
348
398
 
349
399
  ### Added
350
- - Initial release of HTM (Hierarchical Temporary Memory)
400
+ - Initial release of HTM (Hierarchical Temporal Memory)
351
401
  - Two-tier memory system:
352
402
  - Working memory: Token-limited, in-memory active context
353
403
  - Long-term memory: Durable PostgreSQL/TimescaleDB storage
data/README.md CHANGED
@@ -68,6 +68,12 @@
68
68
  - Source file tracking with re-sync support
69
69
  - YAML frontmatter extraction as metadata
70
70
 
71
+ - **Telemetry (OpenTelemetry)**
72
+ - Optional metrics collection via OpenTelemetry
73
+ - Zero overhead when disabled (null object pattern)
74
+ - Works with 50+ backends (Jaeger, Prometheus, Datadog, etc.)
75
+ - Tracks job latency, search performance, and cache effectiveness
76
+
71
77
  ## Installation
72
78
 
73
79
  Add this line to your application's Gemfile:
@@ -373,6 +379,68 @@ rake htm:jobs:clear_all
373
379
 
374
380
  See `rake -T htm:jobs` for complete list of job management tasks.
375
381
 
382
+ ## Telemetry (OpenTelemetry)
383
+
384
+ HTM includes optional OpenTelemetry-based metrics for production observability. Telemetry is **disabled by default** with zero overhead when off.
385
+
386
+ ### Enabling Telemetry
387
+
388
+ ```ruby
389
+ HTM.configure do |config|
390
+ config.telemetry_enabled = true
391
+ end
392
+
393
+ # Or via environment variable
394
+ # HTM_TELEMETRY_ENABLED=true
395
+ ```
396
+
397
+ ### Configuring the Destination
398
+
399
+ HTM emits metrics via standard OpenTelemetry protocols. Configure your destination using environment variables:
400
+
401
+ ```bash
402
+ # Export to any OTLP-compatible backend
403
+ export OTEL_METRICS_EXPORTER="otlp"
404
+ export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318"
405
+ ```
406
+
407
+ ### Available Metrics
408
+
409
+ | Metric | Type | Attributes | Description |
410
+ |--------|------|------------|-------------|
411
+ | `htm.jobs` | Counter | `job`, `status` | Job execution counts (embedding, tags) |
412
+ | `htm.embedding.latency` | Histogram | `provider`, `status` | Embedding generation time (ms) |
413
+ | `htm.tag.latency` | Histogram | `provider`, `status` | Tag extraction time (ms) |
414
+ | `htm.search.latency` | Histogram | `strategy` | Search operation time (ms) |
415
+ | `htm.cache.operations` | Counter | `operation` | Cache hits/misses |
416
+
417
+ ### Compatible Backends
418
+
419
+ HTM works with any OTLP-compatible observability platform:
420
+
421
+ **Open Source:** Jaeger, Prometheus, Grafana Tempo/Mimir, SigNoz, Uptrace
422
+
423
+ **Commercial:** Datadog, New Relic, Honeycomb, Splunk, Dynatrace, AWS X-Ray, Google Cloud Trace, Azure Monitor
424
+
425
+ ### Optional Dependencies
426
+
427
+ Users who want telemetry should add these gems:
428
+
429
+ ```ruby
430
+ gem 'opentelemetry-sdk'
431
+ gem 'opentelemetry-metrics-sdk'
432
+ gem 'opentelemetry-exporter-otlp' # For OTLP export
433
+ ```
434
+
435
+ ### Design
436
+
437
+ HTM uses a **null object pattern** for telemetry. When disabled or when the SDK is not installed:
438
+ - All metric operations are no-ops
439
+ - Zero runtime overhead
440
+ - No errors or exceptions
441
+
442
+ See [docs/telemetry.md](docs/telemetry.md) for detailed configuration and usage examples.
443
+
376
444
  ## Configuration
377
445
 
378
446
  HTM uses dependency injection for LLM access, allowing you to configure embedding generation, tag extraction, logging, and token counting.
@@ -1160,6 +1228,17 @@ export HTM_LOG_LEVEL="INFO" # Default
1160
1228
 
1161
1229
  This is used by the default logger when `HTM.configure` is called without a custom logger.
1162
1230
 
1231
+ #### HTM_TELEMETRY_ENABLED
1232
+
1233
+ Enable OpenTelemetry metrics collection:
1234
+
1235
+ ```bash
1236
+ export HTM_TELEMETRY_ENABLED="true" # Enable telemetry
1237
+ export HTM_TELEMETRY_ENABLED="false" # Disable (default)
1238
+ ```
1239
+
1240
+ When enabled, HTM emits metrics to configured OpenTelemetry collectors. See [Telemetry](#telemetry-opentelemetry) for details.
1241
+
1163
1242
  ### Quick Setup Examples
1164
1243
 
1165
1244
  #### Local Development (PostgreSQL)
data/Rakefile CHANGED
@@ -10,6 +10,18 @@ Rake::TestTask.new(:test) do |t|
10
10
  t.verbose = true
11
11
  end
12
12
 
13
+ # Ensure test task runs with RAILS_ENV=test
14
+ task :test do
15
+ ENV['RAILS_ENV'] = 'test'
16
+ end
17
+
18
+ # Prepend environment setup before test runs
19
+ Rake::Task[:test].enhance [:set_test_env]
20
+
21
+ task :set_test_env do
22
+ ENV['RAILS_ENV'] = 'test'
23
+ end
24
+
13
25
  task default: :test
14
26
 
15
27
  # Load HTM database tasks from lib/tasks/htm.rake
@@ -20,8 +32,8 @@ require_relative "lib/htm/tasks"
20
32
  desc "Run database setup (deprecated: use htm:db:setup)"
21
33
  task :db_setup => "htm:db:setup"
22
34
 
23
- desc "Test database connection (deprecated: use htm:db:test)"
24
- task :db_test => "htm:db:test"
35
+ desc "Verify database connection (deprecated: use htm:db:verify)"
36
+ task :db_test => "htm:db:verify"
25
37
 
26
38
  desc "Run example"
27
39
  task :example do
data/bin/htm_mcp.rb CHANGED
@@ -395,6 +395,98 @@ class ListTagsTool < FastMcp::Tool
395
395
  end
396
396
  end
397
397
 
398
+ # Tool: Search tags with fuzzy matching
399
+ class SearchTagsTool < FastMcp::Tool
400
+ description "Search for tags using fuzzy matching (typo-tolerant). Use this when you're unsure of exact tag names."
401
+
402
+ arguments do
403
+ required(:query).filled(:string).description("Search query - can contain typos (e.g., 'postgrsql' finds 'database:postgresql')")
404
+ optional(:limit).filled(:integer).description("Maximum number of results (default: 20)")
405
+ optional(:min_similarity).filled(:float).description("Minimum similarity threshold 0.0-1.0 (default: 0.3, lower = more fuzzy)")
406
+ end
407
+
408
+ def call(query:, limit: 20, min_similarity: 0.3)
409
+ MCP_STDERR_LOG.info "SearchTagsTool called: query=#{query.inspect}, limit=#{limit}, min_similarity=#{min_similarity}"
410
+
411
+ htm = MCPSession.htm_instance
412
+ ltm = htm.instance_variable_get(:@long_term_memory)
413
+
414
+ results = ltm.search_tags(query, limit: limit, min_similarity: min_similarity)
415
+
416
+ # Enrich with node counts
417
+ tags = results.map do |result|
418
+ tag = HTM::Models::Tag.find_by(name: result[:name])
419
+ {
420
+ name: result[:name],
421
+ similarity: result[:similarity].round(3),
422
+ node_count: tag&.nodes&.count || 0
423
+ }
424
+ end
425
+
426
+ MCP_STDERR_LOG.info "SearchTagsTool complete: found #{tags.length} tags"
427
+
428
+ {
429
+ success: true,
430
+ query: query,
431
+ min_similarity: min_similarity,
432
+ count: tags.length,
433
+ tags: tags
434
+ }.to_json
435
+ end
436
+ end
437
+
438
+ # Tool: Find nodes by topic with fuzzy option
439
+ class FindByTopicTool < FastMcp::Tool
440
+ description "Find memory nodes by topic/tag with optional fuzzy matching for typo tolerance"
441
+
442
+ arguments do
443
+ required(:topic).filled(:string).description("Topic or tag to search for (e.g., 'database:postgresql' or 'postgrsql' with fuzzy)")
444
+ optional(:fuzzy).filled(:bool).description("Enable fuzzy matching for typo tolerance (default: false)")
445
+ optional(:exact).filled(:bool).description("Require exact tag match (default: false, uses prefix matching)")
446
+ optional(:limit).filled(:integer).description("Maximum number of results (default: 20)")
447
+ optional(:min_similarity).filled(:float).description("Minimum similarity for fuzzy mode (default: 0.3)")
448
+ end
449
+
450
+ def call(topic:, fuzzy: false, exact: false, limit: 20, min_similarity: 0.3)
451
+ MCP_STDERR_LOG.info "FindByTopicTool called: topic=#{topic.inspect}, fuzzy=#{fuzzy}, exact=#{exact}"
452
+
453
+ htm = MCPSession.htm_instance
454
+ ltm = htm.instance_variable_get(:@long_term_memory)
455
+
456
+ nodes = ltm.nodes_by_topic(
457
+ topic,
458
+ fuzzy: fuzzy,
459
+ exact: exact,
460
+ min_similarity: min_similarity,
461
+ limit: limit
462
+ )
463
+
464
+ # Enrich with tags
465
+ results = nodes.map do |node_attrs|
466
+ node = HTM::Models::Node.includes(:tags).find_by(id: node_attrs['id'])
467
+ next unless node
468
+
469
+ {
470
+ id: node.id,
471
+ content: node.content[0..200],
472
+ tags: node.tags.map(&:name),
473
+ created_at: node.created_at.iso8601
474
+ }
475
+ end.compact
476
+
477
+ MCP_STDERR_LOG.info "FindByTopicTool complete: found #{results.length} nodes"
478
+
479
+ {
480
+ success: true,
481
+ topic: topic,
482
+ fuzzy: fuzzy,
483
+ exact: exact,
484
+ count: results.length,
485
+ results: results
486
+ }.to_json
487
+ end
488
+ end
489
+
398
490
  # Tool: Get memory statistics
399
491
  class StatsTool < FastMcp::Tool
400
492
  description "Get statistics about HTM memory usage"
@@ -516,6 +608,8 @@ server.register_tool(RecallTool)
516
608
  server.register_tool(ForgetTool)
517
609
  server.register_tool(RestoreTool)
518
610
  server.register_tool(ListTagsTool)
611
+ server.register_tool(SearchTagsTool) # Fuzzy tag search with typo tolerance
612
+ server.register_tool(FindByTopicTool) # Find nodes by topic with fuzzy option
519
613
  server.register_tool(StatsTool)
520
614
 
521
615
  # Register resources
data/config/database.yml CHANGED
@@ -2,36 +2,46 @@
2
2
  # Uses ERB to read from environment variables
3
3
  #
4
4
  # Priority:
5
- # 1. HTM_DBURL - Full connection URL (preferred)
5
+ # 1. HTM_DBURL - Full connection URL (preferred for development/production)
6
6
  # 2. Individual HTM_DB* variables - Host, name, user, password, port
7
7
  # 3. Defaults for development/test
8
8
  #
9
9
  # Example HTM_DBURL format:
10
10
  # postgresql://user:password@host:port/database?sslmode=require
11
+ #
12
+ # Test database:
13
+ # Tests use the database named <base_name>_test (htm_test by default), following Rails convention.
14
+ # Set RAILS_ENV=test or RACK_ENV=test to use the test configuration.
11
15
 
12
16
  <%
13
17
  require 'uri'
14
-
18
+
19
+ # Determine current environment
20
+ current_env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
21
+
15
22
  # Parse connection from HTM_DBURL or use individual variables
16
23
  if ENV['HTM_DBURL']
17
24
  uri = URI.parse(ENV['HTM_DBURL'])
18
25
  params = URI.decode_www_form(uri.query || '').to_h
19
-
26
+ base_database = uri.path[1..-1]
27
+
28
+ # Extract base name (remove _development, _test, _production suffixes if present)
29
+ base_name = base_database.sub(/_(development|test|production)$/, '')
30
+
20
31
  db_config = {
21
32
  'host' => uri.host,
22
33
  'port' => uri.port || 5432,
23
- 'database' => uri.path[1..-1],
34
+ 'base_name' => base_name,
24
35
  'username' => uri.user,
25
36
  'password' => uri.password,
26
37
  'sslmode' => params['sslmode'] || 'prefer'
27
38
  }
28
39
  else
29
- env = ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development'
30
40
  db_config = {
31
41
  'host' => ENV.fetch('HTM_DBHOST', 'localhost'),
32
42
  'port' => ENV.fetch('HTM_DBPORT', 5432).to_i,
33
- 'database' => ENV.fetch('HTM_DBNAME', "htm_#{env}"),
34
- 'username' => ENV.fetch('HTM_DBUSER', 'postgres'),
43
+ 'base_name' => ENV.fetch('HTM_DBNAME', 'htm').sub(/_(development|test|production)$/, ''),
44
+ 'username' => ENV.fetch('HTM_DBUSER', ENV['USER']),
35
45
  'password' => ENV.fetch('HTM_DBPASS', ''),
36
46
  'sslmode' => ENV.fetch('HTM_SSLMODE', 'prefer')
37
47
  }
@@ -53,15 +63,12 @@ default: &default
53
63
 
54
64
  development:
55
65
  <<: *default
56
- database: <%= db_config['database'] %>
66
+ database: <%= db_config['base_name'] %>_development
57
67
 
58
68
  test:
59
69
  <<: *default
60
- database: <%= db_config['database'] %>_test
70
+ database: <%= db_config['base_name'] %>_test
61
71
 
62
72
  production:
63
73
  <<: *default
64
- database: <%= db_config['database'] %>
65
- <% unless ENV['HTM_DBURL'] %>
66
- # WARNING: Production should use HTM_DBURL with SSL
67
- <% end %>
74
+ database: <%= db_config['base_name'] %>_production
data/db/migrate/00010_add_soft_delete_to_associations.rb ADDED
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddSoftDeleteToAssociations < ActiveRecord::Migration[7.0]
4
+ def change
5
+ # Add deleted_at to robot_nodes for soft delete support
6
+ unless column_exists?(:robot_nodes, :deleted_at)
7
+ add_column :robot_nodes, :deleted_at, :datetime, null: true
8
+ end
9
+ unless index_exists?(:robot_nodes, :deleted_at)
10
+ add_index :robot_nodes, :deleted_at
11
+ end
12
+
13
+ # Add deleted_at to node_tags for soft delete support
14
+ unless column_exists?(:node_tags, :deleted_at)
15
+ add_column :node_tags, :deleted_at, :datetime, null: true
16
+ end
17
+ unless index_exists?(:node_tags, :deleted_at)
18
+ add_index :node_tags, :deleted_at
19
+ end
20
+
21
+ # Add deleted_at to tags for soft delete support
22
+ unless column_exists?(:tags, :deleted_at)
23
+ add_column :tags, :deleted_at, :datetime, null: true
24
+ end
25
+ unless index_exists?(:tags, :deleted_at)
26
+ add_index :tags, :deleted_at
27
+ end
28
+ end
29
+ end
data/db/migrate/00011_add_performance_indexes.rb ADDED
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddPerformanceIndexes < ActiveRecord::Migration[7.1]
4
+ def change
5
+ # Partial index for soft-delete filter (used in almost every query)
6
+ # This complements idx_nodes_not_deleted_created_at for queries that
7
+ # don't need created_at ordering but still filter by deleted_at IS NULL
8
+ add_index :nodes, :id,
9
+ name: 'idx_nodes_active',
10
+ where: 'deleted_at IS NULL',
11
+ comment: 'Partial index for active (non-deleted) node queries'
12
+
13
+ # Composite index for embedding-based searches on active nodes
14
+ # Helps vector_search and hybrid_search which filter by deleted_at IS NULL
15
+ # and embedding IS NOT NULL
16
+ execute <<-SQL
17
+ CREATE INDEX idx_nodes_active_with_embedding ON nodes (id)
18
+ WHERE deleted_at IS NULL AND embedding IS NOT NULL
19
+ SQL
20
+ end
21
+ end
data/db/migrate/00012_add_tags_trigram_index.rb ADDED
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddTagsTrigramIndex < ActiveRecord::Migration[7.0]
4
+ def up
5
+ # Add GIN trigram index on tags.name for fuzzy search
6
+ # Enables queries like: WHERE name % 'postgrsql' (typo-tolerant)
7
+ # Also speeds up LIKE '%pattern%' queries
8
+ execute <<~SQL
9
+ CREATE INDEX idx_tags_name_trgm ON tags USING gin(name gin_trgm_ops);
10
+ SQL
11
+ end
12
+
13
+ def down
14
+ execute <<~SQL
15
+ DROP INDEX IF EXISTS idx_tags_name_trgm;
16
+ SQL
17
+ end
18
+ end
data/db/migrate/00013_enable_lz4_compression.rb ADDED
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ class EnableLz4Compression < ActiveRecord::Migration[7.0]
4
+ def up
5
+ # Switch TOAST compression from pglz to lz4 for better read performance
6
+ # LZ4 decompression is ~32% faster than pglz with only marginally lower compression ratio
7
+ # See: https://www.depesz.com/2025/11/29/using-json-json-vs-jsonb-pglz-vs-lz4-key-optimization-parsing-speed/
8
+
9
+ # nodes.metadata - JSONB column for flexible key-value storage
10
+ execute <<~SQL
11
+ ALTER TABLE nodes ALTER COLUMN metadata SET COMPRESSION lz4;
12
+ SQL
13
+
14
+ # nodes.content - TEXT column containing memory content
15
+ execute <<~SQL
16
+ ALTER TABLE nodes ALTER COLUMN content SET COMPRESSION lz4;
17
+ SQL
18
+
19
+ # file_sources.frontmatter - JSONB column for parsed YAML frontmatter
20
+ execute <<~SQL
21
+ ALTER TABLE file_sources ALTER COLUMN frontmatter SET COMPRESSION lz4;
22
+ SQL
23
+
24
+ # Note: Existing rows retain their original compression until rewritten.
25
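+ # To recompress existing data, run: VACUUM FULL nodes; VACUUM FULL file_sources; (NOTE(review): PostgreSQL documents that already-compressed values may keep their old method until rewritten, e.g. by dump/restore — verify that VACUUM FULL recompresses on your server version)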
26
+ # This is optional and can be done during a maintenance window.
27
+ end
28
+
29
+ def down
30
+ # Revert to default pglz compression
31
+ execute <<~SQL
32
+ ALTER TABLE nodes ALTER COLUMN metadata SET COMPRESSION pglz;
33
+ SQL
34
+
35
+ execute <<~SQL
36
+ ALTER TABLE nodes ALTER COLUMN content SET COMPRESSION pglz;
37
+ SQL
38
+
39
+ execute <<~SQL
40
+ ALTER TABLE file_sources ALTER COLUMN frontmatter SET COMPRESSION pglz;
41
+ SQL
42
+ end
43
+ end