htm 0.0.10 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/.dictate.toml +46 -0
  3. data/.envrc +2 -0
  4. data/CHANGELOG.md +86 -3
  5. data/README.md +86 -7
  6. data/Rakefile +14 -2
  7. data/bin/htm_mcp.rb +621 -0
  8. data/config/database.yml +20 -13
  9. data/db/migrate/00010_add_soft_delete_to_associations.rb +29 -0
  10. data/db/migrate/00011_add_performance_indexes.rb +21 -0
  11. data/db/migrate/00012_add_tags_trigram_index.rb +18 -0
  12. data/db/migrate/00013_enable_lz4_compression.rb +43 -0
  13. data/db/schema.sql +49 -92
  14. data/docs/api/index.md +1 -1
  15. data/docs/api/yard/HTM.md +2 -4
  16. data/docs/architecture/index.md +1 -1
  17. data/docs/development/index.md +1 -1
  18. data/docs/getting-started/index.md +1 -1
  19. data/docs/guides/index.md +1 -1
  20. data/docs/images/telemetry-architecture.svg +153 -0
  21. data/docs/telemetry.md +391 -0
  22. data/examples/README.md +171 -1
  23. data/examples/cli_app/README.md +1 -1
  24. data/examples/cli_app/htm_cli.rb +1 -1
  25. data/examples/mcp_client.rb +529 -0
  26. data/examples/sinatra_app/app.rb +1 -1
  27. data/examples/telemetry/README.md +147 -0
  28. data/examples/telemetry/SETUP_README.md +169 -0
  29. data/examples/telemetry/demo.rb +498 -0
  30. data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
  31. data/lib/htm/configuration.rb +261 -70
  32. data/lib/htm/database.rb +46 -22
  33. data/lib/htm/embedding_service.rb +24 -14
  34. data/lib/htm/errors.rb +15 -1
  35. data/lib/htm/jobs/generate_embedding_job.rb +19 -0
  36. data/lib/htm/jobs/generate_propositions_job.rb +103 -0
  37. data/lib/htm/jobs/generate_tags_job.rb +24 -0
  38. data/lib/htm/loaders/markdown_chunker.rb +79 -0
  39. data/lib/htm/loaders/markdown_loader.rb +41 -15
  40. data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
  41. data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
  42. data/lib/htm/long_term_memory/node_operations.rb +209 -0
  43. data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
  44. data/lib/htm/long_term_memory/robot_operations.rb +34 -0
  45. data/lib/htm/long_term_memory/tag_operations.rb +428 -0
  46. data/lib/htm/long_term_memory/vector_search.rb +109 -0
  47. data/lib/htm/long_term_memory.rb +51 -1153
  48. data/lib/htm/models/node.rb +35 -2
  49. data/lib/htm/models/node_tag.rb +31 -0
  50. data/lib/htm/models/robot_node.rb +31 -0
  51. data/lib/htm/models/tag.rb +44 -0
  52. data/lib/htm/proposition_service.rb +169 -0
  53. data/lib/htm/query_cache.rb +214 -0
  54. data/lib/htm/sql_builder.rb +178 -0
  55. data/lib/htm/tag_service.rb +16 -6
  56. data/lib/htm/tasks.rb +8 -2
  57. data/lib/htm/telemetry.rb +224 -0
  58. data/lib/htm/version.rb +1 -1
  59. data/lib/htm.rb +64 -3
  60. data/lib/tasks/doc.rake +1 -1
  61. data/lib/tasks/htm.rake +259 -13
  62. data/mkdocs.yml +96 -96
  63. metadata +75 -18
  64. data/.aigcm_msg +0 -1
  65. data/.claude/settings.local.json +0 -92
  66. data/CLAUDE.md +0 -603
  67. data/examples/cli_app/temp.log +0 -93
  68. data/lib/htm/loaders/paragraph_chunker.rb +0 -112
  69. data/notes/ARCHITECTURE_REVIEW.md +0 -1167
  70. data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
  71. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
  72. data/notes/next_steps.md +0 -100
  73. data/notes/plan.md +0 -627
  74. data/notes/tag_ontology_enhancement_ideas.md +0 -222
  75. data/notes/timescaledb_removal_summary.md +0 -200
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b552d9b1e6a197c35d1ff2c64d4c8eb94a36a0e67d05efe1c10d305cf8f23d78
4
- data.tar.gz: 58fa199e1a3af9fb9c3a9e8fc0fe9ee779209a44baecbccdfdf6f570f5e7c692
3
+ metadata.gz: 5658951570d47a6988a3e6c5078ae90c83a8bbbaebd4bc18b8a70152d0648ab1
4
+ data.tar.gz: e4216f5bbe7cbbcfb1b78058f16fb5c190a8902f4778874c858acf7401f0da14
5
5
  SHA512:
6
- metadata.gz: 9326f318f1677ea5cad5e5f6cdda4125421a13bde13d1c7bef0a2b2e8096873a7762f11d008c8d0e64365f90272aad58699b39a3af40f0c188f7e961eef6d2df
7
- data.tar.gz: 6f25fa0089349b0872db11427747e525ab46a9528d1ada22be183be260bad40d48ef692f78d922ba0803b817f2f661fa143721bf180882f34c575f4650e65448
6
+ metadata.gz: 3d206a81c9120d7c2ffb8a8afb41056ee93a9b614a7abfabe1e14196e60570daaab048e37cc74601bd04afed0af5290616ff925a7610dc37e8816cb4d797956a
7
+ data.tar.gz: d2977a3e77e2d7a0b6cd886d3edadbfe8c04b3af55e4280d8b70393de41ad019a76fe12e19c1bf141d3f6b604ac4a221caca5ea914c64985521a4ccbe2fbfce9
data/.dictate.toml ADDED
@@ -0,0 +1,46 @@
1
+ # Dictator Configuration for HTM
2
+ # Decree-based structural enforcement
3
+ # https://github.com/seuros/dictator
4
+
5
+ [decree.supreme]
6
+ # Universal structural rules (applies to ALL files)
7
+ trailing_whitespace = "deny"
8
+ tabs_vs_spaces = "spaces"
9
+ tab_width = 2
10
+ final_newline = "require"
11
+ line_endings = "lf"
12
+ blank_line_whitespace = "deny"
13
+
14
+ [decree.ruby]
15
+ # Ruby-specific structural enforcement
16
+ max_line_length = 120
17
+ max_lines = 300
18
+ ignore_comments = true
19
+ ignore_blank_lines = true
20
+ method_visibility_order = ["public", "protected", "private"]
21
+ comment_spacing = true
22
+
23
+ [decree.ruby.linter]
24
+ # External linter - Dictator adds: -A --format json
25
+ command = "rubocop"
26
+
27
+ [decree.frontmatter]
28
+ # YAML frontmatter ordering for .md files
29
+ order = ["title", "slug", "pubDate", "description", "tags", "draft"]
30
+ required = ["title"]
31
+
32
+ # Disable unused language decrees
33
+ [decree.typescript]
34
+ enabled = false
35
+
36
+ [decree.golang]
37
+ enabled = false
38
+
39
+ [decree.rust]
40
+ enabled = false
41
+
42
+ [decree.python]
43
+ enabled = false
44
+
45
+ [decree.kjr]
46
+ enabled = false
data/.envrc CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  export RR=`pwd`
4
4
 
5
+ export HTM_EXTRACT_PROPOSITIONS=true
6
+
5
7
  # Database connection - Localhost PostgreSQL
6
8
  export HTM_DBHOST=localhost
7
9
  export HTM_DBPORT=5432
data/CHANGELOG.md CHANGED
@@ -7,6 +7,88 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.0.14] - 2025-12-05
11
+
12
+ ### Added
13
+ - **OpenTelemetry metrics** - Optional observability with zero overhead when disabled
14
+ - New `HTM::Telemetry` module with null object pattern
15
+ - Metrics: `htm.jobs` (counter), `htm.embedding.latency`, `htm.tag.latency`, `htm.search.latency` (histograms), `htm.cache.operations` (counter)
16
+ - Instrumented: `GenerateEmbeddingJob`, `GenerateTagsJob`, search methods, `QueryCache`
17
+ - Enable via `HTM_TELEMETRY_ENABLED=true` or `config.telemetry_enabled = true`
18
+ - Works with 50+ OTLP-compatible backends (Jaeger, Prometheus, Datadog, etc.)
19
+ - Comprehensive documentation in `docs/telemetry.md`
20
+ - 18 new telemetry tests
21
+ - **Telemetry demo with Grafana visualization** - `examples/telemetry/`
22
+ - Live dashboard showing HTM metrics (job counts, latencies, cache hit rates)
23
+ - Uses Homebrew-installed Prometheus and Grafana (no Docker required)
24
+ - Auto-configures Prometheus scrape target
25
+ - Pre-built Grafana dashboard JSON for easy import
26
+ - Interactive shutdown prompt for service management
27
+ - **`htm:db:create` rake task** - Create database if it doesn't exist (respects `RAILS_ENV`)
28
+
29
+ ### Changed
30
+ - **All `htm:db:*` tasks now respect `RAILS_ENV`** - Following Rails conventions
31
+ - Database selection based on environment: `htm_development`, `htm_test`, `htm_production`
32
+ - `rake test` automatically sets `RAILS_ENV=test`
33
+ - Example: `RAILS_ENV=test rake htm:db:setup` operates on `htm_test`
34
+ - **`config/database.yml` refactored** - Extracts base name from `HTM_DBURL` and appends environment suffix
35
+ - `HTM_DBURL=postgresql://...htm_development` + `RAILS_ENV=test` → connects to `htm_test`
36
+ - **`HTM::Database.default_config` now respects `RAILS_ENV`** - Uses `ActiveRecordConfig.load_database_config`
37
+ - **Renamed `htm:db:test` to `htm:db:verify`** - Avoids naming collision with test database namespace
38
+ - `htm:db:verify` verifies database connection
39
+ - `RAILS_ENV=test rake htm:db:*` operates on test database
40
+
41
+ ## [0.0.13] - 2025-12-04
42
+
43
+ ### Changed
44
+ - **MarkdownChunker now uses Baran gem** - Replaced custom ParagraphChunker with Baran's `MarkdownSplitter`
45
+ - Respects markdown structure (headers, code blocks, horizontal rules)
46
+ - Configurable `chunk_size` and `chunk_overlap` settings
47
+ - Returns cursor positions for each chunk
48
+ - **Fuzzy tag search with trigram matching** - Tags now searchable with fuzzy matching via pg_trgm
49
+ - **LongTermMemory modularization** - Refactored into separate concerns for better maintainability
50
+ - Search optimizations for vector, fulltext, and hybrid strategies
51
+
52
+ ### Fixed
53
+ - **Configurable limits** - The following are now configurable (previously hard-coded):
54
+ - `max_embedding_dimension` (default: 2000)
55
+ - `max_tag_depth` (default: 4)
56
+ - Circuit breaker settings: `failure_threshold`, `reset_timeout`, `half_open_max_calls`
57
+ - Relevance scoring weights: `semantic_weight`, `tag_weight`, `recency_weight`, `access_weight`
58
+ - `relevance_recency_half_life_hours` (default: 168 = 1 week)
59
+
60
+ ## [0.0.11] - 2025-12-02
61
+
62
+ ### Added
63
+ - **MCP Server and Client** - Model Context Protocol integration for AI assistants
64
+ - `bin/htm_mcp.rb` - FastMCP-based server exposing HTM tools:
65
+ - `SetRobotTool` - Set robot identity for the session (per-client isolation)
66
+ - `GetRobotTool` - Get current robot information
67
+ - `GetWorkingMemoryTool` - Retrieve working memory for session restore
68
+ - `RememberTool` - Store information with tags and metadata
69
+ - `RecallTool` - Search memories with vector/fulltext/hybrid strategies
70
+ - `ForgetTool` / `RestoreTool` - Soft delete and restore memories
71
+ - `ListTagsTool` - List tags with optional prefix filtering
72
+ - `StatsTool` - Memory usage statistics
73
+ - Resources: `htm://statistics`, `htm://tags/hierarchy`, `htm://memories/recent`
74
+ - STDERR logging to avoid corrupting MCP JSON-RPC protocol
75
+ - Session-based robot identity via `MCPSession` module
76
+ - `examples/mcp_client.rb` - Interactive chat client using ruby_llm-mcp:
77
+ - Prompts for robot name on startup (or uses `HTM_ROBOT_NAME` env var)
78
+ - Session restore: offers to restore working memory from previous session
79
+ - Interactive chat loop with Ollama LLM (gpt-oss model)
80
+ - Tool call logging for visibility
81
+ - Slash commands: `/tools`, `/resources`, `/stats`, `/tags`, `/clear`, `/help`, `/exit`
82
+ - **Session restore feature** - MCP client can restore previous session context
83
+ - `GetWorkingMemoryTool` returns all nodes in working memory for a robot
84
+ - Client prompts user to restore previous session on startup
85
+ - Working memory injected into chat context for continuity
86
+
87
+ ### Fixed
88
+ - **GetWorkingMemoryTool** now uses `joins(:node)` to exclude soft-deleted nodes
89
+ - **StatsTool** fixed scope error (`Node.active` → `Node.count` with default_scope)
90
+ - **MCP tool response parsing** - Extract `.text` from `RubyLLM::MCP::Content` objects
91
+
10
92
  ## [0.0.10] - 2025-12-02
11
93
 
12
94
  ### Added
@@ -228,7 +310,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
228
310
  - **Markdown file loader** - Load markdown files into long-term memory
229
311
  - `FileSource` model to track loaded files with metadata and sync status
230
312
  - `MarkdownLoader` with YAML frontmatter extraction
231
- - `ParagraphChunker` for splitting content into semantic chunks
313
+ - `MarkdownChunker` for splitting content into semantic chunks (uses Baran gem)
232
314
  - DELTA_TIME tolerance (5 seconds) for reliable file change detection
233
315
  - **New HTM API methods** for file operations:
234
316
  - `htm.load_file(path, force: false)` - Load single markdown file
@@ -315,7 +397,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
315
397
  ## [0.0.1] - 2025-10-25
316
398
 
317
399
  ### Added
318
- - Initial release of HTM (Hierarchical Temporary Memory)
400
+ - Initial release of HTM (Hierarchical Temporal Memory)
319
401
  - Two-tier memory system:
320
402
  - Working memory: Token-limited, in-memory active context
321
403
  - Long-term memory: Durable PostgreSQL/TimescaleDB storage
@@ -436,7 +518,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
436
518
  - Working memory size is user-configurable
437
519
  - See ADRs for detailed architectural decisions and rationale
438
520
 
439
- [Unreleased]: https://github.com/madbomber/htm/compare/v0.0.10...HEAD
521
+ [Unreleased]: https://github.com/madbomber/htm/compare/v0.0.12...HEAD
522
+ [0.0.12]: https://github.com/madbomber/htm/compare/v0.0.10...v0.0.12
440
523
  [0.0.10]: https://github.com/madbomber/htm/compare/v0.0.9...v0.0.10
441
524
  [0.0.9]: https://github.com/madbomber/htm/compare/v0.0.8...v0.0.9
442
525
  [0.0.8]: https://github.com/madbomber/htm/compare/v0.0.7...v0.0.8
data/README.md CHANGED
@@ -68,6 +68,12 @@
68
68
  - Source file tracking with re-sync support
69
69
  - YAML frontmatter extraction as metadata
70
70
 
71
+ - **Telemetry (OpenTelemetry)**
72
+ - Optional metrics collection via OpenTelemetry
73
+ - Zero overhead when disabled (null object pattern)
74
+ - Works with 50+ backends (Jaeger, Prometheus, Datadog, etc.)
75
+ - Tracks job latency, search performance, and cache effectiveness
76
+
71
77
  ## Installation
72
78
 
73
79
  Add this line to your application's Gemfile:
@@ -373,6 +379,68 @@ rake htm:jobs:clear_all
373
379
 
374
380
  See `rake -T htm:jobs` for complete list of job management tasks.
375
381
 
382
+ ## Telemetry (OpenTelemetry)
383
+
384
+ HTM includes optional OpenTelemetry-based metrics for production observability. Telemetry is **disabled by default** with zero overhead when off.
385
+
386
+ ### Enabling Telemetry
387
+
388
+ ```ruby
389
+ HTM.configure do |config|
390
+ config.telemetry_enabled = true
391
+ end
392
+
393
+ # Or via environment variable
394
+ # HTM_TELEMETRY_ENABLED=true
395
+ ```
396
+
397
+ ### Configuring the Destination
398
+
399
+ HTM emits metrics via standard OpenTelemetry protocols. Configure your destination using environment variables:
400
+
401
+ ```bash
402
+ # Export to any OTLP-compatible backend
403
+ export OTEL_METRICS_EXPORTER="otlp"
404
+ export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318"
405
+ ```
406
+
407
+ ### Available Metrics
408
+
409
+ | Metric | Type | Attributes | Description |
410
+ |--------|------|------------|-------------|
411
+ | `htm.jobs` | Counter | `job`, `status` | Job execution counts (embedding, tags) |
412
+ | `htm.embedding.latency` | Histogram | `provider`, `status` | Embedding generation time (ms) |
413
+ | `htm.tag.latency` | Histogram | `provider`, `status` | Tag extraction time (ms) |
414
+ | `htm.search.latency` | Histogram | `strategy` | Search operation time (ms) |
415
+ | `htm.cache.operations` | Counter | `operation` | Cache hits/misses |
416
+
417
+ ### Compatible Backends
418
+
419
+ HTM works with any OTLP-compatible observability platform:
420
+
421
+ **Open Source:** Jaeger, Prometheus, Grafana Tempo/Mimir, SigNoz, Uptrace
422
+
423
+ **Commercial:** Datadog, New Relic, Honeycomb, Splunk, Dynatrace, AWS X-Ray, Google Cloud Trace, Azure Monitor
424
+
425
+ ### Optional Dependencies
426
+
427
+ Users who want telemetry should add these gems:
428
+
429
+ ```ruby
430
+ gem 'opentelemetry-sdk'
431
+ gem 'opentelemetry-metrics-sdk'
432
+ gem 'opentelemetry-exporter-otlp' # For OTLP export
433
+ ```
434
+
435
+ ### Design
436
+
437
+ HTM uses a **null object pattern** for telemetry. When disabled or when the SDK is not installed:
438
+ - All metric operations are no-ops
439
+ - Zero runtime overhead
440
+ - No errors or exceptions
441
+
442
+ See [docs/telemetry.md](docs/telemetry.md) for detailed configuration and usage examples.
443
+
376
444
  ## Configuration
377
445
 
378
446
  HTM uses dependency injection for LLM access, allowing you to configure embedding generation, tag extraction, logging, and token counting.
@@ -1160,6 +1228,17 @@ export HTM_LOG_LEVEL="INFO" # Default
1160
1228
 
1161
1229
  This is used by the default logger when `HTM.configure` is called without a custom logger.
1162
1230
 
1231
+ #### HTM_TELEMETRY_ENABLED
1232
+
1233
+ Enable OpenTelemetry metrics collection:
1234
+
1235
+ ```bash
1236
+ export HTM_TELEMETRY_ENABLED="true" # Enable telemetry
1237
+ export HTM_TELEMETRY_ENABLED="false" # Disable (default)
1238
+ ```
1239
+
1240
+ When enabled, HTM emits metrics to configured OpenTelemetry collectors. See [Telemetry](#telemetry-opentelemetry) for details.
1241
+
1163
1242
  ### Quick Setup Examples
1164
1243
 
1165
1244
  #### Local Development (PostgreSQL)
@@ -1400,13 +1479,13 @@ This separation allows you to provide any LLM implementation while HTM handles r
1400
1479
  ## Roadmap
1401
1480
 
1402
1481
  - [x] Phase 1: Foundation (basic two-tier memory)
1403
- - [ ] Phase 2: RAG retrieval (semantic search)
1404
- - [ ] Phase 3: Relationships & tags
1405
- - [ ] Phase 4: Working memory management
1406
- - [ ] Phase 5: Hive mind features
1407
- - [ ] Phase 6: Operations & observability
1408
- - [ ] Phase 7: Advanced features
1409
- - [ ] Phase 8: Production-ready gem
1482
+ - [x] Phase 2: RAG retrieval (semantic search)
1483
+ - [x] Phase 3: Relationships & tags
1484
+ - [x] Phase 4: Working memory management
1485
+ - [x] Phase 5: Hive mind features
1486
+ - [x] Phase 6: Operations & observability
1487
+ - [x] Phase 7: Advanced features
1488
+ - [x] Phase 8: Production-ready gem
1410
1489
 
1411
1490
 
1412
1491
  ## Contributing
data/Rakefile CHANGED
@@ -10,6 +10,18 @@ Rake::TestTask.new(:test) do |t|
10
10
  t.verbose = true
11
11
  end
12
12
 
13
+ # Ensure test task runs with RAILS_ENV=test
14
+ task :test do
15
+ ENV['RAILS_ENV'] = 'test'
16
+ end
17
+
18
+ # Prepend environment setup before test runs
19
+ Rake::Task[:test].enhance [:set_test_env]
20
+
21
+ task :set_test_env do
22
+ ENV['RAILS_ENV'] = 'test'
23
+ end
24
+
13
25
  task default: :test
14
26
 
15
27
  # Load HTM database tasks from lib/tasks/htm.rake
@@ -20,8 +32,8 @@ require_relative "lib/htm/tasks"
20
32
  desc "Run database setup (deprecated: use htm:db:setup)"
21
33
  task :db_setup => "htm:db:setup"
22
34
 
23
- desc "Test database connection (deprecated: use htm:db:test)"
24
- task :db_test => "htm:db:test"
35
+ desc "Verify database connection (deprecated: use htm:db:verify)"
36
+ task :db_test => "htm:db:verify"
25
37
 
26
38
  desc "Run example"
27
39
  task :example do