htm 0.0.18 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +119 -1
- data/README.md +12 -0
- data/Rakefile +104 -18
- data/db/migrate/00001_enable_extensions.rb +9 -5
- data/db/migrate/00002_create_robots.rb +18 -6
- data/db/migrate/00003_create_file_sources.rb +30 -17
- data/db/migrate/00004_create_nodes.rb +60 -48
- data/db/migrate/00005_create_tags.rb +24 -12
- data/db/migrate/00006_create_node_tags.rb +28 -13
- data/db/migrate/00007_create_robot_nodes.rb +40 -26
- data/db/schema.sql +17 -1
- data/db/seeds.rb +34 -34
- data/docs/api/embedding-service.md +140 -110
- data/docs/api/yard/HTM/ActiveRecordConfig.md +6 -0
- data/docs/api/yard/HTM/Config.md +173 -0
- data/docs/api/yard/HTM/ConfigSection.md +28 -0
- data/docs/api/yard/HTM/Database.md +1 -1
- data/docs/api/yard/HTM/Railtie.md +2 -2
- data/docs/api/yard/HTM.md +0 -57
- data/docs/api/yard/index.csv +76 -61
- data/docs/api/yard-reference.md +2 -1
- data/docs/architecture/adrs/003-ollama-embeddings.md +45 -36
- data/docs/architecture/adrs/004-hive-mind.md +1 -1
- data/docs/architecture/adrs/008-robot-identification.md +1 -1
- data/docs/architecture/index.md +11 -9
- data/docs/architecture/overview.md +11 -7
- data/docs/assets/images/balanced-strategy-decay.svg +41 -0
- data/docs/assets/images/class-hierarchy.svg +1 -1
- data/docs/assets/images/eviction-priority.svg +43 -0
- data/docs/assets/images/exception-hierarchy.svg +2 -2
- data/docs/assets/images/hive-mind-shared-memory.svg +52 -0
- data/docs/assets/images/htm-architecture-overview.svg +3 -3
- data/docs/assets/images/htm-core-components.svg +4 -4
- data/docs/assets/images/htm-layered-architecture.svg +1 -1
- data/docs/assets/images/htm-memory-addition-flow.svg +2 -2
- data/docs/assets/images/htm-memory-recall-flow.svg +2 -2
- data/docs/assets/images/memory-topology.svg +53 -0
- data/docs/assets/images/two-tier-memory-architecture.svg +55 -0
- data/docs/database/naming-convention.md +244 -0
- data/docs/database_rake_tasks.md +31 -0
- data/docs/development/rake-tasks.md +80 -35
- data/docs/development/setup.md +76 -44
- data/docs/examples/basic-usage.md +133 -0
- data/docs/examples/config-files.md +170 -0
- data/docs/examples/file-loading.md +208 -0
- data/docs/examples/index.md +116 -0
- data/docs/examples/llm-configuration.md +168 -0
- data/docs/examples/mcp-client.md +172 -0
- data/docs/examples/rails-integration.md +173 -0
- data/docs/examples/robot-groups.md +210 -0
- data/docs/examples/sinatra-integration.md +218 -0
- data/docs/examples/standalone-app.md +216 -0
- data/docs/examples/telemetry.md +224 -0
- data/docs/examples/timeframes.md +143 -0
- data/docs/getting-started/installation.md +97 -40
- data/docs/getting-started/quick-start.md +28 -11
- data/docs/guides/configuration.md +515 -0
- data/docs/guides/file-loading.md +322 -0
- data/docs/guides/getting-started.md +40 -9
- data/docs/guides/index.md +3 -3
- data/docs/guides/mcp-server.md +100 -13
- data/docs/guides/propositions.md +264 -0
- data/docs/guides/recalling-memories.md +4 -4
- data/docs/guides/search-strategies.md +3 -3
- data/docs/guides/tags.md +318 -0
- data/docs/guides/telemetry.md +229 -0
- data/docs/index.md +8 -16
- data/docs/{architecture → robots}/hive-mind.md +8 -111
- data/docs/robots/index.md +73 -0
- data/docs/{guides → robots}/multi-robot.md +3 -3
- data/docs/{guides → robots}/robot-groups.md +8 -7
- data/docs/{architecture → robots}/two-tier-memory.md +13 -149
- data/docs/robots/why-robots.md +85 -0
- data/examples/.envrc +6 -0
- data/examples/.gitignore +2 -0
- data/examples/00_create_examples_db.rb +94 -0
- data/examples/{basic_usage.rb → 01_basic_usage.rb} +12 -16
- data/examples/{custom_llm_configuration.rb → 03_custom_llm_configuration.rb} +13 -3
- data/examples/{file_loader_usage.rb → 04_file_loader_usage.rb} +11 -14
- data/examples/{timeframe_demo.rb → 05_timeframe_demo.rb} +10 -3
- data/examples/{example_app → 06_example_app}/app.rb +15 -15
- data/examples/{cli_app → 07_cli_app}/htm_cli.rb +15 -22
- data/examples/08_sinatra_app/Gemfile.lock +241 -0
- data/examples/{sinatra_app → 08_sinatra_app}/app.rb +19 -18
- data/examples/{mcp_client.rb → 09_mcp_client.rb} +5 -8
- data/examples/{telemetry → 10_telemetry}/SETUP_README.md +1 -1
- data/examples/{telemetry → 10_telemetry}/demo.rb +14 -10
- data/examples/11_robot_groups/README.md +335 -0
- data/examples/{robot_groups → 11_robot_groups/lib}/robot_worker.rb +17 -3
- data/examples/{robot_groups → 11_robot_groups}/multi_process.rb +9 -9
- data/examples/{robot_groups → 11_robot_groups}/same_process.rb +9 -12
- data/examples/{rails_app → 12_rails_app}/Gemfile +3 -0
- data/examples/{rails_app → 12_rails_app}/Gemfile.lock +87 -58
- data/examples/{rails_app → 12_rails_app}/app/controllers/dashboard_controller.rb +10 -6
- data/examples/{rails_app → 12_rails_app}/app/controllers/files_controller.rb +5 -5
- data/examples/{rails_app → 12_rails_app}/app/controllers/memories_controller.rb +11 -7
- data/examples/{rails_app → 12_rails_app}/app/controllers/robots_controller.rb +8 -8
- data/examples/12_rails_app/app/controllers/tags_controller.rb +36 -0
- data/examples/{rails_app → 12_rails_app}/app/views/dashboard/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/files/new.html.erb +5 -2
- data/examples/{rails_app → 12_rails_app}/app/views/memories/_memory_card.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/deleted.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/edit.html.erb +3 -3
- data/examples/{rails_app → 12_rails_app}/app/views/memories/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/robots/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/robots/show.html.erb +4 -4
- data/examples/{rails_app → 12_rails_app}/app/views/search/index.html.erb +1 -1
- data/examples/{rails_app → 12_rails_app}/app/views/tags/index.html.erb +2 -2
- data/examples/{rails_app → 12_rails_app}/app/views/tags/show.html.erb +1 -1
- data/examples/12_rails_app/config/initializers/htm.rb +7 -0
- data/examples/12_rails_app/config/initializers/rack.rb +5 -0
- data/examples/README.md +230 -211
- data/examples/examples_helper.rb +138 -0
- data/lib/htm/config/builder.rb +167 -0
- data/lib/htm/config/database.rb +317 -0
- data/lib/htm/config/defaults.yml +41 -13
- data/lib/htm/config/section.rb +74 -0
- data/lib/htm/config/validator.rb +83 -0
- data/lib/htm/config.rb +65 -361
- data/lib/htm/database.rb +85 -127
- data/lib/htm/errors.rb +14 -0
- data/lib/htm/integrations/sinatra.rb +13 -44
- data/lib/htm/job_adapter.rb +75 -1
- data/lib/htm/jobs/generate_embedding_job.rb +3 -4
- data/lib/htm/jobs/generate_propositions_job.rb +4 -5
- data/lib/htm/jobs/generate_tags_job.rb +16 -15
- data/lib/htm/loaders/defaults_loader.rb +23 -0
- data/lib/htm/loaders/markdown_loader.rb +17 -15
- data/lib/htm/loaders/xdg_config_loader.rb +9 -9
- data/lib/htm/long_term_memory/fulltext_search.rb +14 -14
- data/lib/htm/long_term_memory/hybrid_search.rb +396 -229
- data/lib/htm/long_term_memory/node_operations.rb +24 -23
- data/lib/htm/long_term_memory/relevance_scorer.rb +23 -20
- data/lib/htm/long_term_memory/robot_operations.rb +4 -4
- data/lib/htm/long_term_memory/tag_operations.rb +91 -77
- data/lib/htm/long_term_memory/vector_search.rb +4 -5
- data/lib/htm/long_term_memory.rb +13 -13
- data/lib/htm/mcp/cli.rb +115 -8
- data/lib/htm/mcp/resources.rb +4 -3
- data/lib/htm/mcp/server.rb +5 -4
- data/lib/htm/mcp/tools.rb +37 -28
- data/lib/htm/migration.rb +72 -0
- data/lib/htm/models/file_source.rb +52 -31
- data/lib/htm/models/node.rb +224 -108
- data/lib/htm/models/node_tag.rb +49 -28
- data/lib/htm/models/robot.rb +38 -27
- data/lib/htm/models/robot_node.rb +63 -35
- data/lib/htm/models/tag.rb +126 -123
- data/lib/htm/observability.rb +45 -41
- data/lib/htm/proposition_service.rb +76 -7
- data/lib/htm/railtie.rb +2 -2
- data/lib/htm/robot_group.rb +30 -18
- data/lib/htm/sequel_config.rb +215 -0
- data/lib/htm/sql_builder.rb +14 -16
- data/lib/htm/tag_service.rb +78 -0
- data/lib/htm/tasks.rb +3 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/workflows/remember_workflow.rb +213 -0
- data/lib/htm.rb +27 -22
- data/lib/tasks/db.rake +0 -2
- data/lib/tasks/doc.rake +2 -2
- data/lib/tasks/files.rake +11 -18
- data/lib/tasks/htm.rake +190 -62
- data/lib/tasks/jobs.rake +179 -54
- data/lib/tasks/tags.rake +8 -13
- data/mkdocs.yml +33 -8
- data/scripts/backfill_parent_tags.rb +376 -0
- data/scripts/normalize_plural_tags.rb +335 -0
- metadata +168 -86
- data/docs/api/yard/HTM/Configuration.md +0 -240
- data/docs/telemetry.md +0 -391
- data/examples/rails_app/app/controllers/tags_controller.rb +0 -30
- data/examples/sinatra_app/Gemfile.lock +0 -166
- data/lib/htm/active_record_config.rb +0 -104
- /data/examples/{config_file_example → 02_config_file_example}/README.md +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/config/htm.local.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/custom_config.yml +0 -0
- /data/examples/{config_file_example → 02_config_file_example}/show_config.rb +0 -0
- /data/examples/{example_app → 06_example_app}/Rakefile +0 -0
- /data/examples/{cli_app → 07_cli_app}/README.md +0 -0
- /data/examples/{sinatra_app → 08_sinatra_app}/Gemfile +0 -0
- /data/examples/{telemetry → 10_telemetry}/README.md +0 -0
- /data/examples/{telemetry → 10_telemetry}/grafana/dashboards/htm-metrics.json +0 -0
- /data/examples/{rails_app → 12_rails_app}/.gitignore +0 -0
- /data/examples/{rails_app → 12_rails_app}/Procfile.dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/README.md +0 -0
- /data/examples/{rails_app → 12_rails_app}/Rakefile +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/application.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/assets/stylesheets/inter-font.css +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/application_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/controllers/search_controller.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/application.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/javascript/controllers/index.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/files/show.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/layouts/application.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/index.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/memories/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/robots/new.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_navbar.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/app/views/shared/_stat_card.html.erb +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/dev +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rails +0 -0
- /data/examples/{rails_app → 12_rails_app}/bin/rake +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/application.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/boot.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/database.yml +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/environment.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/importmap.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/routes.rb +0 -0
- /data/examples/{rails_app → 12_rails_app}/config/tailwind.config.js +0 -0
- /data/examples/{rails_app → 12_rails_app}/config.ru +0 -0
- /data/examples/{rails_app → 12_rails_app}/log/.keep +0 -0
- /data/examples/{rails_app → 12_rails_app}/tmp/local_secret.txt +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5765fbc2b92d89be3f2bae2f53b4353a76343efd3d092456138bea73ac7803d9
|
|
4
|
+
data.tar.gz: e594cc16f789745267ce527dcc182fbd533b7484c36e09887f702007388803a3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 78b4b7e226b9911b429e7e0d8735dd0d50d1e56abf7e9e5f494ab50321e33e574605d6a3991d4f0ce18a95ae6dd8ee7928004e3d455730aa388ee2515ffc53c3
|
|
7
|
+
data.tar.gz: c19a0c15d79342d08e406724724cf64afc68c75ea26f1bf332b4a9b661f3ebe9d50e3f9dbf41db780c0542d4cd7a77ac0bb0da9bf45b5ea7d171def21b4ffc4a
|
data/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,117 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **`sslmode` database configuration support** - SSL mode now extracted from URL and included when building URL
|
|
12
|
+
- `parse_database_url` extracts `sslmode` from URL query string (e.g., `?sslmode=require`)
|
|
13
|
+
- `build_database_url` includes `sslmode` as query parameter when set
|
|
14
|
+
- `reconcile_from_url` syncs `sslmode` along with other database components
|
|
15
|
+
- Default `sslmode` is `prefer` (from `defaults.yml`)
|
|
16
|
+
- **`htm:db:purge_all` rake task** - Permanently remove all soft-deleted records from database
|
|
17
|
+
- Displays record counts by table before deletion
|
|
18
|
+
- Detects and removes orphaned join table entries (`node_tags`, `robot_nodes`)
|
|
19
|
+
- Detects and removes orphaned propositions (where `source_node_id` no longer exists)
|
|
20
|
+
- Detects and removes orphaned robots (with no associated memory nodes)
|
|
21
|
+
- Requires confirmation before proceeding
|
|
22
|
+
- Deletes in correct order for referential integrity
|
|
23
|
+
- **`htm:jobs:process_propositions` rake task** - Incremental proposition extraction for unprocessed nodes
|
|
24
|
+
- Uses `ruby-progressbar` with ETA display
|
|
25
|
+
- Only processes nodes not yet extracted (tracks via `source_node_id` metadata)
|
|
26
|
+
- Added to `htm:jobs:process_all` task chain
|
|
27
|
+
- **Rake task passthrough in MCP CLI** - Run rake tasks via `htm_mcp rake <task>`
|
|
28
|
+
- `htm_mcp rake htm:db:stats` - Run any HTM rake task
|
|
29
|
+
- `htm_mcp rake -T` / `htm_mcp rake --tasks` - List available tasks
|
|
30
|
+
- **Pattern filtering** - `htm_mcp rake -T htm:jobs` filters to matching namespace (like standard rake)
|
|
31
|
+
- **Meta-response filtering in PropositionService** - Filters LLM responses that ask for input
|
|
32
|
+
- `META_RESPONSE_PATTERNS` constant with common patterns ("please provide", "I need the text", etc.)
|
|
33
|
+
- `meta_response?` method for detecting invalid responses
|
|
34
|
+
- Prevents storing "Please provide the text" as propositions
|
|
35
|
+
- **Progress bars for job processing tasks** - Visual progress with ETA for long-running operations
|
|
36
|
+
- `htm:jobs:process_embeddings` - Shows progress when generating embeddings
|
|
37
|
+
- `htm:jobs:process_tags` - Shows progress when extracting tags
|
|
38
|
+
- Format: `Processing: |████████████████| 50/100 (50%) ETA: 00:01:30`
|
|
39
|
+
|
|
40
|
+
### Changed
|
|
41
|
+
- **Centralized HTM loading in rake tasks** - Single `require_relative` in `lib/htm/tasks.rb`
|
|
42
|
+
- HTM is now loaded once before any rake task files are loaded
|
|
43
|
+
- Removed ~25 redundant `require 'htm'` statements from individual rake tasks
|
|
44
|
+
- Ensures local development codebase is always used via `require_relative`
|
|
45
|
+
- Removed `$LOAD_PATH` manipulation that was previously needed for `require 'htm'`
|
|
46
|
+
- **Removed `:validate` rake task dependency** - Config validation now automatic
|
|
47
|
+
- `HTM::Config.new` handles all validation at require time (environment, URL/component reconciliation, naming convention)
|
|
48
|
+
- Rake tasks no longer need explicit validation step
|
|
49
|
+
- **Cleaned up Rakefile** - Removed unnecessary code
|
|
50
|
+
- Removed deprecated `db_setup` and `db_test` backward-compatibility tasks
|
|
51
|
+
- Removed redundant `task :test` block (`:set_test_env` prerequisite handles environment setup)
|
|
52
|
+
- **PropositionService validation now fully configurable** - Moved hardcoded constants to `defaults.yml`
|
|
53
|
+
- `proposition.min_length` (default: 10) - Minimum characters for valid proposition
|
|
54
|
+
- `proposition.max_length` (default: 1000) - Maximum characters for valid proposition
|
|
55
|
+
- `proposition.min_words` (default: 5) - Minimum words for valid proposition
|
|
56
|
+
- Added `min_length`, `max_length`, `min_words` class methods that read from config
|
|
57
|
+
- **Improved proposition extraction prompt** - Better quality propositions
|
|
58
|
+
- Added explicit BAD/GOOD examples for pronoun replacement
|
|
59
|
+
- Added context enrichment examples (e.g., "wiring" → "solar panel wiring for Oklahoma barndominium")
|
|
60
|
+
- System prompt now explicitly prevents meta-responses
|
|
61
|
+
- Increased specificity requirements for self-contained facts
|
|
62
|
+
|
|
63
|
+
### Fixed
|
|
64
|
+
- **Test database isolation** - Two-layer protection prevents tests from polluting development/production
|
|
65
|
+
- `Rakefile`: `set_test_env` task now ALWAYS overrides `HTM_DATABASE__URL` to test database
|
|
66
|
+
- Uses `#{service_name}_test` pattern (e.g., `htm_test`) based on `HTM_SERVICE__NAME` env var
|
|
67
|
+
- `test_helper.rb`: Safety check aborts with helpful message if database URL doesn't contain `_test`
|
|
68
|
+
- Prevents accidental test execution against non-test databases
|
|
69
|
+
|
|
70
|
+
## [0.0.20] - 2025-12-22
|
|
71
|
+
### Added
|
|
72
|
+
- **Fiber-based job backend** - New `:fiber` backend for I/O-bound background jobs
|
|
73
|
+
- Uses `async` gem for cooperative concurrency
|
|
74
|
+
- Non-blocking execution ideal for LLM API calls
|
|
75
|
+
- `JobAdapter.enqueue_parallel(jobs)` for concurrent job execution
|
|
76
|
+
- Fiber backend runs jobs with `Async::Barrier` for parallel coordination
|
|
77
|
+
- **RememberWorkflow using simple_flow** - Parallel processing pipeline for node enrichment
|
|
78
|
+
- Orchestrates save_node → (embedding, tags, propositions in parallel) → finalize
|
|
79
|
+
- Uses `SimpleFlow::Pipeline` with dependency-based step execution
|
|
80
|
+
- Configurable concurrency model (`:auto`, `:threads`, `:async`)
|
|
81
|
+
- Visualization support: `to_mermaid` and `execution_plan` methods
|
|
82
|
+
|
|
83
|
+
### Changed
|
|
84
|
+
- **async gem is now a required dependency** - Previously optional, now always available
|
|
85
|
+
- Enables fiber-based concurrency for all HTM installations
|
|
86
|
+
- `:fiber` is now the default job backend (was `:thread`)
|
|
87
|
+
- **Default job backend changed from `:thread` to `:fiber`** - Better performance for I/O-bound LLM operations
|
|
88
|
+
|
|
89
|
+
### Removed
|
|
90
|
+
- **`JobAdapter.async_available?` method** - No longer needed since async is always available
|
|
91
|
+
|
|
92
|
+
### Fixed
|
|
93
|
+
- **`connection_timeout` default inconsistency** - Unified default to 60 seconds across all files
|
|
94
|
+
- `test/configuration_test.rb`, `docs/guides/configuration.md`, and `db/seeds.rb` now match `defaults.yml`
|
|
95
|
+
|
|
96
|
+
### Dependencies
|
|
97
|
+
- Added `async` (~> 2.0) as required runtime dependency
|
|
98
|
+
- Added `simple_flow` for workflow orchestration
|
|
99
|
+
|
|
100
|
+
## [0.0.19] - 2025-12-21
|
|
101
|
+
|
|
102
|
+
### Changed
|
|
103
|
+
- **Reorganized "Robots!" documentation** - Consolidated 5 robot-related docs into `docs/robots/` subdirectory
|
|
104
|
+
- Created `docs/robots/index.md` as section overview with navigation table and architecture diagram
|
|
105
|
+
- Moved `docs/robots.md` → `docs/robots/why-robots.md`
|
|
106
|
+
- Moved `docs/guides/multi-robot.md` → `docs/robots/multi-robot.md`
|
|
107
|
+
- Moved `docs/guides/robot-groups.md` → `docs/robots/robot-groups.md`
|
|
108
|
+
- Moved `docs/architecture/two-tier-memory.md` → `docs/robots/two-tier-memory.md`
|
|
109
|
+
- Moved `docs/architecture/hive-mind.md` → `docs/robots/hive-mind.md`
|
|
110
|
+
- Updated `mkdocs.yml` navigation with logical information flow
|
|
111
|
+
- Fixed 30+ broken relative links across documentation files
|
|
112
|
+
- **Extracted inline SVG graphics to standalone files** - Improved documentation maintainability
|
|
113
|
+
- From `hive-mind.md`: `hive-mind-shared-memory.svg`, `memory-topology.svg`
|
|
114
|
+
- From `two-tier-memory.md`: `two-tier-memory-architecture.svg`, `eviction-priority.svg`, `balanced-strategy-decay.svg`
|
|
115
|
+
- All SVGs now in `docs/assets/images/` and referenced via markdown image syntax
|
|
116
|
+
|
|
117
|
+
### Fixed
|
|
118
|
+
- **ConfigSection.md unrecognized link warning** - Wrapped method signatures in backticks to prevent MkDocs interpreting bracket parameters as links
|
|
119
|
+
|
|
9
120
|
## [0.0.18] - 2025-12-20
|
|
10
121
|
### Added
|
|
11
122
|
- **Anyway::Config-based configuration system** - Replaced custom Configuration class with robust multi-source config management
|
|
@@ -689,7 +800,14 @@ HTM.config.embedding.model
|
|
|
689
800
|
- Working memory size is user-configurable
|
|
690
801
|
- See ADRs for detailed architectural decisions and rationale
|
|
691
802
|
|
|
692
|
-
[Unreleased]: https://github.com/madbomber/htm/compare/v0.0.
|
|
803
|
+
[Unreleased]: https://github.com/madbomber/htm/compare/v0.0.19...HEAD
|
|
804
|
+
[0.0.19]: https://github.com/madbomber/htm/compare/v0.0.18...v0.0.19
|
|
805
|
+
[0.0.18]: https://github.com/madbomber/htm/compare/v0.0.17...v0.0.18
|
|
806
|
+
[0.0.17]: https://github.com/madbomber/htm/compare/v0.0.15...v0.0.17
|
|
807
|
+
[0.0.15]: https://github.com/madbomber/htm/compare/v0.0.14...v0.0.15
|
|
808
|
+
[0.0.14]: https://github.com/madbomber/htm/compare/v0.0.13...v0.0.14
|
|
809
|
+
[0.0.13]: https://github.com/madbomber/htm/compare/v0.0.11...v0.0.13
|
|
810
|
+
[0.0.11]: https://github.com/madbomber/htm/compare/v0.0.10...v0.0.11
|
|
693
811
|
[0.0.12]: https://github.com/madbomber/htm/compare/v0.0.10...v0.0.12
|
|
694
812
|
[0.0.10]: https://github.com/madbomber/htm/compare/v0.0.9...v0.0.10
|
|
695
813
|
[0.0.9]: https://github.com/madbomber/htm/compare/v0.0.8...v0.0.9
|
data/README.md
CHANGED
|
@@ -1,4 +1,16 @@
|
|
|
1
|
+
|
|
1
2
|
<div align="center">
|
|
3
|
+
<div style="background: linear-gradient(135deg, #90EE90 0%, #32CD32 100%); border: 4px solid #228B22; border-radius: 12px; padding: 20px; margin: 20px auto; max-width: 800px; box-shadow: 0 8px 16px rgba(34, 139, 34, 0.3);">
|
|
4
|
+
<p style="color: #000000; font-size: 42px; font-weight: bold; margin: 0;">
|
|
5
|
+
🚀 v0.0.20 🚀
|
|
6
|
+
</p>
|
|
7
|
+
<p style="color: #000; font-size: 27px; font-weight: bold; margin: 10px 0 0 0; line-height: 1.6;">
|
|
8
|
+
<strong>Fiber-based concurrency</strong> is now the default.<br/>
|
|
9
|
+
Parallel embedding, tagging, and proposition extraction via simple_flow.
|
|
10
|
+
</p>
|
|
11
|
+
</div>
|
|
12
|
+
|
|
13
|
+
|
|
2
14
|
<h1>HTM</h1>
|
|
3
15
|
<img src="docs/assets/images/htm_demo.gif" alt="Tree of Knowledge is Growing" width="400">
|
|
4
16
|
|
data/Rakefile
CHANGED
|
@@ -10,21 +10,27 @@ Rake::TestTask.new(:test) do |t|
|
|
|
10
10
|
t.verbose = true
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
# Ensure test task runs with HTM_ENV=test (takes priority over RAILS_ENV/RACK_ENV)
|
|
14
|
-
task :test do
|
|
15
|
-
ENV['HTM_ENV'] = 'test'
|
|
16
|
-
end
|
|
17
|
-
|
|
18
13
|
# Prepend environment setup before test runs
|
|
19
14
|
Rake::Task[:test].enhance [:set_test_env]
|
|
20
15
|
|
|
21
16
|
task :set_test_env do
|
|
22
17
|
ENV['HTM_ENV'] = 'test'
|
|
23
|
-
|
|
24
|
-
#
|
|
25
|
-
|
|
26
|
-
|
|
18
|
+
|
|
19
|
+
# Build test database name from service name + environment
|
|
20
|
+
# Uses HTM_SERVICE__NAME env var if set, otherwise defaults to 'htm'
|
|
21
|
+
service_name = ENV['HTM_SERVICE__NAME'] || 'htm'
|
|
22
|
+
test_db_name = "#{service_name}_test"
|
|
23
|
+
|
|
24
|
+
# ALWAYS use the test database - never allow tests to run against other databases
|
|
25
|
+
# This prevents accidental pollution of development/production data
|
|
26
|
+
test_db_url = "postgresql://#{ENV['USER']}@localhost:5432/#{test_db_name}"
|
|
27
|
+
|
|
28
|
+
if ENV['HTM_DATABASE__URL'] && !ENV['HTM_DATABASE__URL'].include?('_test')
|
|
29
|
+
warn "WARNING: HTM_DATABASE__URL was set to '#{ENV['HTM_DATABASE__URL']}'"
|
|
30
|
+
warn " Overriding to use test database: #{test_db_url}"
|
|
27
31
|
end
|
|
32
|
+
|
|
33
|
+
ENV['HTM_DATABASE__URL'] = test_db_url
|
|
28
34
|
end
|
|
29
35
|
|
|
30
36
|
task default: :test
|
|
@@ -33,21 +39,101 @@ task default: :test
|
|
|
33
39
|
# This uses the same loader that external applications use
|
|
34
40
|
require_relative "lib/htm/tasks"
|
|
35
41
|
|
|
36
|
-
#
|
|
37
|
-
|
|
38
|
-
|
|
42
|
+
# =============================================================================
|
|
43
|
+
# Examples Tasks
|
|
44
|
+
# =============================================================================
|
|
45
|
+
|
|
46
|
+
# Prepend environment setup before running any example
|
|
47
|
+
task :set_examples_env do
|
|
48
|
+
ENV['HTM_ENV'] = 'examples'
|
|
49
|
+
|
|
50
|
+
# Build examples database name from service name + environment
|
|
51
|
+
service_name = ENV['HTM_SERVICE__NAME'] || 'htm'
|
|
52
|
+
examples_db_name = "#{service_name}_examples"
|
|
39
53
|
|
|
40
|
-
|
|
41
|
-
|
|
54
|
+
# ALWAYS use the examples database
|
|
55
|
+
examples_db_url = "postgresql://#{ENV['USER']}@localhost:5432/#{examples_db_name}"
|
|
42
56
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
57
|
+
if ENV['HTM_DATABASE__URL'] && !ENV['HTM_DATABASE__URL'].include?('_examples')
|
|
58
|
+
warn "WARNING: HTM_DATABASE__URL was set to '#{ENV['HTM_DATABASE__URL']}'"
|
|
59
|
+
warn " Overriding to use examples database: #{examples_db_url}"
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
ENV['HTM_DATABASE__URL'] = examples_db_url
|
|
46
63
|
end
|
|
47
64
|
|
|
65
|
+
namespace :examples do
|
|
66
|
+
desc "Set up examples database (create + setup schema)"
|
|
67
|
+
task setup: :set_examples_env do
|
|
68
|
+
Rake::Task['htm:db:create'].invoke rescue nil
|
|
69
|
+
Rake::Task['htm:db:setup'].invoke
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
desc "Reset examples database (drop + create + setup)"
|
|
73
|
+
task reset: :set_examples_env do
|
|
74
|
+
Rake::Task['htm:db:reset'].invoke
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
desc "Run basic_usage example"
|
|
78
|
+
task basic: :set_examples_env do
|
|
79
|
+
ruby "examples/01_basic_usage.rb"
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
desc "Run all standalone examples"
|
|
83
|
+
task :all => :set_examples_env do
|
|
84
|
+
examples = %w[
|
|
85
|
+
examples/01_basic_usage.rb
|
|
86
|
+
examples/03_custom_llm_configuration.rb
|
|
87
|
+
examples/04_file_loader_usage.rb
|
|
88
|
+
examples/05_timeframe_demo.rb
|
|
89
|
+
]
|
|
90
|
+
examples.each do |example|
|
|
91
|
+
if File.exist?(example)
|
|
92
|
+
puts "\n#{'=' * 60}"
|
|
93
|
+
puts "Running: #{example}"
|
|
94
|
+
puts "#{'=' * 60}"
|
|
95
|
+
ruby example
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
desc "Show examples database status"
|
|
101
|
+
task status: :set_examples_env do
|
|
102
|
+
require_relative 'lib/htm'
|
|
103
|
+
puts "Examples Environment Status"
|
|
104
|
+
puts "=" * 40
|
|
105
|
+
puts "HTM_ENV: #{ENV['HTM_ENV']}"
|
|
106
|
+
puts "Database URL: #{ENV['HTM_DATABASE__URL']}"
|
|
107
|
+
puts "Expected database: #{HTM.config.expected_database_name}"
|
|
108
|
+
if HTM.config.database_configured?
|
|
109
|
+
puts "Database configured: Yes"
|
|
110
|
+
begin
|
|
111
|
+
HTM::ActiveRecordConfig.establish_connection!
|
|
112
|
+
if HTM::ActiveRecordConfig.connected?
|
|
113
|
+
puts "Database connected: Yes"
|
|
114
|
+
puts "\nTable counts:"
|
|
115
|
+
%w[nodes robots tags file_sources].each do |table|
|
|
116
|
+
count = ActiveRecord::Base.connection.execute("SELECT COUNT(*) FROM #{table}").first['count']
|
|
117
|
+
puts " #{table}: #{count}"
|
|
118
|
+
end
|
|
119
|
+
else
|
|
120
|
+
puts "Database connected: No"
|
|
121
|
+
end
|
|
122
|
+
rescue => e
|
|
123
|
+
puts "Database connected: No (#{e.message})"
|
|
124
|
+
end
|
|
125
|
+
else
|
|
126
|
+
puts "Database configured: No"
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
desc "Run example (alias for examples:basic)"
|
|
132
|
+
task :example => 'examples:basic'
|
|
133
|
+
|
|
48
134
|
desc "Run timeframe demo"
|
|
49
135
|
task :timeframe_demo do
|
|
50
|
-
ruby "examples/
|
|
136
|
+
ruby "examples/05_timeframe_demo.rb"
|
|
51
137
|
end
|
|
52
138
|
|
|
53
139
|
desc "Show gem stats"
|
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
require_relative '../../lib/htm/migration'
|
|
4
|
+
|
|
5
|
+
class EnableExtensions < HTM::Migration
|
|
4
6
|
def up
|
|
5
|
-
|
|
6
|
-
|
|
7
|
+
run "CREATE EXTENSION IF NOT EXISTS vector"
|
|
8
|
+
run "CREATE EXTENSION IF NOT EXISTS pg_trgm"
|
|
9
|
+
run "CREATE EXTENSION IF NOT EXISTS pg_search"
|
|
7
10
|
end
|
|
8
11
|
|
|
9
12
|
def down
|
|
10
|
-
|
|
11
|
-
|
|
13
|
+
run "DROP EXTENSION IF EXISTS pg_search"
|
|
14
|
+
run "DROP EXTENSION IF EXISTS pg_trgm"
|
|
15
|
+
run "DROP EXTENSION IF EXISTS vector"
|
|
12
16
|
end
|
|
13
17
|
end
|
|
@@ -1,11 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
3
|
+
require_relative '../../lib/htm/migration'
|
|
4
|
+
|
|
5
|
+
class CreateRobots < HTM::Migration
|
|
6
|
+
def up
|
|
7
|
+
create_table(:robots) do
|
|
8
|
+
primary_key :id
|
|
9
|
+
String :name, text: true
|
|
10
|
+
DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
|
|
11
|
+
DateTime :last_active, default: Sequel::CURRENT_TIMESTAMP
|
|
9
12
|
end
|
|
13
|
+
|
|
14
|
+
run "COMMENT ON TABLE robots IS 'Registry of all LLM robots using the HTM system'"
|
|
15
|
+
run "COMMENT ON COLUMN robots.name IS 'Human-readable name for the robot'"
|
|
16
|
+
run "COMMENT ON COLUMN robots.created_at IS 'When the robot was first registered'"
|
|
17
|
+
run "COMMENT ON COLUMN robots.last_active IS 'Last time the robot accessed the system'"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def down
|
|
21
|
+
drop_table(:robots)
|
|
10
22
|
end
|
|
11
23
|
end
|
|
@@ -1,25 +1,38 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
3
|
+
require_relative '../../lib/htm/migration'
|
|
4
|
+
|
|
5
|
+
class CreateFileSources < HTM::Migration
|
|
6
|
+
def up
|
|
7
|
+
create_table(:file_sources) do
|
|
8
|
+
primary_key :id
|
|
9
|
+
String :file_path, text: true, null: false
|
|
10
|
+
String :file_hash, size: 64
|
|
11
|
+
DateTime :mtime
|
|
12
|
+
Integer :file_size
|
|
13
|
+
column :frontmatter, :jsonb, default: Sequel.lit("'{}'::jsonb")
|
|
14
|
+
DateTime :last_synced_at
|
|
15
|
+
DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
|
|
16
|
+
DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP
|
|
14
17
|
end
|
|
15
18
|
|
|
16
|
-
add_index :file_sources, :file_path, unique: true, name:
|
|
17
|
-
add_index :file_sources, :file_hash, name:
|
|
18
|
-
add_index :file_sources, :last_synced_at, name:
|
|
19
|
+
add_index :file_sources, :file_path, unique: true, name: :idx_file_sources_path_unique
|
|
20
|
+
add_index :file_sources, :file_hash, name: :idx_file_sources_hash
|
|
21
|
+
add_index :file_sources, :last_synced_at, name: :idx_file_sources_last_synced
|
|
22
|
+
|
|
23
|
+
run "COMMENT ON TABLE file_sources IS 'Source file metadata for loaded documents'"
|
|
24
|
+
run "COMMENT ON COLUMN file_sources.file_path IS 'Absolute path to source file'"
|
|
25
|
+
run "COMMENT ON COLUMN file_sources.file_hash IS 'SHA-256 hash of file content'"
|
|
26
|
+
run "COMMENT ON COLUMN file_sources.mtime IS 'File modification time'"
|
|
27
|
+
run "COMMENT ON COLUMN file_sources.file_size IS 'File size in bytes'"
|
|
28
|
+
run "COMMENT ON COLUMN file_sources.frontmatter IS 'Parsed YAML frontmatter'"
|
|
29
|
+
run "COMMENT ON COLUMN file_sources.last_synced_at IS 'When file was last synced to HTM'"
|
|
19
30
|
|
|
20
31
|
# LZ4 compression for better read performance on JSONB column
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
32
|
+
run "ALTER TABLE file_sources ALTER COLUMN frontmatter SET COMPRESSION lz4"
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def down
|
|
36
|
+
drop_table(:file_sources)
|
|
24
37
|
end
|
|
25
38
|
end
|
|
@@ -1,82 +1,94 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
3
|
+
require_relative '../../lib/htm/migration'
|
|
4
|
+
|
|
5
|
+
class CreateNodes < HTM::Migration
|
|
6
|
+
def up
|
|
7
|
+
create_table(:nodes) do
|
|
8
|
+
primary_key :id
|
|
9
|
+
String :content, text: true, null: false
|
|
10
|
+
Integer :access_count, default: 0, null: false
|
|
11
|
+
DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
|
|
12
|
+
DateTime :updated_at, default: Sequel::CURRENT_TIMESTAMP
|
|
13
|
+
DateTime :last_accessed, default: Sequel::CURRENT_TIMESTAMP
|
|
14
|
+
Integer :token_count
|
|
15
|
+
column :embedding, 'vector(2000)'
|
|
16
|
+
Integer :embedding_dimension
|
|
17
|
+
String :content_hash, size: 64
|
|
18
|
+
DateTime :deleted_at
|
|
19
|
+
Bignum :source_id
|
|
20
|
+
Integer :chunk_position
|
|
21
|
+
column :metadata, :jsonb, default: Sequel.lit("'{}'::jsonb"), null: false
|
|
19
22
|
end
|
|
20
23
|
|
|
21
24
|
# Basic indexes for common queries
|
|
22
|
-
add_index :nodes, :created_at, name:
|
|
23
|
-
add_index :nodes, :updated_at, name:
|
|
24
|
-
add_index :nodes, :last_accessed, name:
|
|
25
|
-
add_index :nodes, :access_count, name:
|
|
26
|
-
add_index :nodes, :content_hash, unique: true, name:
|
|
27
|
-
add_index :nodes, :deleted_at, name:
|
|
28
|
-
add_index :nodes, :source_id, name:
|
|
29
|
-
add_index :nodes, [:source_id, :chunk_position], name:
|
|
25
|
+
add_index :nodes, :created_at, name: :idx_nodes_created_at
|
|
26
|
+
add_index :nodes, :updated_at, name: :idx_nodes_updated_at
|
|
27
|
+
add_index :nodes, :last_accessed, name: :idx_nodes_last_accessed
|
|
28
|
+
add_index :nodes, :access_count, name: :idx_nodes_access_count
|
|
29
|
+
add_index :nodes, :content_hash, unique: true, name: :idx_nodes_content_hash_unique
|
|
30
|
+
add_index :nodes, :deleted_at, name: :idx_nodes_deleted_at
|
|
31
|
+
add_index :nodes, :source_id, name: :idx_nodes_source_id
|
|
32
|
+
add_index :nodes, [:source_id, :chunk_position], name: :idx_nodes_source_chunk_position
|
|
33
|
+
|
|
34
|
+
# Comments
|
|
35
|
+
run "COMMENT ON TABLE nodes IS 'Core memory storage for conversation messages and context'"
|
|
36
|
+
run "COMMENT ON COLUMN nodes.content IS 'The conversation message/utterance content'"
|
|
37
|
+
run "COMMENT ON COLUMN nodes.access_count IS 'Number of times this node has been accessed/retrieved'"
|
|
38
|
+
run "COMMENT ON COLUMN nodes.created_at IS 'When this memory was created'"
|
|
39
|
+
run "COMMENT ON COLUMN nodes.updated_at IS 'When this memory was last modified'"
|
|
40
|
+
run "COMMENT ON COLUMN nodes.last_accessed IS 'When this memory was last accessed'"
|
|
41
|
+
run "COMMENT ON COLUMN nodes.token_count IS 'Number of tokens in the content (for context budget management)'"
|
|
42
|
+
run "COMMENT ON COLUMN nodes.embedding IS 'Vector embedding (max 2000 dimensions) for semantic search'"
|
|
43
|
+
run "COMMENT ON COLUMN nodes.embedding_dimension IS 'Actual number of dimensions used in the embedding vector (max 2000)'"
|
|
44
|
+
run "COMMENT ON COLUMN nodes.content_hash IS 'SHA-256 hash of content for deduplication'"
|
|
45
|
+
run "COMMENT ON COLUMN nodes.deleted_at IS 'Soft delete timestamp - node is considered deleted when set'"
|
|
46
|
+
run "COMMENT ON COLUMN nodes.source_id IS 'Reference to source file (for file-loaded nodes)'"
|
|
47
|
+
run "COMMENT ON COLUMN nodes.chunk_position IS 'Position within source file (0-indexed)'"
|
|
48
|
+
run "COMMENT ON COLUMN nodes.metadata IS 'Flexible metadata storage (memory_type, importance, source, etc.)'"
|
|
30
49
|
|
|
31
50
|
# Partial index for efficiently querying non-deleted nodes
|
|
32
|
-
|
|
51
|
+
run "CREATE INDEX idx_nodes_not_deleted_created_at ON nodes (created_at) WHERE deleted_at IS NULL"
|
|
33
52
|
|
|
34
53
|
# GIN index for JSONB metadata queries
|
|
35
|
-
|
|
54
|
+
run "CREATE INDEX idx_nodes_metadata ON nodes USING gin(metadata)"
|
|
36
55
|
|
|
37
56
|
# Vector similarity search index (HNSW for better performance)
|
|
38
|
-
|
|
57
|
+
run <<-SQL
|
|
39
58
|
CREATE INDEX idx_nodes_embedding ON nodes
|
|
40
59
|
USING hnsw (embedding vector_cosine_ops)
|
|
41
60
|
WITH (m = 16, ef_construction = 64)
|
|
42
61
|
SQL
|
|
43
62
|
|
|
44
63
|
# Full-text search on conversation content
|
|
45
|
-
|
|
46
|
-
CREATE INDEX idx_nodes_content_gin ON nodes
|
|
47
|
-
USING gin(to_tsvector('english', content))
|
|
48
|
-
SQL
|
|
64
|
+
run "CREATE INDEX idx_nodes_content_gin ON nodes USING gin(to_tsvector('english', content))"
|
|
49
65
|
|
|
50
66
|
# Trigram indexes for fuzzy matching on conversation content
|
|
51
|
-
|
|
52
|
-
CREATE INDEX idx_nodes_content_trgm ON nodes
|
|
53
|
-
USING gin(content gin_trgm_ops)
|
|
54
|
-
SQL
|
|
67
|
+
run "CREATE INDEX idx_nodes_content_trgm ON nodes USING gin(content gin_trgm_ops)"
|
|
55
68
|
|
|
56
69
|
# Check constraint for embedding dimensions
|
|
57
|
-
|
|
70
|
+
run <<-SQL
|
|
58
71
|
ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
|
|
59
72
|
CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
|
|
60
73
|
SQL
|
|
61
74
|
|
|
62
75
|
# Partial index for active (non-deleted) node queries
|
|
63
|
-
|
|
64
|
-
name: 'idx_nodes_active',
|
|
65
|
-
where: 'deleted_at IS NULL'
|
|
76
|
+
run "CREATE INDEX idx_nodes_active ON nodes (id) WHERE deleted_at IS NULL"
|
|
66
77
|
|
|
67
78
|
# Composite index for embedding-based searches on active nodes
|
|
68
|
-
|
|
69
|
-
CREATE INDEX idx_nodes_active_with_embedding ON nodes (id)
|
|
70
|
-
WHERE deleted_at IS NULL AND embedding IS NOT NULL
|
|
71
|
-
SQL
|
|
79
|
+
run "CREATE INDEX idx_nodes_active_with_embedding ON nodes (id) WHERE deleted_at IS NULL AND embedding IS NOT NULL"
|
|
72
80
|
|
|
73
81
|
# LZ4 compression for better read performance
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
ALTER TABLE nodes ALTER COLUMN content SET COMPRESSION lz4;
|
|
77
|
-
SQL
|
|
82
|
+
run "ALTER TABLE nodes ALTER COLUMN metadata SET COMPRESSION lz4"
|
|
83
|
+
run "ALTER TABLE nodes ALTER COLUMN content SET COMPRESSION lz4"
|
|
78
84
|
|
|
79
85
|
# Foreign key to file_sources table
|
|
80
|
-
|
|
86
|
+
alter_table(:nodes) do
|
|
87
|
+
add_foreign_key [:source_id], :file_sources, on_delete: :set_null
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def down
|
|
92
|
+
drop_table(:nodes)
|
|
81
93
|
end
|
|
82
94
|
end
|
|
@@ -1,20 +1,32 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
3
|
+
require_relative '../../lib/htm/migration'
|
|
4
|
+
|
|
5
|
+
class CreateTags < HTM::Migration
|
|
6
|
+
def up
|
|
7
|
+
create_table(:tags) do
|
|
8
|
+
primary_key :id
|
|
9
|
+
String :name, text: true, null: false
|
|
10
|
+
DateTime :created_at, default: Sequel::CURRENT_TIMESTAMP
|
|
11
|
+
DateTime :deleted_at
|
|
9
12
|
end
|
|
10
13
|
|
|
11
|
-
add_index :tags, :name, unique: true, name:
|
|
12
|
-
add_index :tags, :
|
|
13
|
-
|
|
14
|
+
add_index :tags, :name, unique: true, name: :idx_tags_name_unique
|
|
15
|
+
add_index :tags, :deleted_at, name: :idx_tags_deleted_at
|
|
16
|
+
|
|
17
|
+
# Pattern matching index for prefix queries
|
|
18
|
+
run "CREATE INDEX idx_tags_name_pattern ON tags USING btree (name text_pattern_ops)"
|
|
14
19
|
|
|
15
20
|
# GIN trigram index for fuzzy search (typo-tolerant queries)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
21
|
+
run "CREATE INDEX idx_tags_name_trgm ON tags USING gin(name gin_trgm_ops)"
|
|
22
|
+
|
|
23
|
+
run "COMMENT ON TABLE tags IS 'Unique tag names for categorization'"
|
|
24
|
+
run "COMMENT ON COLUMN tags.name IS 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'"
|
|
25
|
+
run "COMMENT ON COLUMN tags.created_at IS 'When this tag was created'"
|
|
26
|
+
run "COMMENT ON COLUMN tags.deleted_at IS 'Soft delete timestamp'"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def down
|
|
30
|
+
drop_table(:tags)
|
|
19
31
|
end
|
|
20
32
|
end
|